diff options
-rw-r--r-- | src/corelib/io/qurl.cpp | 11 | ||||
-rw-r--r-- | src/corelib/io/qurl.h | 16 | ||||
-rw-r--r-- | src/corelib/io/qurl_p.h | 1 | ||||
-rw-r--r-- | src/corelib/io/qurlquery.h | 14 | ||||
-rw-r--r-- | src/corelib/io/qurlrecode.cpp | 50 | ||||
-rw-r--r-- | tests/auto/corelib/io/qurl/tst_qurl.cpp | 13 | ||||
-rw-r--r-- | tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp | 32 |
7 files changed, 123 insertions, 14 deletions
diff --git a/src/corelib/io/qurl.cpp b/src/corelib/io/qurl.cpp index 3b49b82637..cc326566c8 100644 --- a/src/corelib/io/qurl.cpp +++ b/src/corelib/io/qurl.cpp @@ -2979,6 +2979,17 @@ QByteArray QUrl::toPercentEncoding(const QString &input, const QByteArray &exclu } /*! + \internal + \since 5.0 + Used in the setEncodedXXX compatibility functions. Converts \a ba to + QString form. +*/ +QString QUrl::fromEncodedComponent_helper(const QByteArray &ba) +{ + return qt_urlRecodeByteArray(ba); +} + +/*! \fn QByteArray QUrl::toPunycode(const QString &uc) \obsolete Returns a \a uc in Punycode encoding. diff --git a/src/corelib/io/qurl.h b/src/corelib/io/qurl.h index 8055129e4b..2a651b92f6 100644 --- a/src/corelib/io/qurl.h +++ b/src/corelib/io/qurl.h @@ -274,37 +274,37 @@ public: QT_DEPRECATED inline void removeAllEncodedQueryItems(const QByteArray &key); QT_DEPRECATED void setEncodedUrl(const QByteArray &u, ParsingMode mode = TolerantMode) - { setUrl(QString::fromUtf8(u.constData(), u.size()), mode); } + { setUrl(fromEncodedComponent_helper(u), mode); } QT_DEPRECATED QByteArray encodedUserName() const { return userName(FullyEncoded).toLatin1(); } QT_DEPRECATED void setEncodedUserName(const QByteArray &value) - { setUserName(QString::fromLatin1(value)); } + { setUserName(fromEncodedComponent_helper(value)); } QT_DEPRECATED QByteArray encodedPassword() const { return password(FullyEncoded).toLatin1(); } QT_DEPRECATED void setEncodedPassword(const QByteArray &value) - { setPassword(QString::fromLatin1(value)); } + { setPassword(fromEncodedComponent_helper(value)); } QT_DEPRECATED QByteArray encodedHost() const { return host(FullyEncoded).toLatin1(); } QT_DEPRECATED void setEncodedHost(const QByteArray &value) - { setHost(QString::fromLatin1(value)); } + { setHost(fromEncodedComponent_helper(value)); } QT_DEPRECATED QByteArray encodedPath() const { return path(FullyEncoded).toLatin1(); } QT_DEPRECATED void setEncodedPath(const QByteArray &value) - { setPath(QString::fromLatin1(value)); } + { setPath(fromEncodedComponent_helper(value)); } QT_DEPRECATED QByteArray encodedQuery() const { return toLatin1_helper(query(FullyEncoded)); } QT_DEPRECATED void setEncodedQuery(const QByteArray &value) - { setQuery(value.isNull() ? QString() : QString::fromLatin1(value)); } + { setQuery(fromEncodedComponent_helper(value)); } QT_DEPRECATED QByteArray encodedFragment() const { return toLatin1_helper(fragment(FullyEncoded)); } QT_DEPRECATED void setEncodedFragment(const QByteArray &value) - { setFragment(value.isNull() ? QString() : QString::fromLatin1(value)); } + { setFragment(fromEncodedComponent_helper(value)); } private: // helper function for the encodedQuery and encodedFragment functions @@ -315,6 +315,8 @@ private: return string.toLatin1(); } #endif +private: + static QString fromEncodedComponent_helper(const QByteArray &ba); public: static QString fromAce(const QByteArray &); diff --git a/src/corelib/io/qurl_p.h b/src/corelib/io/qurl_p.h index e55ba4a52a..7c4beccb02 100644 --- a/src/corelib/io/qurl_p.h +++ b/src/corelib/io/qurl_p.h @@ -182,6 +182,7 @@ extern Q_AUTOTEST_EXPORT void qt_nameprep(QString *source, int from); extern Q_AUTOTEST_EXPORT bool qt_check_std3rules(const QChar *uc, int len); extern Q_AUTOTEST_EXPORT void qt_punycodeEncoder(const QChar *s, int ucLength, QString *output); extern Q_AUTOTEST_EXPORT QString qt_punycodeDecoder(const QString &pc); +extern Q_AUTOTEST_EXPORT QString qt_urlRecodeByteArray(const QByteArray &ba); QT_END_NAMESPACE diff --git a/src/corelib/io/qurlquery.h b/src/corelib/io/qurlquery.h index 5eac44f327..c4bb359f3b 100644 --- a/src/corelib/io/qurlquery.h +++ b/src/corelib/io/qurlquery.h @@ -132,22 +132,22 @@ inline void QUrl::removeAllQueryItems(const QString &key) { QUrlQuery q(*this); q.removeAllQueryItems(key); } inline void QUrl::addEncodedQueryItem(const QByteArray &key, const QByteArray &value) -{ QUrlQuery q(*this); q.addQueryItem(QString::fromUtf8(key), QString::fromUtf8(value)); setQuery(q); } +{ QUrlQuery q(*this); q.addQueryItem(fromEncodedComponent_helper(key), fromEncodedComponent_helper(value)); setQuery(q); } inline bool QUrl::hasEncodedQueryItem(const QByteArray &key) const -{ return QUrlQuery(*this).hasQueryItem(QString::fromUtf8(key)); } +{ return QUrlQuery(*this).hasQueryItem(fromEncodedComponent_helper(key)); } inline QByteArray QUrl::encodedQueryItemValue(const QByteArray &key) const -{ return QUrlQuery(*this).queryItemValue(QString::fromUtf8(key), QUrl::FullyEncoded).toLatin1(); } +{ return QUrlQuery(*this).queryItemValue(fromEncodedComponent_helper(key), QUrl::FullyEncoded).toLatin1(); } inline void QUrl::removeEncodedQueryItem(const QByteArray &key) -{ QUrlQuery q(*this); q.removeQueryItem(QString::fromUtf8(key)); setQuery(q); } +{ QUrlQuery q(*this); q.removeQueryItem(fromEncodedComponent_helper(key)); setQuery(q); } inline void QUrl::removeAllEncodedQueryItems(const QByteArray &key) -{ QUrlQuery q(*this); q.removeAllQueryItems(QString::fromUtf8(key)); } +{ QUrlQuery q(*this); q.removeAllQueryItems(fromEncodedComponent_helper(key)); } inline void QUrl::setEncodedQueryItems(const QList<QPair<QByteArray, QByteArray> > &qry) { QUrlQuery q; QList<QPair<QByteArray, QByteArray> >::ConstIterator it = qry.constBegin(); for ( ; it != qry.constEnd(); ++it) - q.addQueryItem(QString::fromUtf8(it->first), QString::fromUtf8(it->second)); + q.addQueryItem(fromEncodedComponent_helper(it->first), fromEncodedComponent_helper(it->second)); setQuery(q); } inline QList<QPair<QByteArray, QByteArray> > QUrl::encodedQueryItems() const @@ -162,7 +162,7 @@ inline QList<QPair<QByteArray, QByteArray> > QUrl::encodedQueryItems() const } inline QList<QByteArray> QUrl::allEncodedQueryItemValues(const QByteArray &key) const { - QStringList items = QUrlQuery(*this).allQueryItemValues(QString::fromUtf8(key), QUrl::FullyEncoded); + QStringList items = QUrlQuery(*this).allQueryItemValues(fromEncodedComponent_helper(key), QUrl::FullyEncoded); QList<QByteArray> result; result.reserve(items.size()); Q_FOREACH (const QString &item, items) diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index 12d23e9450..4399f38286 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -674,4 +674,54 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end, encoding, actionTable, false); } +/*! + \internal + \since 5.0 + + \a ba contains an 8-bit form of the component and it might be + percent-encoded already. We can't use QString::fromUtf8 because it might + contain non-UTF8 sequences. We can't use QByteArray::toPercentEncoding + because it might already contain percent-encoded sequences. We can't use + qt_urlRecode because it needs UTF-16 input. +*/ +Q_AUTOTEST_EXPORT +QString qt_urlRecodeByteArray(const QByteArray &ba) +{ + if (ba.isNull()) + return QString(); + + // scan ba for anything above or equal to 0x80 + // control points below 0x20 are fine in QString + const char *in = ba.constData(); + const char *const end = ba.constEnd(); + for ( ; in < end; ++in) { + if (*in & 0x80) + break; + } + + if (in == end) { + // no non-ASCII found, we're safe to convert to QString + return QString::fromLatin1(ba, ba.size()); + } + + // we found something that we need to encode + QByteArray intermediate = ba; + intermediate.resize(ba.size() * 3 - (in - ba.constData())); + uchar *out = reinterpret_cast<uchar *>(intermediate.data() + (in - ba.constData())); + for ( ; in < end; ++in) { + if (*in & 0x80) { + // encode + *out++ = '%'; + *out++ = encodeNibble(uchar(*in) >> 4); + *out++ = encodeNibble(uchar(*in) & 0xf); + } else { + // keep + *out++ = uchar(*in); + } + } + + // now it's safe to call fromLatin1 + return QString::fromLatin1(intermediate, out - reinterpret_cast<uchar *>(intermediate.data())); +} + QT_END_NAMESPACE diff --git a/tests/auto/corelib/io/qurl/tst_qurl.cpp b/tests/auto/corelib/io/qurl/tst_qurl.cpp index 6b7a8025ff..12e6e4a01d 100644 --- a/tests/auto/corelib/io/qurl/tst_qurl.cpp +++ b/tests/auto/corelib/io/qurl/tst_qurl.cpp @@ -2247,6 +2247,8 @@ void tst_QUrl::setEncodedFragment_data() QTest::newRow("initial url has fragment") << BA("http://www.kde.org#old") << BA("new") << BA("http://www.kde.org#new"); QTest::newRow("encoded fragment") << BA("http://www.kde.org") << BA("a%20c") << BA("http://www.kde.org#a%20c"); QTest::newRow("with #") << BA("http://www.kde.org") << BA("a#b") << BA("http://www.kde.org#a#b"); + QTest::newRow("unicode") << BA("http://www.kde.org") << BA("\xc3\xa9") << BA("http://www.kde.org#%C3%A9"); + QTest::newRow("binary") << BA("http://www.kde.org") << BA("\x00\xc0\x80", 3) << BA("http://www.kde.org#%00%C0%80"); } void tst_QUrl::setEncodedFragment() @@ -2926,6 +2928,17 @@ void tst_QUrl::componentEncodings() QCOMPARE(url.toString(formatting), (((QString(toString ))))); // the weird () and space is to align the output + if (formatting == QUrl::FullyEncoded) { + QCOMPARE(url.encodedUserName(), userName.toUtf8()); + QCOMPARE(url.encodedPassword(), password.toUtf8()); + // no encodedUserInfo + QCOMPARE(url.encodedHost(), host.toUtf8()); + // no encodedAuthority + QCOMPARE(url.encodedPath(), path.toUtf8()); + QCOMPARE(url.encodedQuery(), query.toUtf8()); + QCOMPARE(url.encodedFragment(), fragment.toUtf8()); + } + // repeat with the URL we got from toString QUrl url2(toString); QCOMPARE(url2.userName(formatting), userName); diff --git a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp index e008133d87..5e7fc2c7b6 100644 --- a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp +++ b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp @@ -95,6 +95,8 @@ private Q_SLOTS: void encodingRecode(); void encodingRecodeInvalidUtf8_data(); void encodingRecodeInvalidUtf8(); + void recodeByteArray_data(); + void recodeByteArray(); }; #include "tst_qurlinternal.moc" @@ -1006,4 +1008,34 @@ void tst_QUrlInternal::encodingRecodeInvalidUtf8() QCOMPARE(output, QTest::currentDataTag() + input); } +void tst_QUrlInternal::recodeByteArray_data() +{ + QTest::addColumn<QByteArray>("input"); + QTest::addColumn<QString>("expected"); + + QTest::newRow("null") << QByteArray() << QString(); + QTest::newRow("empty") << QByteArray("") << QString(""); + QTest::newRow("normal") << QByteArray("Hello") << "Hello"; + QTest::newRow("valid-utf8") << QByteArray("\xc3\xa9") << "%C3%A9"; + QTest::newRow("percent-encoded") << QByteArray("%C3%A9%00%C0%80") << "%C3%A9%00%C0%80"; + QTest::newRow("invalid-utf8-1") << QByteArray("\xc3\xc3") << "%C3%C3"; + QTest::newRow("invalid-utf8-2") << QByteArray("\xc0\x80") << "%C0%80"; + + // note: percent-encoding the control characters ("\0" -> "%00") would also + // be correct, but it's unnecessary for this function + QTest::newRow("binary") << QByteArray("\0\x1f", 2) << QString::fromLatin1("\0\x1f", 2);; + QTest::newRow("binary+percent-encoded") << QByteArray("\0%25", 4) << QString::fromLatin1("\0%25", 4); +} + +void tst_QUrlInternal::recodeByteArray() +{ + QFETCH(QByteArray, input); + QFETCH(QString, expected); + QString output = qt_urlRecodeByteArray(input); + + QCOMPARE(output.isNull(), input.isNull()); + QCOMPARE(output.isEmpty(), input.isEmpty()); + QCOMPARE(output, expected); +} + QTEST_APPLESS_MAIN(tst_QUrlInternal) |