summaryrefslogtreecommitdiffstats
path: root/src/corelib/io/qurlrecode.cpp
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2012-08-16 15:31:06 +0200
committerQt by Nokia <qt-info@nokia.com>2012-08-20 21:59:32 +0200
commitce9b010ec619aa6e5f19b6ae208b76a4e398b20b (patch)
tree7eccc6c7a4a5654dae1956df4a3f262dd8c0f109 /src/corelib/io/qurlrecode.cpp
parent60818231d82ca34f1d33ccb9ba7500b5470a3d0d (diff)
Fix decoding of QByteArray in the deprecated "encoded" setters in QUrl
The asymmetry is intentional: the getters can use toLatin1() because the called functions, with a QUrl::FullyEncoded parameter, return ASCII only. This gives a small performance improvement over the need to run the UTF-8 encoder. However, the data passed to setters could contain non-ASCII binary data, in addition to the percent-encoded data. We can't use fromUtf8 because it's binary and we can't use toPercentEncoding because it is already encoded. Change-Id: I5ecdb49be5af51ac86fd9764eb3a6aa96385f512 Reviewed-by: David Faure <faure@kde.org>
Diffstat (limited to 'src/corelib/io/qurlrecode.cpp')
-rw-r--r--src/corelib/io/qurlrecode.cpp50
1 files changed, 50 insertions, 0 deletions
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp
index 12d23e9450..4399f38286 100644
--- a/src/corelib/io/qurlrecode.cpp
+++ b/src/corelib/io/qurlrecode.cpp
@@ -674,4 +674,54 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end,
encoding, actionTable, false);
}
+/*!
+ \internal
+ \since 5.0
+
+ Converts \a ba, an 8-bit URL component that might be percent-encoded already, to a QString.
+ Bytes >= 0x80 become "%" plus two encodeNibble() digits; all other bytes are kept as they are.
+ A null \a ba yields a null QString. We can't use QString::fromUtf8 because \a ba might contain
+ non-UTF8 sequences, nor QByteArray::toPercentEncoding because it might already contain
+ percent-encoded sequences, nor qt_urlRecode because it needs UTF-16 input.
+*/
+Q_AUTOTEST_EXPORT
+QString qt_urlRecodeByteArray(const QByteArray &ba)
+{
+ if (ba.isNull())
+ return QString();
+
+ // scan ba for the first byte above or equal to 0x80 (i.e. with the high bit set);
+ // control characters below 0x20 are fine in QString, so they do not stop the scan
+ const char *in = ba.constData();
+ const char *const end = ba.constEnd();
+ for ( ; in < end; ++in) {
+ if (*in & 0x80) // high bit set: not ASCII, 'in' is left pointing at this byte
+ break;
+ }
+
+ if (in == end) {
+ // no non-ASCII found, we're safe to convert to QString without any re-encoding
+ return QString::fromLatin1(ba, ba.size());
+ }
+
+ // we found something that we need to encode; (in - ba.constData()) is the pure-ASCII prefix length
+ QByteArray intermediate = ba;
+ intermediate.resize(ba.size() * 3 - (in - ba.constData())); // worst case needs 3 * size - 2 * prefix, so this always suffices
+ uchar *out = reinterpret_cast<uchar *>(intermediate.data() + (in - ba.constData())); // skip the ASCII prefix copied from ba
+ for ( ; in < end; ++in) { // 'in' keeps reading from ba while 'out' writes into intermediate
+ if (*in & 0x80) {
+ // encode as three output bytes: '%', high nibble, low nibble
+ *out++ = '%';
+ *out++ = encodeNibble(uchar(*in) >> 4);
+ *out++ = encodeNibble(uchar(*in) & 0xf);
+ } else {
+ // keep ASCII bytes (including any existing '%' escapes) untouched
+ *out++ = uchar(*in);
+ }
+ }
+
+ // now it's safe to call fromLatin1; the length passed is what was written, not the full resized size
+ return QString::fromLatin1(intermediate, out - reinterpret_cast<uchar *>(intermediate.data()));
+}
+
QT_END_NAMESPACE