Fix decoding of QByteArray in the deprecated "encoded" setters in QUrl

The asymmetry is intentional: the getters can use toLatin1() because the called functions, with a QUrl::FullyEncoded parameter, return ASCII only. This gives a small performance improvement by avoiding the need to run the UTF-8 encoder. However, the data passed to the setters could contain non-ASCII binary data, in addition to the percent-encoded data. We can't use fromUtf8 because the data is binary, and we can't use toPercentEncoding because the data may already be percent-encoded. Change-Id: I5ecdb49be5af51ac86fd9764eb3a6aa96385f512 Reviewed-by: David Faure <faure@kde.org>
author: Thiago Macieira <thiago.macieira@intel.com> 2012-08-16 15:31:06 +0200
committer: Qt by Nokia <qt-info@nokia.com> 2012-08-20 21:59:32 +0200
commit: ce9b010ec619aa6e5f19b6ae208b76a4e398b20b (patch)
tree: 7eccc6c7a4a5654dae1956df4a3f262dd8c0f109 /src/corelib/io/qurlrecode.cpp
parent: 60818231d82ca34f1d33ccb9ba7500b5470a3d0d (diff)
1 files changed, 50 insertions, 0 deletions
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp
index 12d23e9450..4399f38286 100644
--- a/src/corelib/io/qurlrecode.cpp
+++ b/src/corelib/io/qurlrecode.cpp
@@ -674,4 +674,54 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end,
                   encoding, actionTable, false);
 }
 
+/*!
+    \internal
+    \since 5.0
+
+    Converts \a ba, an 8-bit and possibly already percent-encoded component,
+    to a QString: bytes >= 0x80 become a percent sign plus two encodeNibble()
+    digits, all other bytes are kept. QString::fromUtf8 is unsuitable (input
+    may hold non-UTF8 sequences), as is QByteArray::toPercentEncoding (input
+    may already be percent-encoded) and qt_urlRecode (it needs UTF-16 input).
+*/
+Q_AUTOTEST_EXPORT
+QString qt_urlRecodeByteArray(const QByteArray &ba)
+{
+    if (ba.isNull())
+        return QString();
+
+    // find the first byte with the high bit set (value >= 0x80), if any;
+    // control characters below 0x20 need no encoding, QString holds them fine
+    const char *in = ba.constData();
+    const char *const end = ba.constEnd();
+    for ( ; in < end; ++in) {
+        if (*in & 0x80)
+            break;
+    }
+
+    if (in == end) {
+        // every byte is ASCII: convert directly, no re-encoding needed
+        return QString::fromLatin1(ba, ba.size());
+    }
+
+    // non-ASCII byte found: work on a copy grown so every remaining byte can expand to 3
+    QByteArray intermediate = ba;
+    intermediate.resize(ba.size() * 3 - (in - ba.constData()));
+    uchar *out = reinterpret_cast<uchar *>(intermediate.data() + (in - ba.constData()));
+    for ( ; in < end; ++in) {
+        if (*in & 0x80) {
+            // non-ASCII byte: emit a percent sign, then its high and low nibbles
+            *out++ = '%';
+            *out++ = encodeNibble(uchar(*in) >> 4);
+            *out++ = encodeNibble(uchar(*in) & 0xf);
+        } else {
+            // ASCII byte: copy it through unchanged
+            *out++ = uchar(*in);
+        }
+    }
+
+    // the output is pure ASCII now; convert only the bytes actually written
+    return QString::fromLatin1(intermediate, out - reinterpret_cast<uchar *>(intermediate.data()));
+}
+
 QT_END_NAMESPACE
author	Thiago Macieira <thiago.macieira@intel.com>	2012-08-16 15:31:06 +0200
committer	Qt by Nokia <qt-info@nokia.com>	2012-08-20 21:59:32 +0200
commit	ce9b010ec619aa6e5f19b6ae208b76a4e398b20b (patch)
tree	7eccc6c7a4a5654dae1956df4a3f262dd8c0f109 /src/corelib/io/qurlrecode.cpp
parent	60818231d82ca34f1d33ccb9ba7500b5470a3d0d (diff)