diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2012-08-16 15:31:06 +0200 |
---|---|---|
committer | Qt by Nokia <qt-info@nokia.com> | 2012-08-20 21:59:32 +0200 |
commit | ce9b010ec619aa6e5f19b6ae208b76a4e398b20b (patch) | |
tree | 7eccc6c7a4a5654dae1956df4a3f262dd8c0f109 /src/corelib/io/qurlrecode.cpp | |
parent | 60818231d82ca34f1d33ccb9ba7500b5470a3d0d (diff) |
Fix decoding of QByteArray in the deprecated "encoded" setters in QUrl
The asymmetry is intentional: the getters can use toLatin1() because the
called functions, with a QUrl::FullyEncoded parameter, return ASCII
only. This gives a small performance improvement by avoiding the need to
run the UTF-8 encoder.
However, the data passed to setters could contain non-ASCII binary data,
in addition to the percent-encoded data. We can't use fromUtf8 because
the data is binary, and we can't use toPercentEncoding because the data
is already encoded.
Change-Id: I5ecdb49be5af51ac86fd9764eb3a6aa96385f512
Reviewed-by: David Faure <faure@kde.org>
Diffstat (limited to 'src/corelib/io/qurlrecode.cpp')
-rw-r--r-- | src/corelib/io/qurlrecode.cpp | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp index 12d23e9450..4399f38286 100644 --- a/src/corelib/io/qurlrecode.cpp +++ b/src/corelib/io/qurlrecode.cpp @@ -674,4 +674,54 @@ qt_urlRecode(QString &appendTo, const QChar *begin, const QChar *end, encoding, actionTable, false); } +/*! + \internal + \since 5.0 + + \a ba contains an 8-bit form of the component and it might be + percent-encoded already. We can't use QString::fromUtf8 because it might + contain non-UTF8 sequences. We can't use QByteArray::toPercentEncoding + because it might already contain percent-encoded sequences. We can't use + qt_urlRecode because it needs UTF-16 input. +*/ +Q_AUTOTEST_EXPORT +QString qt_urlRecodeByteArray(const QByteArray &ba) +{ + if (ba.isNull()) + return QString(); + + // scan ba for anything above or equal to 0x80 + // control points below 0x20 are fine in QString + const char *in = ba.constData(); + const char *const end = ba.constEnd(); + for ( ; in < end; ++in) { + if (*in & 0x80) + break; + } + + if (in == end) { + // no non-ASCII found, we're safe to convert to QString + return QString::fromLatin1(ba, ba.size()); + } + + // we found something that we need to encode + QByteArray intermediate = ba; + intermediate.resize(ba.size() * 3 - (in - ba.constData())); + uchar *out = reinterpret_cast<uchar *>(intermediate.data() + (in - ba.constData())); + for ( ; in < end; ++in) { + if (*in & 0x80) { + // encode + *out++ = '%'; + *out++ = encodeNibble(uchar(*in) >> 4); + *out++ = encodeNibble(uchar(*in) & 0xf); + } else { + // keep + *out++ = uchar(*in); + } + } + + // now it's safe to call fromLatin1 + return QString::fromLatin1(intermediate, out - reinterpret_cast<uchar *>(intermediate.data())); +} + QT_END_NAMESPACE |