summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> 2014-02-06 00:44:03 +0100
committer The Qt Project <gerrit-noreply@qt-project.org> 2014-02-07 15:00:39 +0100
commit 704c4d0e107969cbfde7ba35a1a3f332a2268773 (patch)
tree be591794c4233a765d340312a8b81c1ac0cf534d /src/corelib
parent bcd1b7fe8ee0ab83f7838172c287557c94711602 (diff)
QUtfCodec: don't encode invalid UCS-4 codepoints
The code didn't check for malformed surrogate pairs. That means that:
- high surrogates followed by *anything* were decoded as if they formed a valid surrogate pair;
- stray low surrogates were returned as-is.
We can't return surrogate values in UCS-4, so properly detect these cases and return U+FFFD instead.
[ChangeLog][QtCore][QTextCodec] Encoding a QString in UTF-32 will now replace malformed UTF-16 subsequences in the string with the Unicode replacement character (U+FFFD).
Change-Id: I5cd771d6aa21ffeff4dd9d9e5a7961cf692dc457
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Diffstat (limited to 'src/corelib')
-rw-r--r--src/corelib/codecs/qutfcodec.cpp17
1 files changed, 9 insertions, 8 deletions
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 20bacb1584..a5d16b0b54 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -46,6 +46,7 @@
#include "qchar.h"
#include "private/qsimd_p.h"
+#include "private/qstringiterator_p.h"
QT_BEGIN_NAMESPACE
@@ -503,21 +504,21 @@ QByteArray QUtf32::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conv
}
data += 4;
}
+
+ QStringIterator i(uc, uc + len);
if (endian == BigEndianness) {
- for (int i = 0; i < len; ++i) {
- uint cp = uc[i].unicode();
- if (uc[i].isHighSurrogate() && i < len - 1)
- cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
+ while (i.hasNext()) {
+ uint cp = i.next();
+
*(data++) = cp >> 24;
*(data++) = (cp >> 16) & 0xff;
*(data++) = (cp >> 8) & 0xff;
*(data++) = cp & 0xff;
}
} else {
- for (int i = 0; i < len; ++i) {
- uint cp = uc[i].unicode();
- if (uc[i].isHighSurrogate() && i < len - 1)
- cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
+ while (i.hasNext()) {
+ uint cp = i.next();
+
*(data++) = cp & 0xff;
*(data++) = (cp >> 8) & 0xff;
*(data++) = (cp >> 16) & 0xff;