From 704c4d0e107969cbfde7ba35a1a3f332a2268773 Mon Sep 17 00:00:00 2001
From: Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
Date: Thu, 6 Feb 2014 00:44:03 +0100
Subject: QUtfCodec: don't encode invalid UCS-4 codepoints

The code didn't check for malformed surrogate pairs. That means that

- high surrogates followed by *anything* were decoded as if they formed
  a valid surrogate pair;
- stray low surrogates were returned as-is.

We can't return surrogate values in UCS-4, so properly detect these
cases and return U+FFFD instead.

[ChangeLog][QtCore][QTextCodec] Encoding a QString in UTF-32 will now
replace malformed UTF-16 subsequences in the string with the Unicode
replacement character (U+FFFD).

Change-Id: I5cd771d6aa21ffeff4dd9d9e5a7961cf692dc457
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
---
 src/corelib/codecs/qutfcodec.cpp | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index 20bacb1584..a5d16b0b54 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -46,6 +46,7 @@
 #include "qchar.h"
 
 #include "private/qsimd_p.h"
+#include "private/qstringiterator_p.h"
 
 QT_BEGIN_NAMESPACE
 
@@ -503,21 +504,21 @@ QByteArray QUtf32::convertFromUnicode(const QChar *uc, int len, QTextCodec::Conv
         }
         data += 4;
     }
+
+    QStringIterator i(uc, uc + len);
     if (endian == BigEndianness) {
-        for (int i = 0; i < len; ++i) {
-            uint cp = uc[i].unicode();
-            if (uc[i].isHighSurrogate() && i < len - 1)
-                cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
+        while (i.hasNext()) {
+            uint cp = i.next();
+
             *(data++) = cp >> 24;
             *(data++) = (cp >> 16) & 0xff;
             *(data++) = (cp >> 8) & 0xff;
             *(data++) = cp & 0xff;
         }
     } else {
-        for (int i = 0; i < len; ++i) {
-            uint cp = uc[i].unicode();
-            if (uc[i].isHighSurrogate() && i < len - 1)
-                cp = QChar::surrogateToUcs4(cp, uc[++i].unicode());
+        while (i.hasNext()) {
+            uint cp = i.next();
+
             *(data++) = cp & 0xff;
             *(data++) = (cp >> 8) & 0xff;
             *(data++) = (cp >> 16) & 0xff;
-- 
cgit v1.2.3