QCborValue diagnostics: Properly escape strings

I'm intentionally not testing improperly-paired surrogates, since those
can't be encoded in CBOR.

Change-Id: I0d3cc366baaa49f3ad28fffd154240287ce34c22
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Mårten Nordheim <marten.nordheim@qt.io>
author: Thiago Macieira <thiago.macieira@intel.com> 2018-07-17 12:56:05 -0700
committer: Thiago Macieira <thiago.macieira@intel.com> 2018-08-03 20:26:42 +0000
commit: 6c8160b0dcadf7afd8eb0bd22536d6d0331dcbfa (patch)
tree: 4b3ebfca680206a4a40e6ba0ba9087e3c5a6e3c9 /src/corelib
parent: 0eb1574b28ced49cc0134c557a1744d4af0f84e6 (diff)
1 files changed, 71 insertions, 5 deletions
diff --git a/src/corelib/serialization/qcbordiagnostic.cpp b/src/corelib/serialization/qcbordiagnostic.cpp
index 8e641ff7cf..78f99927c8 100644
--- a/src/corelib/serialization/qcbordiagnostic.cpp
+++ b/src/corelib/serialization/qcbordiagnostic.cpp
@@ -45,6 +45,7 @@
 
 #include <private/qnumeric_p.h>
 #include <qstack.h>
+#include <private/qtools_p.h>
 
 QT_BEGIN_NAMESPACE
 
@@ -130,11 +131,76 @@ static bool isByteArrayEncodingTag(QCborTag tag)
 
 void DiagnosticNotation::appendString(const QString &s)
 {
-    result += QLatin1Char('"')
-            + QString(s)
-              .replace(QLatin1Char('\\'), QLatin1String("\\\\"))
-              .replace(QLatin1Char('"'), QLatin1String("\\\""))
-            + QLatin1Char('"');
+    result += QLatin1Char('"'); // opening quote; the escaped contents of s follow
+
+    const QChar *begin = s.begin();
+    const QChar *end = s.end();
+    while (begin < end) {
+        // find the longest run starting at begin that can be copied verbatim
+        const QChar *ptr = begin;
+        for ( ; ptr < end; ++ptr) {
+            ushort uc = ptr->unicode();
+            if (uc == '\\' || uc == '"' || uc < ' ' || uc >= 0x7f) // backslash, quote, below space, or 0x7f and above
+                break;
+        }
+
+        if (ptr != begin)
+            result.append(begin, ptr - begin);
+
+        if (ptr == end) // nothing left that needs escaping
+            break;
+
+        // *ptr must be escaped: build the escape sequence in buf
+        static const char escapeMap[16] = {
+            // The C escape characters \a \b \t \n \v \f and \r indexed by
+            // their ASCII values (7 to 13); 0 means no single-letter escape
+            0, 0, 0, 0,
+            0, 0, 0, 'a',
+            'b', 't', 'n', 'v',
+            'f', 'r', 0, 0
+        };
+        int buflen = 2; // default length: backslash plus one character
+        QChar buf[10]; // large enough for the longest form: backslash, 'U', 8 hex digits
+        buf[0] = QLatin1Char('\\');
+        buf[1] = QChar::Null; // stays Null unless a short escape is chosen below
+        char16_t uc = ptr->unicode();
+
+        if (uc < sizeof(escapeMap)) // code units 0 to 15: look up the C-style letter
+            buf[1] = QLatin1Char(escapeMap[uc]);
+        else if (uc == '"' || uc == '\\') // quote and backslash escape to themselves
+            buf[1] = QChar(uc);
+
+        if (buf[1] == QChar::Null) { // no short escape applies: use a hexadecimal escape
+            using QtMiscUtils::toHexUpper;
+            if (ptr->isHighSurrogate() && (ptr + 1) != end && ptr[1].isLowSurrogate()) {
+                // properly-paired surrogates: consume both and emit one capital-U escape with 8 hex digits
+                ++ptr;
+                char32_t ucs4 = QChar::surrogateToUcs4(uc, ptr->unicode());
+                buf[1] = 'U';
+                buf[2] = '0'; // toHexUpper(ucs4 >> 28);
+                buf[3] = '0'; // toHexUpper(ucs4 >> 24);
+                buf[4] = toHexUpper(ucs4 >> 20);
+                buf[5] = toHexUpper(ucs4 >> 16);
+                buf[6] = toHexUpper(ucs4 >> 12);
+                buf[7] = toHexUpper(ucs4 >> 8);
+                buf[8] = toHexUpper(ucs4 >> 4);
+                buf[9] = toHexUpper(ucs4);
+                buflen = 10;
+            } else {
+                buf[1] = 'u'; // any other code unit (including an unpaired surrogate): 4 hex digits
+                buf[2] = toHexUpper(uc >> 12);
+                buf[3] = toHexUpper(uc >> 8);
+                buf[4] = toHexUpper(uc >> 4);
+                buf[5] = toHexUpper(uc);
+                buflen = 6;
+            }
+        }
+
+        result.append(buf, buflen);
+        begin = ptr + 1; // resume scanning after the code unit(s) just escaped
+    }
+
+    result += QLatin1Char('"'); // closing quote
 }
 
 void DiagnosticNotation::appendArray(const QCborArray &a)
author	Thiago Macieira <thiago.macieira@intel.com>	2018-07-17 12:56:05 -0700
committer	Thiago Macieira <thiago.macieira@intel.com>	2018-08-03 20:26:42 +0000
commit	6c8160b0dcadf7afd8eb0bd22536d6d0331dcbfa (patch)
tree	4b3ebfca680206a4a40e6ba0ba9087e3c5a6e3c9 /src/corelib
parent	0eb1574b28ced49cc0134c557a1744d4af0f84e6 (diff)