From 6c8160b0dcadf7afd8eb0bd22536d6d0331dcbfa Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Tue, 17 Jul 2018 12:56:05 -0700 Subject: QCborValue diagnostics: Properly escape strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm intentionally not testing improperly-paired surrogates, since those can't be encoded in CBOR. Change-Id: I0d3cc366baaa49f3ad28fffd154240287ce34c22 Reviewed-by: Edward Welbourne Reviewed-by: Mårten Nordheim --- src/corelib/serialization/qcbordiagnostic.cpp | 76 ++++++++++++++++++++-- .../serialization/qcborvalue/tst_qcborvalue.cpp | 6 ++ 2 files changed, 77 insertions(+), 5 deletions(-) diff --git a/src/corelib/serialization/qcbordiagnostic.cpp b/src/corelib/serialization/qcbordiagnostic.cpp index 8e641ff7cf..78f99927c8 100644 --- a/src/corelib/serialization/qcbordiagnostic.cpp +++ b/src/corelib/serialization/qcbordiagnostic.cpp @@ -45,6 +45,7 @@ #include #include +#include QT_BEGIN_NAMESPACE @@ -130,11 +131,76 @@ static bool isByteArrayEncodingTag(QCborTag tag) void DiagnosticNotation::appendString(const QString &s) { - result += QLatin1Char('"') - + QString(s) - .replace(QLatin1Char('\\'), QLatin1String("\\\\")) - .replace(QLatin1Char('"'), QLatin1String("\\\"")) - + QLatin1Char('"'); + result += QLatin1Char('"'); + + const QChar *begin = s.begin(); + const QChar *end = s.end(); + while (begin < end) { + // find the longest span comprising only non-escaped characters + const QChar *ptr = begin; + for ( ; ptr < end; ++ptr) { + ushort uc = ptr->unicode(); + if (uc == '\\' || uc == '"' || uc < ' ' || uc >= 0x7f) + break; + } + + if (ptr != begin) + result.append(begin, ptr - begin); + + if (ptr == end) + break; + + // there's an escaped character + static const char escapeMap[16] = { + // The C escape characters \a \b \t \n \v \f and \r indexed by + // their ASCII values + 0, 0, 0, 0, + 0, 0, 0, 'a', + 'b', 't', 'n', 'v', + 'f', 'r', 0, 0 + }; + int buflen = 2; + QChar buf[10]; + buf[0] = 
QLatin1Char('\\'); + buf[1] = QChar::Null; + char16_t uc = ptr->unicode(); + + if (uc < sizeof(escapeMap)) + buf[1] = QLatin1Char(escapeMap[uc]); + else if (uc == '"' || uc == '\\') + buf[1] = QChar(uc); + + if (buf[1] == QChar::Null) { + using QtMiscUtils::toHexUpper; + if (ptr->isHighSurrogate() && (ptr + 1) != end && ptr[1].isLowSurrogate()) { + // properly-paired surrogates + ++ptr; + char32_t ucs4 = QChar::surrogateToUcs4(uc, ptr->unicode()); + buf[1] = 'U'; + buf[2] = '0'; // toHexUpper(ucs4 >> 28); + buf[3] = '0'; // toHexUpper(ucs4 >> 24); + buf[4] = toHexUpper(ucs4 >> 20); + buf[5] = toHexUpper(ucs4 >> 16); + buf[6] = toHexUpper(ucs4 >> 12); + buf[7] = toHexUpper(ucs4 >> 8); + buf[8] = toHexUpper(ucs4 >> 4); + buf[9] = toHexUpper(ucs4); + buflen = 10; + } else { + buf[1] = 'u'; + buf[2] = toHexUpper(uc >> 12); + buf[3] = toHexUpper(uc >> 8); + buf[4] = toHexUpper(uc >> 4); + buf[5] = toHexUpper(uc); + buflen = 6; + } + } + + result.append(buf, buflen); + begin = ptr + 1; + } + + result += QLatin1Char('"'); } void DiagnosticNotation::appendArray(const QCborArray &a) diff --git a/tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp b/tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp index e469bffa4f..92d5cd8702 100644 --- a/tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp +++ b/tests/auto/corelib/serialization/qcborvalue/tst_qcborvalue.cpp @@ -1646,6 +1646,12 @@ void tst_QCborValue::toDiagnosticNotation_data() << QCborValue(QCborMap{{-1, QCborMap{{0, 0}, {"foo", "bar"}}}}) << int(QCborValue::LineWrapped) << "{\n -1: {\n 0: 0,\n \"foo\": \"bar\"\n }\n}"; + // string escaping + QTest::newRow("String:escaping") + << QCborValue("\1\a\b\t\f\r\n\v\x1f\x7f \"\xc2\xa0\xe2\x82\xac\xf0\x90\x80\x80\\\"") + << int(QCborValue::DiagnosticNotationOptions{}) + << "\"\\u0001\\a\\b\\t\\f\\r\\n\\v\\u001F\\u007F \\\"\\u00A0\\u20AC\\U00010000\\\\\\\"\""; + // extended formatting for byte arrays QTest::newRow("Extended:ByteArray:0") << 
QCborValue(QByteArray()) << int(QCborValue::ExtendedFormat) -- cgit v1.2.3