From cd750c86d62152855d7f1105a5a4dd86d8ff8797 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Sat, 19 Oct 2013 23:04:47 -0400 Subject: Use the new UTF-8 codec in QJsonDocument The encoder is in qjsonwriter.cpp, which requires special handling for ASCII due to the use of escape sequences. The decoder is in qjsonparser.cpp, which only scans one character at a time. As a side-effect, the JSON parser now reports the UTF-8 error at the first character with an error, instead of the last. This is probably what should have been expected. Change-Id: I52e5bc30d71466b6a36098b4150c61b2e385d8e9 Reviewed-by: Thiago Macieira --- src/corelib/json/qjsonparser.cpp | 45 +++++++--------------------------- src/corelib/json/qjsonwriter.cpp | 52 +++++++--------------------------------- 2 files changed, 18 insertions(+), 79 deletions(-) (limited to 'src/corelib/json') diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp index 516c53775c..0c61718843 100644 --- a/src/corelib/json/qjsonparser.cpp +++ b/src/corelib/json/qjsonparser.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). +** Copyright (C) 2013 Intel Corporation ** Contact: http://www.qt-project.org/legal ** ** This file is part of the QtCore module of the Qt Toolkit. 
@@ -45,6 +46,7 @@ #include #include "qjsonparser_p.h" #include "qjson_p.h" +#include "private/qutfcodec_p.h" //#define PARSER_DEBUG #ifdef PARSER_DEBUG @@ -820,45 +822,16 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint * static inline bool scanUtf8Char(const char *&json, const char *end, uint *result) { - int need; - uint min_uc; - uint uc; - uchar ch = *json++; - if (ch < 128) { - *result = ch; - return true; - } else if ((ch & 0xe0) == 0xc0) { - uc = ch & 0x1f; - need = 1; - min_uc = 0x80; - } else if ((ch & 0xf0) == 0xe0) { - uc = ch & 0x0f; - need = 2; - min_uc = 0x800; - } else if ((ch&0xf8) == 0xf0) { - uc = ch & 0x07; - need = 3; - min_uc = 0x10000; - } else { - return false; - } - - if (json >= end - need) - return false; - - for (int i = 0; i < need; ++i) { - ch = *json++; - if ((ch&0xc0) != 0x80) - return false; - uc = (uc << 6) | (ch & 0x3f); - } - - if (uc < min_uc || - QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) { + const uchar *&src = reinterpret_cast(json); + const uchar *uend = reinterpret_cast(end); + uchar b = *src++; + int res = QUtf8Functions::fromUtf8(b, result, src, uend); + if (res < 0) { + // decoding error, backtrack the character we read above + --json; return false; } - *result = uc; return true; } diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp index 86cca4bb26..5defd497b1 100644 --- a/src/corelib/json/qjsonwriter.cpp +++ b/src/corelib/json/qjsonwriter.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). +** Copyright (C) 2013 Intel Corporation ** Contact: http://www.qt-project.org/legal ** ** This file is part of the QtCore module of the Qt Toolkit. 
@@ -41,6 +42,7 @@ #include "qjsonwriter_p.h" #include "qjson_p.h" +#include "private/qutfcodec_p.h" QT_BEGIN_NAMESPACE @@ -59,15 +61,12 @@ static QByteArray escapedString(const QString &s) const uchar replacement = '?'; QByteArray ba(s.length(), Qt::Uninitialized); - uchar *cursor = (uchar *)ba.data(); + uchar *cursor = reinterpret_cast(const_cast(ba.constData())); const uchar *ba_end = cursor + ba.length(); + const ushort *src = reinterpret_cast(s.constBegin()); + const ushort *const end = reinterpret_cast(s.constEnd()); - const QChar *ch = (const QChar *)s.constData(); - const QChar *end = ch + s.length(); - - int surrogate_high = -1; - - while (ch < end) { + while (src != end) { if (cursor >= ba_end - 6) { // ensure we have enough space int pos = cursor - (const uchar *)ba.constData(); @@ -76,29 +75,7 @@ static QByteArray escapedString(const QString &s) ba_end = (const uchar *)ba.constData() + ba.length(); } - uint u = ch->unicode(); - if (surrogate_high >= 0) { - if (ch->isLowSurrogate()) { - u = QChar::surrogateToUcs4(surrogate_high, u); - surrogate_high = -1; - } else { - // high surrogate without low - *cursor = replacement; - ++ch; - surrogate_high = -1; - continue; - } - } else if (ch->isLowSurrogate()) { - // low surrogate without high - *cursor = replacement; - ++ch; - continue; - } else if (ch->isHighSurrogate()) { - surrogate_high = u; - ++ch; - continue; - } - + uint u = *src++; if (u < 0x80) { if (u < 0x20 || u == 0x22 || u == 0x5c) { *cursor++ = '\\'; @@ -135,20 +112,9 @@ static QByteArray escapedString(const QString &s) *cursor++ = (uchar)u; } } else { - if (u < 0x0800) { - *cursor++ = 0xc0 | ((uchar) (u >> 6)); - } else { - if (QChar::requiresSurrogates(u)) { - *cursor++ = 0xf0 | ((uchar) (u >> 18)); - *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); - } else { - *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f); - } - *cursor++ = 0x80 | (((uchar) (u >> 6)) & 0x3f); - } - *cursor++ = 0x80 | ((uchar) (u&0x3f)); + if (QUtf8Functions::toUtf8(u, cursor, 
src, end) < 0) + *cursor++ = replacement; } - ++ch; } ba.resize(cursor - (const uchar *)ba.constData()); -- cgit v1.2.3