From cd750c86d62152855d7f1105a5a4dd86d8ff8797 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Sat, 19 Oct 2013 23:04:47 -0400 Subject: Use the new UTF-8 codec in QJsonDocument The encoder is in qjsonwriter.cpp, which requires special handling for ASCII due to the use of escape sequences. The decoder is in qjsonparser.cpp, which only scans one character at a time. As a side-effect, the JSON parser now reports the UTF-8 error at the first character in error, instead of the last. This is probably what should have been expected. Change-Id: I52e5bc30d71466b6a36098b4150c61b2e385d8e9 Reviewed-by: Thiago Macieira --- src/corelib/codecs/qutfcodec_p.h | 5 ++++ src/corelib/json/qjsonparser.cpp | 45 ++++++----------------------- src/corelib/json/qjsonwriter.cpp | 52 ++++++---------------------------- tests/auto/corelib/json/tst_qtjson.cpp | 4 +-- 4 files changed, 25 insertions(+), 81 deletions(-) diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h index c94a7a12e4..c252edede7 100644 --- a/src/corelib/codecs/qutfcodec_p.h +++ b/src/corelib/codecs/qutfcodec_p.h @@ -108,6 +108,11 @@ struct QUtf8BaseTraits { *ptr++ = uc; } }; +struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits +{ + static const bool skipAsciiHandling = true; +}; + namespace QUtf8Functions { /// returns 0 on success; errors can only happen if \a u is a surrogate: diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp index 516c53775c..0c61718843 100644 --- a/src/corelib/json/qjsonparser.cpp +++ b/src/corelib/json/qjsonparser.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). +** Copyright (C) 2013 Intel Corporation ** Contact: http://www.qt-project.org/legal ** ** This file is part of the QtCore module of the Qt Toolkit. 
@@ -45,6 +46,7 @@ #include #include "qjsonparser_p.h" #include "qjson_p.h" +#include "private/qutfcodec_p.h" //#define PARSER_DEBUG #ifdef PARSER_DEBUG @@ -820,45 +822,16 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint * static inline bool scanUtf8Char(const char *&json, const char *end, uint *result) { - int need; - uint min_uc; - uint uc; - uchar ch = *json++; - if (ch < 128) { - *result = ch; - return true; - } else if ((ch & 0xe0) == 0xc0) { - uc = ch & 0x1f; - need = 1; - min_uc = 0x80; - } else if ((ch & 0xf0) == 0xe0) { - uc = ch & 0x0f; - need = 2; - min_uc = 0x800; - } else if ((ch&0xf8) == 0xf0) { - uc = ch & 0x07; - need = 3; - min_uc = 0x10000; - } else { - return false; - } - - if (json >= end - need) - return false; - - for (int i = 0; i < need; ++i) { - ch = *json++; - if ((ch&0xc0) != 0x80) - return false; - uc = (uc << 6) | (ch & 0x3f); - } - - if (uc < min_uc || - QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) { + const uchar *&src = reinterpret_cast(json); + const uchar *uend = reinterpret_cast(end); + uchar b = *src++; + int res = QUtf8Functions::fromUtf8(b, result, src, uend); + if (res < 0) { + // decoding error, backtrack the character we read above + --json; return false; } - *result = uc; return true; } diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp index 86cca4bb26..5defd497b1 100644 --- a/src/corelib/json/qjsonwriter.cpp +++ b/src/corelib/json/qjsonwriter.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies). +** Copyright (C) 2013 Intel Corporation ** Contact: http://www.qt-project.org/legal ** ** This file is part of the QtCore module of the Qt Toolkit. 
@@ -41,6 +42,7 @@ #include "qjsonwriter_p.h" #include "qjson_p.h" +#include "private/qutfcodec_p.h" QT_BEGIN_NAMESPACE @@ -59,15 +61,12 @@ static QByteArray escapedString(const QString &s) const uchar replacement = '?'; QByteArray ba(s.length(), Qt::Uninitialized); - uchar *cursor = (uchar *)ba.data(); + uchar *cursor = reinterpret_cast(const_cast(ba.constData())); const uchar *ba_end = cursor + ba.length(); + const ushort *src = reinterpret_cast(s.constBegin()); + const ushort *const end = reinterpret_cast(s.constEnd()); - const QChar *ch = (const QChar *)s.constData(); - const QChar *end = ch + s.length(); - - int surrogate_high = -1; - - while (ch < end) { + while (src != end) { if (cursor >= ba_end - 6) { // ensure we have enough space int pos = cursor - (const uchar *)ba.constData(); @@ -76,29 +75,7 @@ static QByteArray escapedString(const QString &s) ba_end = (const uchar *)ba.constData() + ba.length(); } - uint u = ch->unicode(); - if (surrogate_high >= 0) { - if (ch->isLowSurrogate()) { - u = QChar::surrogateToUcs4(surrogate_high, u); - surrogate_high = -1; - } else { - // high surrogate without low - *cursor = replacement; - ++ch; - surrogate_high = -1; - continue; - } - } else if (ch->isLowSurrogate()) { - // low surrogate without high - *cursor = replacement; - ++ch; - continue; - } else if (ch->isHighSurrogate()) { - surrogate_high = u; - ++ch; - continue; - } - + uint u = *src++; if (u < 0x80) { if (u < 0x20 || u == 0x22 || u == 0x5c) { *cursor++ = '\\'; @@ -135,20 +112,9 @@ static QByteArray escapedString(const QString &s) *cursor++ = (uchar)u; } } else { - if (u < 0x0800) { - *cursor++ = 0xc0 | ((uchar) (u >> 6)); - } else { - if (QChar::requiresSurrogates(u)) { - *cursor++ = 0xf0 | ((uchar) (u >> 18)); - *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f); - } else { - *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f); - } - *cursor++ = 0x80 | (((uchar) (u >> 6)) & 0x3f); - } - *cursor++ = 0x80 | ((uchar) (u&0x3f)); + if (QUtf8Functions::toUtf8(u, cursor, 
src, end) < 0) + *cursor++ = replacement; } - ++ch; } ba.resize(cursor - (const uchar *)ba.constData()); diff --git a/tests/auto/corelib/json/tst_qtjson.cpp b/tests/auto/corelib/json/tst_qtjson.cpp index a8534bf6f0..8ff6c8be6b 100644 --- a/tests/auto/corelib/json/tst_qtjson.cpp +++ b/tests/auto/corelib/json/tst_qtjson.cpp @@ -1546,7 +1546,7 @@ void tst_QtJson::fromJsonErrors() QJsonDocument doc = QJsonDocument::fromJson(json, &error); QVERIFY(doc.isEmpty()); QCOMPARE(error.error, QJsonParseError::IllegalUTF8String); - QCOMPARE(error.offset, 14); + QCOMPARE(error.offset, 12); } { QJsonParseError error; @@ -1570,7 +1570,7 @@ void tst_QtJson::fromJsonErrors() QJsonDocument doc = QJsonDocument::fromJson(json, &error); QVERIFY(doc.isEmpty()); QCOMPARE(error.error, QJsonParseError::IllegalUTF8String); - QCOMPARE(error.offset, 15); + QCOMPARE(error.offset, 13); } { QJsonParseError error; -- cgit v1.2.3