summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2013-10-19 23:04:47 -0400
committerThe Qt Project <gerrit-noreply@qt-project.org>2014-01-09 22:34:54 +0100
commitcd750c86d62152855d7f1105a5a4dd86d8ff8797 (patch)
tree42a1d9780ab59390595c453486192ac3abdfc063 /src
parent8dd47e34b9b96ac27a99cdcf10b8aec506882fc2 (diff)
Use the new UTF-8 codec in QJsonDocument
The encoder is in qjsonwriter.cpp, which requires special handling for ASCII due to the use of escape sequences. The decoder is in qjsonparser.cpp, which only scans one character at a time. As a side effect, the JSON parser now reports a UTF-8 error at the first character of the erroneous sequence, instead of the last. This is probably what should have been expected. Change-Id: I52e5bc30d71466b6a36098b4150c61b2e385d8e9 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/corelib/codecs/qutfcodec_p.h5
-rw-r--r--src/corelib/json/qjsonparser.cpp45
-rw-r--r--src/corelib/json/qjsonwriter.cpp52
3 files changed, 23 insertions, 79 deletions
diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h
index c94a7a12e4..c252edede7 100644
--- a/src/corelib/codecs/qutfcodec_p.h
+++ b/src/corelib/codecs/qutfcodec_p.h
@@ -108,6 +108,11 @@ struct QUtf8BaseTraits
{ *ptr++ = uc; }
};
+struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits
+{
+ static const bool skipAsciiHandling = true;
+};
+
namespace QUtf8Functions
{
/// returns 0 on success; errors can only happen if \a u is a surrogate:
diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp
index 516c53775c..0c61718843 100644
--- a/src/corelib/json/qjsonparser.cpp
+++ b/src/corelib/json/qjsonparser.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
+** Copyright (C) 2013 Intel Corporation
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -45,6 +46,7 @@
#include <qdebug.h>
#include "qjsonparser_p.h"
#include "qjson_p.h"
+#include "private/qutfcodec_p.h"
//#define PARSER_DEBUG
#ifdef PARSER_DEBUG
@@ -820,45 +822,16 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint *
static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)
{
- int need;
- uint min_uc;
- uint uc;
- uchar ch = *json++;
- if (ch < 128) {
- *result = ch;
- return true;
- } else if ((ch & 0xe0) == 0xc0) {
- uc = ch & 0x1f;
- need = 1;
- min_uc = 0x80;
- } else if ((ch & 0xf0) == 0xe0) {
- uc = ch & 0x0f;
- need = 2;
- min_uc = 0x800;
- } else if ((ch&0xf8) == 0xf0) {
- uc = ch & 0x07;
- need = 3;
- min_uc = 0x10000;
- } else {
- return false;
- }
-
- if (json >= end - need)
- return false;
-
- for (int i = 0; i < need; ++i) {
- ch = *json++;
- if ((ch&0xc0) != 0x80)
- return false;
- uc = (uc << 6) | (ch & 0x3f);
- }
-
- if (uc < min_uc ||
- QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint) {
+ const uchar *&src = reinterpret_cast<const uchar *&>(json);
+ const uchar *uend = reinterpret_cast<const uchar *>(end);
+ uchar b = *src++;
+ int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, result, src, uend);
+ if (res < 0) {
+ // decoding error, backtrack the character we read above
+ --json;
return false;
}
- *result = uc;
return true;
}
diff --git a/src/corelib/json/qjsonwriter.cpp b/src/corelib/json/qjsonwriter.cpp
index 86cca4bb26..5defd497b1 100644
--- a/src/corelib/json/qjsonwriter.cpp
+++ b/src/corelib/json/qjsonwriter.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2013 Digia Plc and/or its subsidiary(-ies).
+** Copyright (C) 2013 Intel Corporation
** Contact: http://www.qt-project.org/legal
**
** This file is part of the QtCore module of the Qt Toolkit.
@@ -41,6 +42,7 @@
#include "qjsonwriter_p.h"
#include "qjson_p.h"
+#include "private/qutfcodec_p.h"
QT_BEGIN_NAMESPACE
@@ -59,15 +61,12 @@ static QByteArray escapedString(const QString &s)
const uchar replacement = '?';
QByteArray ba(s.length(), Qt::Uninitialized);
- uchar *cursor = (uchar *)ba.data();
+ uchar *cursor = reinterpret_cast<uchar *>(const_cast<char *>(ba.constData()));
const uchar *ba_end = cursor + ba.length();
+ const ushort *src = reinterpret_cast<const ushort *>(s.constBegin());
+ const ushort *const end = reinterpret_cast<const ushort *>(s.constEnd());
- const QChar *ch = (const QChar *)s.constData();
- const QChar *end = ch + s.length();
-
- int surrogate_high = -1;
-
- while (ch < end) {
+ while (src != end) {
if (cursor >= ba_end - 6) {
// ensure we have enough space
int pos = cursor - (const uchar *)ba.constData();
@@ -76,29 +75,7 @@ static QByteArray escapedString(const QString &s)
ba_end = (const uchar *)ba.constData() + ba.length();
}
- uint u = ch->unicode();
- if (surrogate_high >= 0) {
- if (ch->isLowSurrogate()) {
- u = QChar::surrogateToUcs4(surrogate_high, u);
- surrogate_high = -1;
- } else {
- // high surrogate without low
- *cursor = replacement;
- ++ch;
- surrogate_high = -1;
- continue;
- }
- } else if (ch->isLowSurrogate()) {
- // low surrogate without high
- *cursor = replacement;
- ++ch;
- continue;
- } else if (ch->isHighSurrogate()) {
- surrogate_high = u;
- ++ch;
- continue;
- }
-
+ uint u = *src++;
if (u < 0x80) {
if (u < 0x20 || u == 0x22 || u == 0x5c) {
*cursor++ = '\\';
@@ -135,20 +112,9 @@ static QByteArray escapedString(const QString &s)
*cursor++ = (uchar)u;
}
} else {
- if (u < 0x0800) {
- *cursor++ = 0xc0 | ((uchar) (u >> 6));
- } else {
- if (QChar::requiresSurrogates(u)) {
- *cursor++ = 0xf0 | ((uchar) (u >> 18));
- *cursor++ = 0x80 | (((uchar) (u >> 12)) & 0x3f);
- } else {
- *cursor++ = 0xe0 | (((uchar) (u >> 12)) & 0x3f);
- }
- *cursor++ = 0x80 | (((uchar) (u >> 6)) & 0x3f);
- }
- *cursor++ = 0x80 | ((uchar) (u&0x3f));
+ if (QUtf8Functions::toUtf8<QUtf8BaseTraits>(u, cursor, src, end) < 0)
+ *cursor++ = replacement;
}
- ++ch;
}
ba.resize(cursor - (const uchar *)ba.constData());