summaryrefslogtreecommitdiffstats
path: root/src/corelib/serialization/qjsonparser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/serialization/qjsonparser.cpp')
-rw-r--r--src/corelib/serialization/qjsonparser.cpp453
1 files changed, 183 insertions, 270 deletions
diff --git a/src/corelib/serialization/qjsonparser.cpp b/src/corelib/serialization/qjsonparser.cpp
index cd36bd5a5b..6d0a92e094 100644
--- a/src/corelib/serialization/qjsonparser.cpp
+++ b/src/corelib/serialization/qjsonparser.cpp
@@ -45,6 +45,8 @@
#include "qjsonparser_p.h"
#include "qjson_p.h"
#include "private/qutfcodec_p.h"
+#include "private/qcborvalue_p.h"
+#include "private/qnumeric_p.h"
//#define PARSER_DEBUG
#ifdef PARSER_DEBUG
@@ -197,9 +199,32 @@ QString QJsonParseError::errorString() const
using namespace QJsonPrivate;
+class StashedContainer // Scope guard used while parsing a nested array/object: parks the outer container, then re-attaches the nested result on destruction.
+{
+ Q_DISABLE_COPY_MOVE(StashedContainer)
+public:
+ StashedContainer(QExplicitlySharedDataPointer<QCborContainerPrivate> *container, // in/out: the slot holding the container currently being filled
+ QCborValue::Type type) // type handed to makeValue() for the nested container in the destructor
+ : type(type), stashed(std::move(*container)), current(container) // outer container is moved into 'stashed'; presumably leaves *container empty for the nested parse
+ {
+ }
+
+ ~StashedContainer() // runs whenever the guard's scope is left, including the early 'return false' paths at the call sites
+ {
+ stashed->append(QCborContainerPrivate::makeValue(type, -1, current->take(), // wrap whatever the nested parse left in *current as a value of 'type' and append it to the outer container
+ QCborContainerPrivate::MoveContainer));
+ *current = std::move(stashed); // restore the outer container as the one being filled
+ }
+
+private:
+ QCborValue::Type type; // value type used when appending the nested container
+ QExplicitlySharedDataPointer<QCborContainerPrivate> stashed; // the outer container, held for the lifetime of the guard
+ QExplicitlySharedDataPointer<QCborContainerPrivate> *current; // points at the caller's container slot; the slot itself is not owned here
+};
+
Parser::Parser(const char *json, int length)
- : head(json), json(json), data(nullptr)
- , dataLength(0), current(0), nestingLevel(0)
+ : head(json), json(json)
+ , nestingLevel(0)
, lastError(QJsonParseError::NoError)
{
end = json + length;
@@ -297,34 +322,30 @@ char Parser::nextToken()
/*
JSON-text = object / array
*/
-QJsonDocument Parser::parse(QJsonParseError *error)
+QCborValue Parser::parse(QJsonParseError *error)
{
#ifdef PARSER_DEBUG
indent = 0;
qDebug(">>>>> parser begin");
#endif
- // allocate some space
- dataLength = qMax(end - json, (ptrdiff_t) 256);
- data = (char *)malloc(dataLength);
- Q_CHECK_PTR(data);
-
- // fill in Header data
- QJsonPrivate::Header *h = (QJsonPrivate::Header *)data;
- h->tag = QJsonDocument::BinaryFormatTag;
- h->version = 1u;
-
- current = sizeof(QJsonPrivate::Header);
-
eatBOM();
char token = nextToken();
+ QCborValue data;
+
DEBUG << Qt::hex << (uint)token;
if (token == BeginArray) {
+ container = new QCborContainerPrivate;
if (!parseArray())
goto error;
+ data = QCborContainerPrivate::makeValue(QCborValue::Array, -1, container.take(),
+ QCborContainerPrivate::MoveContainer);
} else if (token == BeginObject) {
+ container = new QCborContainerPrivate;
if (!parseObject())
goto error;
+ data = QCborContainerPrivate::makeValue(QCborValue::Map, -1, container.take(),
+ QCborContainerPrivate::MoveContainer);
} else {
lastError = QJsonParseError::IllegalValue;
goto error;
@@ -342,44 +363,95 @@ QJsonDocument Parser::parse(QJsonParseError *error)
error->offset = 0;
error->error = QJsonParseError::NoError;
}
- QJsonPrivate::Data *d = new QJsonPrivate::Data(data, current);
- return QJsonDocument(d);
+
+ return data;
}
error:
#ifdef PARSER_DEBUG
qDebug(">>>>> parser error");
#endif
+ container.reset();
if (error) {
error->offset = json - head;
error->error = lastError;
}
- free(data);
- return QJsonDocument();
+ return QCborValue();
}
-void Parser::ParsedObject::insert(uint offset) {
- const QJsonPrivate::Entry *newEntry = reinterpret_cast<const QJsonPrivate::Entry *>(parser->data + objectPosition + offset);
- int min = 0;
- int n = offsets.size();
- while (n > 0) {
- int half = n >> 1;
- int middle = min + half;
- if (*entryAt(middle) >= *newEntry) {
- n = half;
- } else {
- min = middle + 1;
- n -= half + 1;
+
+// Sorts the key/value pairs of \a container by key and removes duplicate keys,
+// keeping the value that appeared last for each key.
+//
+// Keys may be stored as ASCII, as UTF-16, or (when neither flag is set) as UTF-8.
+// The comparator returns <0, 0 or >0 and must treat every operand according to
+// its real encoding, otherwise equal keys are not detected and distinct keys
+// may be merged.
+static void sortContainer(QCborContainerPrivate *container)
+{
+ using Forward = QJsonPrivate::KeyIterator;
+ using Reverse = std::reverse_iterator<Forward>;
+ using Value = Forward::value_type;
+
+ auto compare = [container](const Value &a, const Value &b)
+ {
+ const auto &aKey = a.key();
+ const auto &bKey = b.key();
+
+ Q_ASSERT(aKey.flags & QtCbor::Element::HasByteData);
+ Q_ASSERT(bKey.flags & QtCbor::Element::HasByteData);
+
+ const QtCbor::ByteData *aData = container->byteData(aKey);
+ const QtCbor::ByteData *bData = container->byteData(bKey);
+
+ if (!aData)
+ return bData ? -1 : 0;
+ if (!bData)
+ return 1;
+
+ // If StringIsAscii is set, we can use either the UTF-8 or the latin1 comparison
+ // for the string as ASCII is a subset of both. If nothing is set, that means UTF-8.
+
+ // We are currently missing an efficient comparison between UTF-8 and UTF-16 strings.
+ // Therefore, we need to convert the UTF-8 string if we encounter such a case.
+
+ if (aKey.flags & QtCbor::Element::StringIsAscii) {
+ if (bKey.flags & QtCbor::Element::StringIsAscii)
+ return QtPrivate::compareStrings(aData->asLatin1(), bData->asLatin1());
+ if (bKey.flags & QtCbor::Element::StringIsUtf16)
+ return QtPrivate::compareStrings(aData->asLatin1(), bData->asStringView());
+
+ // a is ASCII and b is UTF-8: b must be the UTF-8 operand, so swap the
+ // operands and negate the result to keep the "a relative to b" sign.
+ return -QCborContainerPrivate::compareUtf8(bData, aData->asLatin1());
}
- }
- if (min < offsets.size() && *entryAt(min) == *newEntry) {
- offsets[min] = offset;
- } else {
- offsets.insert(min, offset);
- }
+
+ if (aKey.flags & QtCbor::Element::StringIsUtf16) {
+ if (bKey.flags & QtCbor::Element::StringIsAscii)
+ return QtPrivate::compareStrings(aData->asStringView(), bData->asLatin1());
+ if (bKey.flags & QtCbor::Element::StringIsUtf16)
+ return QtPrivate::compareStrings(aData->asStringView(), bData->asStringView());
+
+ // Nasty case. a is UTF-16 and b is UTF-8
+ return QtPrivate::compareStrings(aData->asStringView(), bData->toUtf8String());
+ }
+
+ if (bKey.flags & QtCbor::Element::StringIsAscii)
+ return QCborContainerPrivate::compareUtf8(aData, bData->asLatin1());
+
+ // Nasty case. a is UTF-8 and b is UTF-16
+ if (bKey.flags & QtCbor::Element::StringIsUtf16)
+ return QtPrivate::compareStrings(aData->toUtf8String(), bData->asStringView());
+
+ // Both keys are non-ASCII UTF-8. Reading b as Latin-1 here would make identical
+ // keys compare unequal and could make distinct keys compare equal, so decode
+ // both. This also keeps the same UTF-16 ordering the other branches use.
+ return QtPrivate::compareStrings(aData->toUtf8String(), bData->toUtf8String());
+ };
+
+ std::sort(Forward(container->elements.begin()), Forward(container->elements.end()),
+ [&compare](const Value &a, const Value &b) { return compare(a, b) < 0; });
+
+ // We need to retain the _last_ value for any duplicate keys. Therefore the reverse dance here.
+ auto it = std::unique(Reverse(container->elements.end()), Reverse(container->elements.begin()),
+ [&compare](const Value &a, const Value &b) {
+ return compare(a, b) == 0;
+ }).base().elementsIterator();
+
+ // The erase from beginning is expensive but hopefully rare.
+ container->elements.erase(container->elements.begin(), it);
}
+
/*
object = begin-object [ member *( value-separator member ) ]
end-object
@@ -392,19 +464,14 @@ bool Parser::parseObject()
return false;
}
- int objectOffset = reserveSpace(sizeof(QJsonPrivate::Object));
- if (objectOffset < 0)
- return false;
- BEGIN << "parseObject pos=" << objectOffset << current << json;
-
- ParsedObject parsedObject(this, objectOffset);
+ BEGIN << "parseObject" << json;
char token = nextToken();
while (token == Quote) {
- int off = current - objectOffset;
- if (!parseMember(objectOffset))
+ if (!container)
+ container = new QCborContainerPrivate;
+ if (!parseMember())
return false;
- parsedObject.insert(off);
token = nextToken();
if (token != ValueSeparator)
break;
@@ -421,50 +488,23 @@ bool Parser::parseObject()
return false;
}
- DEBUG << "numEntries" << parsedObject.offsets.size();
- int table = objectOffset;
- // finalize the object
- if (parsedObject.offsets.size()) {
- int tableSize = parsedObject.offsets.size()*sizeof(uint);
- table = reserveSpace(tableSize);
- if (table < 0)
- return false;
-
-#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN
- memcpy(data + table, parsedObject.offsets.constData(), tableSize);
-#else
- offset *o = (offset *)(data + table);
- for (int i = 0; i < parsedObject.offsets.size(); ++i)
- o[i] = parsedObject.offsets[i];
-
-#endif
- }
-
- QJsonPrivate::Object *o = (QJsonPrivate::Object *)(data + objectOffset);
- o->tableOffset = table - objectOffset;
- o->size = current - objectOffset;
- o->is_object = true;
- o->length = parsedObject.offsets.size();
-
- DEBUG << "current=" << current;
END;
--nestingLevel;
+
+ if (container)
+ sortContainer(container.data());
return true;
}
/*
member = string name-separator value
*/
-bool Parser::parseMember(int baseOffset)
+bool Parser::parseMember()
{
- int entryOffset = reserveSpace(sizeof(QJsonPrivate::Entry));
- if (entryOffset < 0)
- return false;
- BEGIN << "parseMember pos=" << entryOffset;
+ BEGIN << "parseMember";
- bool latin1;
- if (!parseString(&latin1))
+ if (!parseString())
return false;
char token = nextToken();
if (token != NameSeparator) {
@@ -475,56 +515,13 @@ bool Parser::parseMember(int baseOffset)
lastError = QJsonParseError::UnterminatedObject;
return false;
}
- QJsonPrivate::Value val;
- if (!parseValue(&val, baseOffset))
+ if (!parseValue())
return false;
- // finalize the entry
- QJsonPrivate::Entry *e = (QJsonPrivate::Entry *)(data + entryOffset);
- e->value = val;
- e->value.latinKey = latin1;
-
END;
return true;
}
-namespace {
- struct ValueArray {
- static const int prealloc = 128;
- ValueArray() : data(stackValues), alloc(prealloc), size(0) {}
- ~ValueArray() { if (data != stackValues) free(data); }
-
- inline bool grow() {
- alloc *= 2;
- if (data == stackValues) {
- QJsonPrivate::Value *newValues = static_cast<QJsonPrivate::Value *>(malloc(alloc*sizeof(QJsonPrivate::Value)));
- if (!newValues)
- return false;
- memcpy(newValues, data, size*sizeof(QJsonPrivate::Value));
- data = newValues;
- } else {
- void *newValues = realloc(data, alloc * sizeof(QJsonPrivate::Value));
- if (!newValues)
- return false;
- data = static_cast<QJsonPrivate::Value *>(newValues);
- }
- return true;
- }
- bool append(const QJsonPrivate::Value &v) {
- if (alloc == size && !grow())
- return false;
- data[size] = v;
- ++size;
- return true;
- }
-
- QJsonPrivate::Value stackValues[prealloc];
- QJsonPrivate::Value *data;
- int alloc;
- int size;
- };
-}
-
/*
array = begin-array [ value *( value-separator value ) ] end-array
*/
@@ -537,12 +534,6 @@ bool Parser::parseArray()
return false;
}
- int arrayOffset = reserveSpace(sizeof(QJsonPrivate::Array));
- if (arrayOffset < 0)
- return false;
-
- ValueArray values;
-
if (!eatSpace()) {
lastError = QJsonParseError::UnterminatedArray;
return false;
@@ -555,13 +546,10 @@ bool Parser::parseArray()
lastError = QJsonParseError::UnterminatedArray;
return false;
}
- QJsonPrivate::Value val;
- if (!parseValue(&val, arrayOffset))
+ if (!container)
+ container = new QCborContainerPrivate;
+ if (!parseValue())
return false;
- if (!values.append(val)) {
- lastError = QJsonParseError::DocumentTooLarge;
- return false;
- }
char token = nextToken();
if (token == EndArray)
break;
@@ -575,27 +563,11 @@ bool Parser::parseArray()
}
}
- DEBUG << "size =" << values.size;
- int table = arrayOffset;
- // finalize the object
- if (values.size) {
- int tableSize = values.size*sizeof(QJsonPrivate::Value);
- table = reserveSpace(tableSize);
- if (table < 0)
- return false;
- memcpy(data + table, values.data, tableSize);
- }
-
- QJsonPrivate::Array *a = (QJsonPrivate::Array *)(data + arrayOffset);
- a->tableOffset = table - arrayOffset;
- a->size = current - arrayOffset;
- a->is_object = false;
- a->length = values.size;
-
- DEBUG << "current=" << current;
+ DEBUG << "size =" << (container ? container->elements.length() : 0);
END;
--nestingLevel;
+
return true;
}
@@ -604,10 +576,9 @@ value = false / null / true / object / array / number / string
*/
-bool Parser::parseValue(QJsonPrivate::Value *val, int baseOffset)
+bool Parser::parseValue()
{
BEGIN << "parse Value" << json;
- val->_dummy = 0;
switch (*json++) {
case 'n':
@@ -618,7 +589,7 @@ bool Parser::parseValue(QJsonPrivate::Value *val, int baseOffset)
if (*json++ == 'u' &&
*json++ == 'l' &&
*json++ == 'l') {
- val->type = QJsonValue::Null;
+ container->append(QCborValue(QCborValue::Null));
DEBUG << "value: null";
END;
return true;
@@ -633,8 +604,7 @@ bool Parser::parseValue(QJsonPrivate::Value *val, int baseOffset)
if (*json++ == 'r' &&
*json++ == 'u' &&
*json++ == 'e') {
- val->type = QJsonValue::Bool;
- val->value = true;
+ container->append(QCborValue(true));
DEBUG << "value: true";
END;
return true;
@@ -650,8 +620,7 @@ bool Parser::parseValue(QJsonPrivate::Value *val, int baseOffset)
*json++ == 'l' &&
*json++ == 's' &&
*json++ == 'e') {
- val->type = QJsonValue::Bool;
- val->value = false;
+ container->append(QCborValue(false));
DEBUG << "value: false";
END;
return true;
@@ -659,44 +628,28 @@ bool Parser::parseValue(QJsonPrivate::Value *val, int baseOffset)
lastError = QJsonParseError::IllegalValue;
return false;
case Quote: {
- val->type = QJsonValue::String;
- if (current - baseOffset >= Value::MaxSize) {
- lastError = QJsonParseError::DocumentTooLarge;
+ if (!parseString())
return false;
- }
- val->value = current - baseOffset;
- bool latin1;
- if (!parseString(&latin1))
- return false;
- val->latinOrIntValue = latin1;
DEBUG << "value: string";
END;
return true;
}
- case BeginArray:
- val->type = QJsonValue::Array;
- if (current - baseOffset >= Value::MaxSize) {
- lastError = QJsonParseError::DocumentTooLarge;
- return false;
- }
- val->value = current - baseOffset;
+ case BeginArray: {
+ StashedContainer stashedContainer(&container, QCborValue::Array);
if (!parseArray())
return false;
DEBUG << "value: array";
END;
return true;
- case BeginObject:
- val->type = QJsonValue::Object;
- if (current - baseOffset >= Value::MaxSize) {
- lastError = QJsonParseError::DocumentTooLarge;
- return false;
- }
- val->value = current - baseOffset;
+ }
+ case BeginObject: {
+ StashedContainer stashedContainer(&container, QCborValue::Map);
if (!parseObject())
return false;
DEBUG << "value: object";
END;
return true;
+ }
case ValueSeparator:
// Essentially missing value, but after a colon, not after a comma
// like the other MissingObject errors.
@@ -708,7 +661,7 @@ bool Parser::parseValue(QJsonPrivate::Value *val, int baseOffset)
return false;
default:
--json;
- if (!parseNumber(val, baseOffset))
+ if (!parseNumber())
return false;
DEBUG << "value: number";
END;
@@ -735,10 +688,9 @@ bool Parser::parseValue(QJsonPrivate::Value *val, int baseOffset)
*/
-bool Parser::parseNumber(QJsonPrivate::Value *val, int baseOffset)
+bool Parser::parseNumber()
{
BEGIN << "parseNumber" << json;
- val->type = QJsonValue::Double;
const char *start = json;
bool isInt = true;
@@ -778,42 +730,32 @@ bool Parser::parseNumber(QJsonPrivate::Value *val, int baseOffset)
return false;
}
- QByteArray number(start, json - start);
+ const QByteArray number = QByteArray::fromRawData(start, json - start);
DEBUG << "numberstring" << number;
if (isInt) {
bool ok;
- int n = number.toInt(&ok);
- if (ok && n < (1<<25) && n > -(1<<25)) {
- val->int_value = n;
- val->latinOrIntValue = true;
+ qlonglong n = number.toLongLong(&ok);
+ if (ok) {
+ container->append(QCborValue(n));
END;
return true;
}
}
bool ok;
- union {
- quint64 ui;
- double d;
- };
- d = number.toDouble(&ok);
+ double d = number.toDouble(&ok);
if (!ok) {
lastError = QJsonParseError::IllegalNumber;
return false;
}
- int pos = reserveSpace(sizeof(double));
- if (pos < 0)
- return false;
- qToLittleEndian(ui, data + pos);
- if (current - baseOffset >= Value::MaxSize) {
- lastError = QJsonParseError::DocumentTooLarge;
- return false;
- }
- val->value = pos - baseOffset;
- val->latinOrIntValue = false;
+ qint64 n;
+ if (convertDoubleTo(d, &n))
+ container->append(QCborValue(n));
+ else
+ container->append(QCborValue(d));
END;
return true;
@@ -902,58 +844,45 @@ static inline bool scanEscapeSequence(const char *&json, const char *end, uint *
static inline bool scanUtf8Char(const char *&json, const char *end, uint *result)
{
- const uchar *&src = reinterpret_cast<const uchar *&>(json);
- const uchar *uend = reinterpret_cast<const uchar *>(end);
- uchar b = *src++;
- int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, result, src, uend);
- if (res < 0) {
- // decoding error, backtrack the character we read above
- --json;
+ const auto *usrc = reinterpret_cast<const uchar *>(json); // local cursor: 'json' itself is only written after a successful decode
+ const auto *uend = reinterpret_cast<const uchar *>(end);
+ const uchar b = *usrc++; // lead byte is read without a bounds check here; NOTE(review): relies on the caller ensuring json < end
+ int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, result, usrc, uend); // a negative result is treated as a decoding error
+ if (res < 0)
return false;
- }
+ json = reinterpret_cast<const char *>(usrc); // success: advance the caller's pointer to the local cursor position
return true;
}
-bool Parser::parseString(bool *latin1)
+bool Parser::parseString()
{
- *latin1 = true;
-
const char *start = json;
- int outStart = current;
- // try to write out a latin1 string
-
- int stringPos = reserveSpace(2);
- if (stringPos < 0)
- return false;
+ // try to parse a utf-8 string without escape sequences, and note whether it's 7bit ASCII.
- BEGIN << "parse string stringPos=" << stringPos << json;
+ BEGIN << "parse string" << json;
+ bool isUtf8 = true;
+ bool isAscii = true;
while (json < end) {
uint ch = 0;
if (*json == '"')
break;
- else if (*json == '\\') {
- if (!scanEscapeSequence(json, end, &ch)) {
- lastError = QJsonParseError::IllegalEscapeSequence;
- return false;
- }
- } else {
- if (!scanUtf8Char(json, end, &ch)) {
- lastError = QJsonParseError::IllegalUTF8String;
- return false;
- }
- }
- // bail out if the string is not pure latin1 or too long to hold as a latin1string (which has only 16 bit for the length)
- if (ch > 0xff || json - start >= 0x8000) {
- *latin1 = false;
+ if (*json == '\\') {
+ isAscii = false;
+ // If we find escape sequences, we store UTF-16 as there are some
+ // escape sequences which are hard to represent in UTF-8.
+ // (plain "\\ud800" for example)
+ isUtf8 = false;
break;
}
- int pos = reserveSpace(1);
- if (pos < 0)
+ if (!scanUtf8Char(json, end, &ch)) {
+ lastError = QJsonParseError::IllegalUTF8String;
return false;
- DEBUG << " " << ch << (char)ch;
- data[pos] = (uchar)ch;
+ }
+ if (ch > 0x7f)
+ isAscii = false;
+ DEBUG << " " << ch << char(ch);
}
++json;
DEBUG << "end of string";
@@ -962,25 +891,20 @@ bool Parser::parseString(bool *latin1)
return false;
}
- // no unicode string, we are done
- if (*latin1) {
- // write string length
- *(QJsonPrivate::qle_ushort *)(data + stringPos) = ushort(current - outStart - sizeof(ushort));
- int pos = reserveSpace((4 - current) & 3);
- if (pos < 0)
- return false;
- while (pos & 3)
- data[pos++] = 0;
+ // no escape sequences, we are done
+ if (isUtf8) {
+ container->appendByteData(start, json - start - 1, QCborValue::String,
+ isAscii ? QtCbor::Element::StringIsAscii
+ : QtCbor::Element::ValueFlags {});
END;
return true;
}
- *latin1 = false;
- DEBUG << "not latin";
+ DEBUG << "has escape sequences";
json = start;
- current = outStart + sizeof(int);
+ QString ucs4;
while (json < end) {
uint ch = 0;
if (*json == '"')
@@ -997,16 +921,10 @@ bool Parser::parseString(bool *latin1)
}
}
if (QChar::requiresSurrogates(ch)) {
- int pos = reserveSpace(4);
- if (pos < 0)
- return false;
- *(QJsonPrivate::qle_ushort *)(data + pos) = QChar::highSurrogate(ch);
- *(QJsonPrivate::qle_ushort *)(data + pos + 2) = QChar::lowSurrogate(ch);
+ ucs4.append(QChar::highSurrogate(ch));
+ ucs4.append(QChar::lowSurrogate(ch));
} else {
- int pos = reserveSpace(2);
- if (pos < 0)
- return false;
- *(QJsonPrivate::qle_ushort *)(data + pos) = (ushort)ch;
+ ucs4.append(QChar(ushort(ch)));
}
}
++json;
@@ -1016,13 +934,8 @@ bool Parser::parseString(bool *latin1)
return false;
}
- // write string length
- *(QJsonPrivate::qle_int *)(data + stringPos) = (current - outStart - sizeof(int))/2;
- int pos = reserveSpace((4 - current) & 3);
- if (pos < 0)
- return false;
- while (pos & 3)
- data[pos++] = 0;
+ container->appendByteData(reinterpret_cast<const char *>(ucs4.utf16()), ucs4.size() * 2,
+ QCborValue::String, QtCbor::Element::StringIsUtf16);
END;
return true;
}