From 37a7b035f8feb248d25741b22e22ffb0ec7463d6 Mon Sep 17 00:00:00 2001 From: Lars Knoll Date: Wed, 18 Jan 2012 10:12:50 +0100 Subject: Import json support from playground/qtbinaryjson This imports the JSON support for Qt 5 from playground/qtbinaryjson. It adds a fast, fully compliant json parser, a convenient C++ API, conversion to and from QVariants and a binary format for JSON that is extremely fast to use together with the C++ API. Change-Id: If9e3a21a4241d388d0abaa446b6824f9cc6edb1c Reviewed-by: Thiago Macieira --- src/corelib/json/qjsonparser.cpp | 752 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 752 insertions(+) create mode 100644 src/corelib/json/qjsonparser.cpp (limited to 'src/corelib/json/qjsonparser.cpp') diff --git a/src/corelib/json/qjsonparser.cpp b/src/corelib/json/qjsonparser.cpp new file mode 100644 index 0000000000..d071e35ad8 --- /dev/null +++ b/src/corelib/json/qjsonparser.cpp @@ -0,0 +1,752 @@ +/**************************************************************************** +** +** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies). +** All rights reserved. +** Contact: Nokia Corporation (qt-info@nokia.com) +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** GNU Lesser General Public License Usage +** This file may be used under the terms of the GNU Lesser General Public +** License version 2.1 as published by the Free Software Foundation and +** appearing in the file LICENSE.LGPL included in the packaging of this +** file. Please review the following information to ensure the GNU Lesser +** General Public License version 2.1 requirements will be met: +** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. +** +** In addition, as a special exception, Nokia gives you certain additional +** rights. These rights are described in the Nokia Qt LGPL Exception +** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU General +** Public License version 3.0 as published by the Free Software Foundation +** and appearing in the file LICENSE.GPL included in the packaging of this +** file. Please review the following information to ensure the GNU General +** Public License version 3.0 requirements will be met: +** http://www.gnu.org/copyleft/gpl.html. +** +** Other Usage +** Alternatively, this file may be used in accordance with the terms and +** conditions contained in a signed written agreement between you and Nokia. +** +** +** +** +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#include +#include "qjsonparser_p.h" +#include "qjson_p.h" + +//#define PARSER_DEBUG +#ifdef PARSER_DEBUG +static int indent = 0; +#define BEGIN qDebug() << QByteArray(4*indent++, ' ').constData() << "pos=" << current +#define END --indent +#define DEBUG qDebug() << QByteArray(4*indent, ' ').constData() +#else +#define BEGIN if (1) ; else qDebug() +#define END do {} while (0) +#define DEBUG if (1) ; else qDebug() +#endif + +QT_BEGIN_NAMESPACE + +using namespace QJsonPrivate; + +Parser::Parser(const char *json, int length) + : json(json), data(0), dataLength(0), current(0) +{ + end = json + length; +} + + + +/* + +begin-array = ws %x5B ws ; [ left square bracket + +begin-object = ws %x7B ws ; { left curly bracket + +end-array = ws %x5D ws ; ] right square bracket + +end-object = ws %x7D ws ; } right curly bracket + +name-separator = ws %x3A ws ; : colon + +value-separator = ws %x2C ws ; , comma + +Insignificant whitespace is allowed before or after any of the six +structural characters. + +ws = *( + %x20 / ; Space + %x09 / ; Horizontal tab + %x0A / ; Line feed or New line + %x0D ; Carriage return + ) + +*/ + +enum { + Space = 0x20, + Tab = 0x09, + LineFeed = 0x0a, + Return = 0x0d, + BeginArray = 0x5b, + BeginObject = 0x7b, + EndArray = 0x5d, + EndObject = 0x7d, + NameSeparator = 0x3a, + ValueSeparator = 0x2c, + Quote = 0x22 +}; + + + +bool Parser::eatSpace() +{ + while (json < end) { + if (*json > Space) + break; + if (*json != Space && + *json != Tab && + *json != LineFeed && + *json != Return) + break; + ++json; + } + return (json < end); +} + +char Parser::nextToken() +{ + if (!eatSpace()) + return 0; + char token = *json++; + switch (token) { + case BeginArray: + case BeginObject: + case NameSeparator: + case ValueSeparator: + if (!eatSpace()) + return 0; + case EndArray: + case EndObject: + eatSpace(); + case Quote: + break; + default: + token = 0; + break; + } + return token; +} + +/* + JSON-text = object / array +*/ +QJsonDocument Parser::parse() +{ +#ifdef PARSER_DEBUG + indent = 0; + qDebug() << ">>>>> parser begin"; +#endif + // allocate some space + dataLength = qMax(end - json, (ptrdiff_t) 256); + data = (char *)malloc(dataLength); + + // fill in Header data + QJsonPrivate::Header *h = (QJsonPrivate::Header *)data; + h->tag = QJsonDocument::BinaryFormatTag; + h->version = 1u; + + current = sizeof(QJsonPrivate::Header); + + char token = nextToken(); + DEBUG << token; + if (token == BeginArray) { + if (!parseArray()) + goto error; + } else if (token == BeginObject) { + if (!parseObject()) + goto error; + } else { + goto error; + } + + END; + { + QJsonPrivate::Data *d = new QJsonPrivate::Data(data, current); + return QJsonDocument(d); + } + +error: +#ifdef PARSER_DEBUG + qDebug() << ">>>>> parser error"; +#endif + free(data); + return QJsonDocument(); +} + + +void Parser::ParsedObject::insert(uint offset) { + const QJsonPrivate::Entry *newEntry = reinterpret_cast(parser->data + objectPosition + offset); + int min = 0; + int n = offsets.size(); + while (n > 0) { + int half = n >> 1; + int middle = min + half; + if (*entryAt(middle) >= *newEntry) { + n = half; + } else { + min = middle + 1; + n -= half + 1; + } + } + if (min < offsets.size() && *entryAt(min) == *newEntry) { + offsets[min] = offset; + } else { + offsets.insert(min, offset); + } +} + +/* + object = begin-object [ member *( value-separator member ) ] + end-object +*/ + +bool Parser::parseObject() +{ + int objectOffset = reserveSpace(sizeof(QJsonPrivate::Object)); + BEGIN << "parseObject pos=" << objectOffset << current << json; + + ParsedObject parsedObject(this, objectOffset); + + char token = nextToken(); + while (token == Quote) { + int off = current - objectOffset; + if (!parseMember(objectOffset)) + return false; + parsedObject.insert(off); + token = nextToken(); + if (token != ValueSeparator) + break; + token = nextToken(); + } + + DEBUG << "end token=" << token; + if (token != EndObject) + return false; + + DEBUG << "numEntries" << parsedObject.offsets.size(); + int table = objectOffset; + // finalize the object + if (parsedObject.offsets.size()) { + int tableSize = parsedObject.offsets.size()*sizeof(uint); + table = reserveSpace(tableSize); +#if Q_BYTE_ORDER == Q_LITTLE_ENDIAN + memcpy(data + table, parsedObject.offsets.constData(), tableSize); +#else + offset *o = (offset *)(data + table); + for (int i = 0; i < tableSize; ++i) + o[i] = parsedObject.offsets[i]; + +#endif + } + + QJsonPrivate::Object *o = (QJsonPrivate::Object *)(data + objectOffset); + o->tableOffset = table - objectOffset; + o->size = current - objectOffset; + o->is_object = true; + o->length = parsedObject.offsets.size(); + + DEBUG << "current=" << current; + END; + return true; +} + +/* + member = string name-separator value +*/ +bool Parser::parseMember(int baseOffset) +{ + int entryOffset = reserveSpace(sizeof(QJsonPrivate::Entry)); + BEGIN << "parseMember pos=" << entryOffset; + + bool latin1; + if (!parseString(&latin1)) + return false; + char token = nextToken(); + if (token != NameSeparator) + return false; + QJsonPrivate::Value val; + if (!parseValue(&val, baseOffset)) + return false; + + // finalize the entry + QJsonPrivate::Entry *e = (QJsonPrivate::Entry *)(data + entryOffset); + e->value = val; + e->value.latinKey = latin1; + + END; + return true; +} + +/* + array = begin-array [ value *( value-separator value ) ] end-array +*/ +bool Parser::parseArray() +{ + BEGIN << "parseArray"; + int arrayOffset = reserveSpace(sizeof(QJsonPrivate::Array)); + + QVarLengthArray values; + + if (!eatSpace()) + return false; + if (*json == EndArray) { + nextToken(); + } else { + while (1) { + QJsonPrivate::Value val; + if (!parseValue(&val, arrayOffset)) + return false; + values.append(val); + char token = nextToken(); + if (token == EndArray) + break; + else if (token != ValueSeparator) + return false; + } + } + + DEBUG << "size =" << values.size(); + int table = arrayOffset; + // finalize the object + if (values.size()) { + int tableSize = values.size()*sizeof(QJsonPrivate::Value); + table = reserveSpace(tableSize); + memcpy(data + table, values.constData(), tableSize); + } + + QJsonPrivate::Array *a = (QJsonPrivate::Array *)(data + arrayOffset); + a->tableOffset = table - arrayOffset; + a->size = current - arrayOffset; + a->is_object = false; + a->length = values.size(); + + DEBUG << "current=" << current; + END; + return true; +} + +/* +value = false / null / true / object / array / number / string + +*/ + +bool Parser::parseValue(QJsonPrivate::Value *val, int baseOffset) +{ + BEGIN << "parse Value" << json; + val->_dummy = 0; + + switch (*json++) { + case 'n': + if (end - json < 4) + return false; + if (*json++ == 'u' && + *json++ == 'l' && + *json++ == 'l') { + val->type = QJsonValue::Null; + DEBUG << "value: null"; + END; + return true; + } + return false; + case 't': + if (end - json < 4) + return false; + if (*json++ == 'r' && + *json++ == 'u' && + *json++ == 'e') { + val->type = QJsonValue::Bool; + val->value = true; + DEBUG << "value: true"; + END; + return true; + } + return false; + case 'f': + if (end - json < 5) + return false; + if (*json++ == 'a' && + *json++ == 'l' && + *json++ == 's' && + *json++ == 'e') { + val->type = QJsonValue::Bool; + val->value = false; + DEBUG << "value: false"; + END; + return true; + } + return false; + case Quote: { + val->type = QJsonValue::String; + val->value = current - baseOffset; + bool latin1; + if (!parseString(&latin1)) + return false; + val->latinOrIntValue = latin1; + DEBUG << "value: string"; + END; + return true; + } + case BeginArray: + val->type = QJsonValue::Array; + val->value = current - baseOffset; + if (!parseArray()) + return false; + DEBUG << "value: array"; + END; + return true; + case BeginObject: + val->type = QJsonValue::Object; + val->value = current - baseOffset; + if (!parseObject()) + return false; + DEBUG << "value: object"; + END; + return true; + default: + --json; + if (!parseNumber(val, baseOffset)) + return false; + DEBUG << "value: number"; + END; + } + + return true; +} + + + + + +/* + number = [ minus ] int [ frac ] [ exp ] + decimal-point = %x2E ; . + digit1-9 = %x31-39 ; 1-9 + e = %x65 / %x45 ; e E + exp = e [ minus / plus ] 1*DIGIT + frac = decimal-point 1*DIGIT + int = zero / ( digit1-9 *DIGIT ) + minus = %x2D ; - + plus = %x2B ; + + zero = %x30 ; 0 + +*/ + +bool Parser::parseNumber(QJsonPrivate::Value *val, int baseOffset) +{ + BEGIN << "parseNumber" << json; + val->type = QJsonValue::Double; + + const char *start = json; + bool isInt = true; + + // minus + if (json < end && *json == '-') + ++json; + + // int = zero / ( digit1-9 *DIGIT ) + if (json < end && *json == '0') { + ++json; + } else { + while (json < end && *json >= '0' && *json <= '9') + ++json; + } + + // frac = decimal-point 1*DIGIT + if (json < end && *json == '.') { + isInt = false; + ++json; + while (json < end && *json >= '0' && *json <= '9') + ++json; + } + + // exp = e [ minus / plus ] 1*DIGIT + if (json < end && (*json == 'e' || *json == 'E')) { + isInt = false; + ++json; + if (json < end && (*json == '-' || *json == '+')) + ++json; + while (json < end && *json >= '0' && *json <= '9') + ++json; + } + + if (json >= end) + return false; + + QByteArray number(start, json - start); + DEBUG << "numberstring" << number; + + if (isInt) { + bool ok; + int n = number.toInt(&ok); + if (ok && n < (1<<25) && n > -(1<<25)) { + val->int_value = n; + val->latinOrIntValue = true; + END; + return true; + } + } + + bool ok; + union { + quint64 ui; + double d; + }; + d = number.toDouble(&ok); + + if (!ok) + return false; + + int pos = reserveSpace(sizeof(double)); + *(quint64 *)(data + pos) = qToLittleEndian(ui); + val->value = pos - baseOffset; + val->latinOrIntValue = false; + + END; + return true; +} + +/* + + string = quotation-mark *char quotation-mark + + char = unescaped / + escape ( + %x22 / ; " quotation mark U+0022 + %x5C / ; \ reverse solidus U+005C + %x2F / ; / solidus U+002F + %x62 / ; b backspace U+0008 + %x66 / ; f form feed U+000C + %x6E / ; n line feed U+000A + %x72 / ; r carriage return U+000D + %x74 / ; t tab U+0009 + %x75 4HEXDIG ) ; uXXXX U+XXXX + + escape = %x5C ; \ + + quotation-mark = %x22 ; " + + unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + */ +static inline bool addHexDigit(char digit, uint *result) +{ + *result <<= 4; + if (digit >= '0' && digit <= '9') + *result |= (digit - '0'); + else if (digit >= 'a' && digit <= 'f') + *result |= (digit - 'a'); + else if (digit >= 'A' && digit <= 'F') + *result |= (digit - 'A'); + else + return false; + return true; +} + +static inline bool scanEscapeSequence(const char *&json, const char *end, uint *ch) +{ + ++json; + if (json >= end) + return false; + + DEBUG << "scan escape" << (char)*json; + uint escaped = *json++; + switch (escaped) { + case '"': + *ch = '"'; break; + case '\\': + *ch = '\\'; break; + case '/': + *ch = '/'; break; + case 'b': + *ch = 0x8; break; + case 'f': + *ch = 0xc; break; + case 'n': + *ch = 0xa; break; + case 'r': + *ch = 0xd; break; + case 't': + *ch = 0x9; break; + case 'u': { + *ch = 0; + if (json > end - 4) + return false; + for (int i = 0; i < 4; ++i) { + if (!addHexDigit(*json, ch)) + return false; + ++json; + } + return true; + } + default: + // this is not as strict as one could be, but allows for more Json files + // to be parsed correctly. + *ch = escaped; + return true; + } + return true; +} + +static inline bool isUnicodeNonCharacter(uint ucs4) +{ + // Unicode has a couple of "non-characters" that one can use internally, + // but are not allowed to be used for text interchange. + // + // Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF, + // U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and + // U+FDEF (inclusive) + + return (ucs4 & 0xfffe) == 0xfffe + || (ucs4 - 0xfdd0U) < 16; +} + +static inline bool scanUtf8Char(const char *&json, const char *end, uint *result) +{ + int need; + uint min_uc; + uint uc; + uchar ch = *json++; + if (ch < 128) { + *result = ch; + return true; + } else if ((ch & 0xe0) == 0xc0) { + uc = ch & 0x1f; + need = 1; + min_uc = 0x80; + } else if ((ch & 0xf0) == 0xe0) { + uc = ch & 0x0f; + need = 2; + min_uc = 0x800; + } else if ((ch&0xf8) == 0xf0) { + uc = ch & 0x07; + need = 3; + min_uc = 0x10000; + } else { + return false; + } + + if (json >= end - need) + return false; + + for (int i = 0; i < need; ++i) { + ch = *json++; + if ((ch&0xc0) != 0x80) + return false; + uc = (uc << 6) | (ch & 0x3f); + } + + if (isUnicodeNonCharacter(uc) || uc >= 0x110000 || + (uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff)) + return false; + + *result = uc; + return true; +} + +bool Parser::parseString(bool *latin1) +{ + *latin1 = true; + + const char *start = json; + int outStart = current; + + // try to write out a latin1 string + + int stringPos = reserveSpace(2); + BEGIN << "parse string stringPos=" << stringPos << json; + while (json < end) { + uint ch = 0; + if (*json == '"') + break; + else if (*json == '\\') { + if (!scanEscapeSequence(json, end, &ch)) + return false; + } else { + if (!scanUtf8Char(json, end, &ch)) + return false; + } + if (ch > 0xff) { + *latin1 = false; + break; + } + int pos = reserveSpace(1); + DEBUG << " " << ch << (char)ch; + data[pos] = (uchar)ch; + } + ++json; + DEBUG << "end of string"; + if (json >= end) + return false; + + // no unicode string, we are done + if (*latin1) { + // write string length + *(QJsonPrivate::qle_ushort *)(data + stringPos) = current - outStart - sizeof(ushort); + int pos = reserveSpace((4 - current) & 3); + while (pos & 3) + data[pos++] = 0; + END; + return true; + } + + *latin1 = false; + DEBUG << "not latin"; + + json = start; + current = outStart + sizeof(int); + + while (json < end) { + uint ch = 0; + if (*json == '"') + break; + else if (*json == '\\') { + if (!scanEscapeSequence(json, end, &ch)) + return false; + } else { + if (!scanUtf8Char(json, end, &ch)) + return false; + } + if (ch > 0xffff) { + int pos = reserveSpace(4); + *(QJsonPrivate::qle_ushort *)(data + pos) = QChar::highSurrogate(ch); + *(QJsonPrivate::qle_ushort *)(data + pos + 2) = QChar::lowSurrogate(ch); + } else { + int pos = reserveSpace(2); + *(QJsonPrivate::qle_ushort *)(data + pos) = (ushort)ch; + } + } + ++json; + + if (json >= end) + return false; + + // write string length + *(QJsonPrivate::qle_int *)(data + stringPos) = (current - outStart - sizeof(int))/2; + int pos = reserveSpace((4 - current) & 3); + while (pos & 3) + data[pos++] = 0; + END; + return true; +} + +QT_END_NAMESPACE -- cgit v1.2.3