diff options
Diffstat (limited to 'src/3rdparty/assimp/contrib/rapidjson/include/rapidjson/reader.h')
-rw-r--r-- | src/3rdparty/assimp/contrib/rapidjson/include/rapidjson/reader.h | 1293 |
1 files changed, 1002 insertions, 291 deletions
diff --git a/src/3rdparty/assimp/contrib/rapidjson/include/rapidjson/reader.h b/src/3rdparty/assimp/contrib/rapidjson/include/rapidjson/reader.h index 6fb29e908..120c31115 100644 --- a/src/3rdparty/assimp/contrib/rapidjson/include/rapidjson/reader.h +++ b/src/3rdparty/assimp/contrib/rapidjson/include/rapidjson/reader.h @@ -1,5 +1,5 @@ // Tencent is pleased to support the open source community by making RapidJSON available. -// +// // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved. // // Licensed under the MIT License (the "License"); you may not use this file except @@ -7,9 +7,9 @@ // // http://opensource.org/licenses/MIT // -// Unless required by applicable law or agreed to in writing, software distributed -// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// Unless required by applicable law or agreed to in writing, software distributed +// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. #ifndef RAPIDJSON_READER_H_ @@ -17,11 +17,13 @@ /*! \file reader.h */ -#include "rapidjson.h" -#include "encodings.h" +#include "allocators.h" +#include "stream.h" +#include "encodedstream.h" #include "internal/meta.h" #include "internal/stack.h" #include "internal/strtod.h" +#include <limits> #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER) #include <intrin.h> @@ -31,6 +33,8 @@ #include <nmmintrin.h> #elif defined(RAPIDJSON_SSE2) #include <emmintrin.h> +#elif defined(RAPIDJSON_NEON) +#include <arm_neon.h> #endif #ifdef _MSC_VER @@ -39,6 +43,13 @@ RAPIDJSON_DIAG_OFF(4127) // conditional expression is constant RAPIDJSON_DIAG_OFF(4702) // unreachable code #endif +#ifdef __clang__ +RAPIDJSON_DIAG_PUSH +RAPIDJSON_DIAG_OFF(old-style-cast) +RAPIDJSON_DIAG_OFF(padded) +RAPIDJSON_DIAG_OFF(switch-enum) +#endif + #ifdef __GNUC__ RAPIDJSON_DIAG_PUSH RAPIDJSON_DIAG_OFF(effc++) @@ -49,7 +60,7 @@ RAPIDJSON_DIAG_OFF(effc++) #ifndef RAPIDJSON_PARSE_ERROR_EARLY_RETURN #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \ RAPIDJSON_MULTILINEMACRO_BEGIN \ - if (HasParseError()) { return value; } \ + if (RAPIDJSON_UNLIKELY(HasParseError())) { return value; } \ RAPIDJSON_MULTILINEMACRO_END #endif #define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \ @@ -120,7 +131,7 @@ RAPIDJSON_NAMESPACE_BEGIN /////////////////////////////////////////////////////////////////////////////// // ParseFlag -/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS +/*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS \ingroup RAPIDJSON_CONFIG \brief User-defined kParseDefaultFlags definition. @@ -141,6 +152,9 @@ enum ParseFlag { kParseStopWhenDoneFlag = 8, //!< After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error. kParseFullPrecisionFlag = 16, //!< Parse number in full precision (but slower). kParseCommentsFlag = 32, //!< Allow one-line (//) and multi-line (/**/) comments. + kParseNumbersAsStringsFlag = 64, //!< Parse all numbers (ints/doubles) as strings. + kParseTrailingCommasFlag = 128, //!< Allow trailing commas at the end of objects and arrays. + kParseNanAndInfFlag = 256, //!< Allow parsing NaN, Inf, Infinity, -Inf and -Infinity as doubles. kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS //!< Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS }; @@ -149,7 +163,7 @@ enum ParseFlag { /*! \class rapidjson::Handler \brief Concept for receiving events from GenericReader upon parsing. - The functions return true if no error occurs. If they return false, + The functions return true if no error occurs. If they return false, the event publisher should terminate the process. \code concept Handler { @@ -162,6 +176,8 @@ concept Handler { bool Int64(int64_t i); bool Uint64(uint64_t i); bool Double(double d); + /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) + bool RawNumber(const Ch* str, SizeType length, bool copy); bool String(const Ch* str, SizeType length, bool copy); bool StartObject(); bool Key(const Ch* str, SizeType length, bool copy); @@ -192,6 +208,8 @@ struct BaseReaderHandler { bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); } bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); } bool Double(double) { return static_cast<Override&>(*this).Default(); } + /// enabled via kParseNumbersAsStringsFlag, string is not null-terminated (use length) + bool RawNumber(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); } bool StartObject() { return static_cast<Override&>(*this).Default(); } bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); } @@ -249,10 +267,17 @@ void SkipWhitespace(InputStream& is) { internal::StreamLocalCopy<InputStream> copy(is); InputStream& s(copy.s); - while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t') + typename InputStream::Ch c; + while ((c = s.Peek()) == ' ' || c == '\n' || c == '\r' || c == '\t') s.Take(); } +inline const char* SkipWhitespace(const char* p, const char* end) { + while (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + return p; +} + #ifdef RAPIDJSON_SSE42 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once. inline const char *SkipWhitespace_SIMD(const char* p) { @@ -263,7 +288,7 @@ inline const char *SkipWhitespace_SIMD(const char* p) { return p; // 16-byte align to the next boundary - const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15); + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); while (p != nextAligned) if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') ++p; @@ -272,23 +297,37 @@ inline const char *SkipWhitespace_SIMD(const char* p) { // The rest of string using SIMD static const char whitespace[16] = " \n\r\t"; - const __m128i w = _mm_loadu_si128((const __m128i *)&whitespace[0]); + const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0])); for (;; p += 16) { - const __m128i s = _mm_load_si128((const __m128i *)p); - const unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY)); - if (r != 0) { // some of characters is non-whitespace -#ifdef _MSC_VER // Find the index of first non-whitespace - unsigned long offset; - _BitScanForward(&offset, r); - return p + offset; -#else - return p + __builtin_ffs(r) - 1; -#endif - } + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); + if (r != 16) // some of characters is non-whitespace + return p + r; } } +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + // The middle of string using SIMD + static const char whitespace[16] = " \n\r\t"; + const __m128i w = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespace[0])); + + for (; p <= end - 16; p += 16) { + const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); + const int r = _mm_cmpistri(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_LEAST_SIGNIFICANT | _SIDD_NEGATIVE_POLARITY); + if (r != 16) // some of characters is non-whitespace + return p + r; + } + + return SkipWhitespace(p, end); +} + #elif defined(RAPIDJSON_SSE2) //! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once. @@ -300,7 +339,7 @@ inline const char *SkipWhitespace_SIMD(const char* p) { return p; // 16-byte align to the next boundary - const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15); + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); while (p != nextAligned) if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') ++p; @@ -308,24 +347,58 @@ inline const char *SkipWhitespace_SIMD(const char* p) { return p; // The rest of string - static const char whitespaces[4][17] = { - " ", - "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n", - "\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r", - "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"}; + #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } + static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; + #undef C16 - const __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]); - const __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]); - const __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]); - const __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]); + const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0])); + const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0])); + const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0])); + const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0])); for (;; p += 16) { - const __m128i s = _mm_load_si128((const __m128i *)p); + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + __m128i x = _mm_cmpeq_epi8(s, w0); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); + x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); + unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x)); + if (r != 0) { // some of characters may be non-whitespace +#ifdef _MSC_VER // Find the index of first non-whitespace + unsigned long offset; + _BitScanForward(&offset, r); + return p + offset; +#else + return p + __builtin_ffs(r) - 1; +#endif + } + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + // The rest of string + #define C16(c) { c, c, c, c, c, c, c, c, c, c, c, c, c, c, c, c } + static const char whitespaces[4][16] = { C16(' '), C16('\n'), C16('\r'), C16('\t') }; + #undef C16 + + const __m128i w0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[0][0])); + const __m128i w1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[1][0])); + const __m128i w2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[2][0])); + const __m128i w3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&whitespaces[3][0])); + + for (; p <= end - 16; p += 16) { + const __m128i s = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); __m128i x = _mm_cmpeq_epi8(s, w0); x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1)); x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2)); x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3)); - unsigned short r = (unsigned short)~_mm_movemask_epi8(x); + unsigned short r = static_cast<unsigned short>(~_mm_movemask_epi8(x)); if (r != 0) { // some of characters may be non-whitespace #ifdef _MSC_VER // Find the index of first non-whitespace unsigned long offset; @@ -336,13 +409,100 @@ inline const char *SkipWhitespace_SIMD(const char* p) { #endif } } + + return SkipWhitespace(p, end); +} + +#elif defined(RAPIDJSON_NEON) + +//! Skip whitespace with ARM Neon instructions, testing 16 8-byte characters at once. +inline const char *SkipWhitespace_SIMD(const char* p) { + // Fast return for single non-whitespace + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + // 16-byte align to the next boundary + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t') + ++p; + else + return p; + + const uint8x16_t w0 = vmovq_n_u8(' '); + const uint8x16_t w1 = vmovq_n_u8('\n'); + const uint8x16_t w2 = vmovq_n_u8('\r'); + const uint8x16_t w3 = vmovq_n_u8('\t'); + + for (;; p += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p)); + uint8x16_t x = vceqq_u8(s, w0); + x = vorrq_u8(x, vceqq_u8(s, w1)); + x = vorrq_u8(x, vceqq_u8(s, w2)); + x = vorrq_u8(x, vceqq_u8(s, w3)); + + x = vmvnq_u8(x); // Negate + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract + uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract + + if (low == 0) { + if (high != 0) { + int lz =__builtin_clzll(high);; + return p + 8 + (lz >> 3); + } + } else { + int lz = __builtin_clzll(low);; + return p + (lz >> 3); + } + } +} + +inline const char *SkipWhitespace_SIMD(const char* p, const char* end) { + // Fast return for single non-whitespace + if (p != end && (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')) + ++p; + else + return p; + + const uint8x16_t w0 = vmovq_n_u8(' '); + const uint8x16_t w1 = vmovq_n_u8('\n'); + const uint8x16_t w2 = vmovq_n_u8('\r'); + const uint8x16_t w3 = vmovq_n_u8('\t'); + + for (; p <= end - 16; p += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p)); + uint8x16_t x = vceqq_u8(s, w0); + x = vorrq_u8(x, vceqq_u8(s, w1)); + x = vorrq_u8(x, vceqq_u8(s, w2)); + x = vorrq_u8(x, vceqq_u8(s, w3)); + + x = vmvnq_u8(x); // Negate + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract + uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract + + if (low == 0) { + if (high != 0) { + int lz = __builtin_clzll(high); + return p + 8 + (lz >> 3); + } + } else { + int lz = __builtin_clzll(low); + return p + (lz >> 3); + } + } + + return SkipWhitespace(p, end); } -#endif // RAPIDJSON_SSE2 +#endif // RAPIDJSON_NEON #ifdef RAPIDJSON_SIMD //! Template function specialization for InsituStringStream -template<> inline void SkipWhitespace(InsituStringStream& is) { +template<> inline void SkipWhitespace(InsituStringStream& is) { is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_)); } @@ -350,23 +510,27 @@ template<> inline void SkipWhitespace(InsituStringStream& is) { template<> inline void SkipWhitespace(StringStream& is) { is.src_ = SkipWhitespace_SIMD(is.src_); } + +template<> inline void SkipWhitespace(EncodedInputStream<UTF8<>, MemoryStream>& is) { + is.is_.src_ = SkipWhitespace_SIMD(is.is_.src_, is.is_.end_); +} #endif // RAPIDJSON_SIMD /////////////////////////////////////////////////////////////////////////////// // GenericReader //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator. -/*! GenericReader parses JSON text from a stream, and send events synchronously to an +/*! GenericReader parses JSON text from a stream, and send events synchronously to an object implementing Handler concept. - It needs to allocate a stack for storing a single decoded string during + It needs to allocate a stack for storing a single decoded string during non-destructive parsing. - For in-situ parsing, the decoded string is directly written to the source + For in-situ parsing, the decoded string is directly written to the source text string, no temporary buffer is required. A GenericReader object can be reused for parsing multiple JSON text. - + \tparam SourceEncoding Encoding of the input stream. \tparam TargetEncoding Encoding of the parse output. \tparam StackAllocator Allocator type for stack. @@ -402,7 +566,7 @@ public: SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); - if (is.Peek() == '\0') { + if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) { RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell()); RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); } @@ -414,7 +578,7 @@ public: SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); - if (is.Peek() != '\0') { + if (RAPIDJSON_UNLIKELY(is.Peek() != '\0')) { RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell()); RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); } @@ -436,9 +600,86 @@ public: return Parse<kParseDefaultFlags>(is, handler); } + //! Initialize JSON text token-by-token parsing + /*! + */ + void IterativeParseInit() { + parseResult_.Clear(); + state_ = IterativeParsingStartState; + } + + //! Parse one token from JSON text + /*! \tparam InputStream Type of input stream, implementing Stream concept + \tparam Handler Type of handler, implementing Handler concept. + \param is Input stream to be parsed. + \param handler The handler to receive events. + \return Whether the parsing is successful. + */ + template <unsigned parseFlags, typename InputStream, typename Handler> + bool IterativeParseNext(InputStream& is, Handler& handler) { + while (RAPIDJSON_LIKELY(is.Peek() != '\0')) { + SkipWhitespaceAndComments<parseFlags>(is); + + Token t = Tokenize(is.Peek()); + IterativeParsingState n = Predict(state_, t); + IterativeParsingState d = Transit<parseFlags>(state_, t, n, is, handler); + + // If we've finished or hit an error... + if (RAPIDJSON_UNLIKELY(IsIterativeParsingCompleteState(d))) { + // Report errors. + if (d == IterativeParsingErrorState) { + HandleError(state_, is); + return false; + } + + // Transition to the finish state. + RAPIDJSON_ASSERT(d == IterativeParsingFinishState); + state_ = d; + + // If StopWhenDone is not set... + if (!(parseFlags & kParseStopWhenDoneFlag)) { + // ... and extra non-whitespace data is found... + SkipWhitespaceAndComments<parseFlags>(is); + if (is.Peek() != '\0') { + // ... this is considered an error. + HandleError(state_, is); + return false; + } + } + + // Success! We are done! + return true; + } + + // Transition to the new state. + state_ = d; + + // If we parsed anything other than a delimiter, we invoked the handler, so we can return true now. + if (!IsIterativeParsingDelimiterState(n)) + return true; + } + + // We reached the end of file. + stack_.Clear(); + + if (state_ != IterativeParsingFinishState) { + HandleError(state_, is); + return false; + } + + return true; + } + + //! Check if token-by-token parsing JSON text is complete + /*! \return Whether the JSON has been fully decoded. + */ + RAPIDJSON_FORCEINLINE bool IterativeParseComplete() { + return IsIterativeParsingCompleteState(state_); + } + //! Whether a parse error has occured in the last parsing. bool HasParseError() const { return parseResult_.IsError(); } - + //! Get the \ref ParseErrorCode of last parsing. ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); } @@ -470,29 +711,23 @@ private: SkipWhitespace(is); if (parseFlags & kParseCommentsFlag) { - while (is.Peek() == '/') { - is.Take(); - - if (is.Peek() == '*') { - is.Take(); + while (RAPIDJSON_UNLIKELY(Consume(is, '/'))) { + if (Consume(is, '*')) { while (true) { - if (is.Peek() == '\0') + if (RAPIDJSON_UNLIKELY(is.Peek() == '\0')) RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); - - if (is.Take() == '*') { - if (is.Peek() == '\0') - RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); - - if (is.Take() == '/') + else if (Consume(is, '*')) { + if (Consume(is, '/')) break; } + else + is.Take(); } - } else if (is.Peek() == '/') { - is.Take(); - while (is.Peek() != '\0' && is.Take() != '\n') { } - } else { - RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); } + else if (RAPIDJSON_LIKELY(Consume(is, '/'))) + while (is.Peek() != '\0' && is.Take() != '\n') {} + else + RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); SkipWhitespace(is); } @@ -504,22 +739,21 @@ private: void ParseObject(InputStream& is, Handler& handler) { RAPIDJSON_ASSERT(is.Peek() == '{'); is.Take(); // Skip '{' - - if (!handler.StartObject()) + + if (RAPIDJSON_UNLIKELY(!handler.StartObject())) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - if (is.Peek() == '}') { - is.Take(); - if (!handler.EndObject(0)) // empty object + if (Consume(is, '}')) { + if (RAPIDJSON_UNLIKELY(!handler.EndObject(0))) // empty object RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); return; } for (SizeType memberCount = 0;;) { - if (is.Peek() != '"') + if (RAPIDJSON_UNLIKELY(is.Peek() != '"')) RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); ParseString<parseFlags>(is, handler, true); @@ -528,7 +762,7 @@ private: SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - if (is.Take() != ':') + if (RAPIDJSON_UNLIKELY(!Consume(is, ':'))) RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); SkipWhitespaceAndComments<parseFlags>(is); @@ -542,18 +776,28 @@ private: ++memberCount; - switch (is.Take()) { + switch (is.Peek()) { case ',': + is.Take(); SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; break; - case '}': - if (!handler.EndObject(memberCount)) + case '}': + is.Take(); + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); return; - default: - RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); - break; + default: + RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); break; // This useless break is only for making warning and coverage happy + } + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == '}') { + if (RAPIDJSON_UNLIKELY(!handler.EndObject(memberCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); + return; + } } } } @@ -563,16 +807,15 @@ private: void ParseArray(InputStream& is, Handler& handler) { RAPIDJSON_ASSERT(is.Peek() == '['); is.Take(); // Skip '[' - - if (!handler.StartArray()) + + if (RAPIDJSON_UNLIKELY(!handler.StartArray())) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); - + SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - if (is.Peek() == ']') { - is.Take(); - if (!handler.EndArray(0)) // empty array + if (Consume(is, ']')) { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(0))) // empty array RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); return; } @@ -585,18 +828,25 @@ private: SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - switch (is.Take()) { - case ',': - SkipWhitespaceAndComments<parseFlags>(is); - RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - break; - case ']': - if (!handler.EndArray(elementCount)) + if (Consume(is, ',')) { + SkipWhitespaceAndComments<parseFlags>(is); + RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; + } + else if (Consume(is, ']')) { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + return; + } + else + RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); + + if (parseFlags & kParseTrailingCommasFlag) { + if (is.Peek() == ']') { + if (RAPIDJSON_UNLIKELY(!handler.EndArray(elementCount))) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); + is.Take(); return; - default: - RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); - break; + } } } } @@ -606,12 +856,12 @@ private: RAPIDJSON_ASSERT(is.Peek() == 'n'); is.Take(); - if (is.Take() == 'u' && is.Take() == 'l' && is.Take() == 'l') { - if (!handler.Null()) + if (RAPIDJSON_LIKELY(Consume(is, 'u') && Consume(is, 'l') && Consume(is, 'l'))) { + if (RAPIDJSON_UNLIKELY(!handler.Null())) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); } else - RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1); + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); } template<unsigned parseFlags, typename InputStream, typename Handler> @@ -619,12 +869,12 @@ private: RAPIDJSON_ASSERT(is.Peek() == 't'); is.Take(); - if (is.Take() == 'r' && is.Take() == 'u' && is.Take() == 'e') { - if (!handler.Bool(true)) + if (RAPIDJSON_LIKELY(Consume(is, 'r') && Consume(is, 'u') && Consume(is, 'e'))) { + if (RAPIDJSON_UNLIKELY(!handler.Bool(true))) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); } else - RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1); + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); } template<unsigned parseFlags, typename InputStream, typename Handler> @@ -632,20 +882,30 @@ private: RAPIDJSON_ASSERT(is.Peek() == 'f'); is.Take(); - if (is.Take() == 'a' && is.Take() == 'l' && is.Take() == 's' && is.Take() == 'e') { - if (!handler.Bool(false)) + if (RAPIDJSON_LIKELY(Consume(is, 'a') && Consume(is, 'l') && Consume(is, 's') && Consume(is, 'e'))) { + if (RAPIDJSON_UNLIKELY(!handler.Bool(false))) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell()); } else - RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1); + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); + } + + template<typename InputStream> + RAPIDJSON_FORCEINLINE static bool Consume(InputStream& is, typename InputStream::Ch expect) { + if (RAPIDJSON_LIKELY(is.Peek() == expect)) { + is.Take(); + return true; + } + else + return false; } // Helper function to parse four hexidecimal digits in \uXXXX in ParseString(). template<typename InputStream> - unsigned ParseHex4(InputStream& is) { + unsigned ParseHex4(InputStream& is, size_t escapeOffset) { unsigned codepoint = 0; for (int i = 0; i < 4; i++) { - Ch c = is.Take(); + Ch c = is.Peek(); codepoint <<= 4; codepoint += static_cast<unsigned>(c); if (c >= '0' && c <= '9') @@ -655,9 +915,10 @@ private: else if (c >= 'a' && c <= 'f') codepoint -= 'a' - 10; else { - RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, is.Tell() - 1); + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, escapeOffset); RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0); } + is.Take(); } return codepoint; } @@ -672,7 +933,14 @@ private: *stack_.template Push<Ch>() = c; ++length_; } + + RAPIDJSON_FORCEINLINE void* Push(SizeType count) { + length_ += count; + return stack_.template Push<Ch>(count); + } + size_t Length() const { return length_; } + Ch* Pop() { return stack_.template Pop<Ch>(length_); } @@ -691,6 +959,9 @@ private: internal::StreamLocalCopy<InputStream> copy(is); InputStream& s(copy.s); + RAPIDJSON_ASSERT(s.Peek() == '\"'); + s.Take(); // Skip '\"' + bool success = false; if (parseFlags & kParseInsituFlag) { typename InputStream::Ch *head = s.PutBegin(); @@ -698,7 +969,7 @@ private: RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; size_t length = s.PutEnd(head) - 1; RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); - const typename TargetEncoding::Ch* const str = (typename TargetEncoding::Ch*)head; + const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false)); } else { @@ -709,7 +980,7 @@ private: const typename TargetEncoding::Ch* const str = stackStream.Pop(); success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true)); } - if (!success) + if (RAPIDJSON_UNLIKELY(!success)) RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); } @@ -720,74 +991,421 @@ private: //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 static const char escape[256] = { - Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', - Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, - 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, - 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/', + Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0, + 0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0, + 0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 }; #undef Z16 //!@endcond - RAPIDJSON_ASSERT(is.Peek() == '\"'); - is.Take(); // Skip '\"' - for (;;) { + // Scan and copy string before "\\\"" or < 0x20. This is an optional optimzation. + if (!(parseFlags & kParseValidateEncodingFlag)) + ScanCopyUnescapedString(is, os); + Ch c = is.Peek(); - if (c == '\\') { // Escape + if (RAPIDJSON_UNLIKELY(c == '\\')) { // Escape + size_t escapeOffset = is.Tell(); // For invalid escaping, report the inital '\\' as error offset is.Take(); - Ch e = is.Take(); - if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) { - os.Put(escape[(unsigned char)e]); + Ch e = is.Peek(); + if ((sizeof(Ch) == 1 || unsigned(e) < 256) && RAPIDJSON_LIKELY(escape[static_cast<unsigned char>(e)])) { + is.Take(); + os.Put(static_cast<typename TEncoding::Ch>(escape[static_cast<unsigned char>(e)])); } - else if (e == 'u') { // Unicode - unsigned codepoint = ParseHex4(is); + else if (RAPIDJSON_LIKELY(e == 'u')) { // Unicode + is.Take(); + unsigned codepoint = ParseHex4(is, escapeOffset); RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - if (codepoint >= 0xD800 && codepoint <= 0xDBFF) { + if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) { // Handle UTF-16 surrogate pair - if (is.Take() != '\\' || is.Take() != 'u') - RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2); - unsigned codepoint2 = ParseHex4(is); + if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u'))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); + unsigned codepoint2 = ParseHex4(is, escapeOffset); RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID; - if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF) - RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2); + if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)) + RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset); codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000; } TEncoding::Encode(os, codepoint); } else - RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1); + RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, escapeOffset); } - else if (c == '"') { // Closing double quote + else if (RAPIDJSON_UNLIKELY(c == '"')) { // Closing double quote is.Take(); os.Put('\0'); // null-terminate the string return; } - else if (c == '\0') - RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell() - 1); - else if ((unsigned)c < 0x20) // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF - RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1); - else { - if (parseFlags & kParseValidateEncodingFlag ? - !Transcoder<SEncoding, TEncoding>::Validate(is, os) : - !Transcoder<SEncoding, TEncoding>::Transcode(is, os)) + else if (RAPIDJSON_UNLIKELY(static_cast<unsigned>(c) < 0x20)) { // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF + if (c == '\0') + RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell()); + else RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell()); } + else { + size_t offset = is.Tell(); + if (RAPIDJSON_UNLIKELY((parseFlags & kParseValidateEncodingFlag ? + !Transcoder<SEncoding, TEncoding>::Validate(is, os) : + !Transcoder<SEncoding, TEncoding>::Transcode(is, os)))) + RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, offset); + } + } + } + + template<typename InputStream, typename OutputStream> + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InputStream&, OutputStream&) { + // Do nothing for generic version + } + +#if defined(RAPIDJSON_SSE2) || defined(RAPIDJSON_SSE42) + // StringStream -> StackStream<char> + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) { + const char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = p; + return; + } + else + os.Put(*p++); + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + SizeType length; + #ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; + #else + length = static_cast<SizeType>(__builtin_ffs(r) - 1); + #endif + if (length != 0) { + char* q = reinterpret_cast<char*>(os.Push(length)); + for (size_t i = 0; i < length; i++) + q[i] = p[i]; + + p += length; + } + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(os.Push(16)), s); + } + + is.src_ = p; + } + + // InsituStringStream -> InsituStringStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { + RAPIDJSON_ASSERT(&is == &os); + (void)os; + + if (is.src_ == is.dst_) { + SkipUnescapedString(is); + return; + } + + char* p = is.src_; + char *q = is.dst_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = p; + is.dst_ = q; + return; + } + else + *q++ = *p++; + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); + + for (;; p += 16, q += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + size_t length; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; +#else + length = static_cast<size_t>(__builtin_ffs(r) - 1); +#endif + for (const char* pend = p + length; p != pend; ) + *q++ = *p++; + break; + } + _mm_storeu_si128(reinterpret_cast<__m128i *>(q), s); } + + is.src_ = p; + is.dst_ = q; } - template<typename InputStream, bool backup> + // When read/write pointers are the same for insitu stream, just skip unescaped characters + static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { + RAPIDJSON_ASSERT(is.src_ == is.dst_); + char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + for (; p != nextAligned; p++) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = is.dst_ = p; + return; + } + + // The rest of string using SIMD + static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' }; + static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' }; + static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F }; + const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0])); + const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0])); + const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0])); + + for (;; p += 16) { + const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p)); + const __m128i t1 = _mm_cmpeq_epi8(s, dq); + const __m128i t2 = _mm_cmpeq_epi8(s, bs); + const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F + const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3); + unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x)); + if (RAPIDJSON_UNLIKELY(r != 0)) { // some of characters is escaped + size_t length; +#ifdef _MSC_VER // Find the index of first escaped + unsigned long offset; + _BitScanForward(&offset, r); + length = offset; +#else + length = static_cast<size_t>(__builtin_ffs(r) - 1); +#endif + p += length; + break; + } + } + + is.src_ = is.dst_ = p; + } +#elif defined(RAPIDJSON_NEON) + // StringStream -> StackStream<char> + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(StringStream& is, StackStream<char>& os) { + const char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = p; + return; + } + else + os.Put(*p++); + + // The rest of string using SIMD + const uint8x16_t s0 = vmovq_n_u8('"'); + const uint8x16_t s1 = vmovq_n_u8('\\'); + const uint8x16_t s2 = vmovq_n_u8('\b'); + const uint8x16_t s3 = vmovq_n_u8(32); + + for (;; p += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast<const uint8_t *>(p)); + uint8x16_t x = vceqq_u8(s, s0); + x = vorrq_u8(x, vceqq_u8(s, s1)); + x = vorrq_u8(x, vceqq_u8(s, s2)); + x = vorrq_u8(x, vcltq_u8(s, s3)); + + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract + uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract + + SizeType length = 0; + bool escaped = false; + if (low == 0) { + if (high != 0) { + unsigned lz = (unsigned)__builtin_clzll(high);; + length = 8 + (lz >> 3); + escaped = true; + } + } else { + unsigned lz = (unsigned)__builtin_clzll(low);; + length = lz >> 3; + escaped = true; + } + if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped + if (length != 0) { + char* q = reinterpret_cast<char*>(os.Push(length)); + for (size_t i = 0; i < length; i++) + q[i] = p[i]; + + p += length; + } + break; + } + vst1q_u8(reinterpret_cast<uint8_t *>(os.Push(16)), s); + } + + is.src_ = p; + } + + // InsituStringStream -> InsituStringStream + static RAPIDJSON_FORCEINLINE void ScanCopyUnescapedString(InsituStringStream& is, InsituStringStream& os) { + RAPIDJSON_ASSERT(&is == &os); + (void)os; + + if (is.src_ == is.dst_) { + SkipUnescapedString(is); + return; + } + + char* p = is.src_; + char *q = is.dst_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + while (p != nextAligned) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = p; + is.dst_ = q; + return; + } + else + *q++ = *p++; + + // The rest of string using SIMD + const uint8x16_t s0 = vmovq_n_u8('"'); + const uint8x16_t s1 = vmovq_n_u8('\\'); + const uint8x16_t s2 = vmovq_n_u8('\b'); + const uint8x16_t s3 = vmovq_n_u8(32); + + for (;; p += 16, q += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p)); + uint8x16_t x = vceqq_u8(s, s0); + x = vorrq_u8(x, vceqq_u8(s, s1)); + x = vorrq_u8(x, vceqq_u8(s, s2)); + x = vorrq_u8(x, vcltq_u8(s, s3)); + + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract + uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract + + SizeType length = 0; + bool escaped = false; + if (low == 0) { + if (high != 0) { + unsigned lz = (unsigned)__builtin_clzll(high); + length = 8 + (lz >> 3); + escaped = true; + } + } else { + unsigned lz = (unsigned)__builtin_clzll(low); + length = lz >> 3; + escaped = true; + } + if (RAPIDJSON_UNLIKELY(escaped)) { // some of characters is escaped + for (const char* pend = p + length; p != pend; ) { + *q++ = *p++; + } + break; + } + vst1q_u8(reinterpret_cast<uint8_t *>(q), s); + } + + is.src_ = p; + is.dst_ = q; + } + + // When read/write pointers are the same for insitu stream, just skip unescaped characters + static RAPIDJSON_FORCEINLINE void SkipUnescapedString(InsituStringStream& is) { + RAPIDJSON_ASSERT(is.src_ == is.dst_); + char* p = is.src_; + + // Scan one by one until alignment (unaligned load may cross page boundary and cause crash) + const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & static_cast<size_t>(~15)); + for (; p != nextAligned; p++) + if (RAPIDJSON_UNLIKELY(*p == '\"') || RAPIDJSON_UNLIKELY(*p == '\\') || RAPIDJSON_UNLIKELY(static_cast<unsigned>(*p) < 0x20)) { + is.src_ = is.dst_ = p; + return; + } + + // The rest of string using SIMD + const uint8x16_t s0 = vmovq_n_u8('"'); + const uint8x16_t s1 = vmovq_n_u8('\\'); + const uint8x16_t s2 = vmovq_n_u8('\b'); + const uint8x16_t s3 = vmovq_n_u8(32); + + for (;; p += 16) { + const uint8x16_t s = vld1q_u8(reinterpret_cast<uint8_t *>(p)); + uint8x16_t x = vceqq_u8(s, s0); + x = vorrq_u8(x, vceqq_u8(s, s1)); + x = vorrq_u8(x, vceqq_u8(s, s2)); + x = vorrq_u8(x, vcltq_u8(s, s3)); + + x = vrev64q_u8(x); // Rev in 64 + uint64_t low = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 0); // extract + uint64_t high = vgetq_lane_u64(reinterpret_cast<uint64x2_t>(x), 1); // extract + + if (low == 0) { + if (high != 0) { + int lz = __builtin_clzll(high); + p += 8 + (lz >> 3); + break; + } + } else { + int lz = __builtin_clzll(low); + p += lz >> 3; + break; + } + } + + is.src_ = is.dst_ = p; + } +#endif // RAPIDJSON_NEON + + template<typename InputStream, bool backup, bool pushOnTake> class NumberStream; template<typename InputStream> - class NumberStream<InputStream, false> { + class NumberStream<InputStream, false, false> { public: + typedef typename InputStream::Ch Ch; + NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader; } - ~NumberStream() {} RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); } RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); } RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); } + RAPIDJSON_FORCEINLINE void Push(char) {} + size_t Tell() { return is.Tell(); } size_t Length() { return 0; } const char* Pop() { return 0; } @@ -799,17 +1417,20 @@ private: }; template<typename InputStream> - class NumberStream<InputStream, true> : public NumberStream<InputStream, false> { - typedef NumberStream<InputStream, false> Base; + class NumberStream<InputStream, true, false> : public NumberStream<InputStream, false, false> { + typedef NumberStream<InputStream, false, false> Base; public: - NumberStream(GenericReader& reader, InputStream& is) : NumberStream<InputStream, false>(reader, is), stackStream(reader.stack_) {} - ~NumberStream() {} + NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is), stackStream(reader.stack_) {} RAPIDJSON_FORCEINLINE Ch TakePush() { - stackStream.Put((char)Base::is.Peek()); + stackStream.Put(static_cast<char>(Base::is.Peek())); return Base::is.Take(); } + RAPIDJSON_FORCEINLINE void Push(char c) { + stackStream.Put(c); + } + size_t Length() { return stackStream.Length(); } const char* Pop() { @@ -821,34 +1442,48 @@ private: StackStream<char> stackStream; }; + template<typename InputStream> + class NumberStream<InputStream, true, true> : public NumberStream<InputStream, true, false> { + typedef NumberStream<InputStream, true, false> Base; + public: + NumberStream(GenericReader& reader, InputStream& is) : Base(reader, is) {} + + RAPIDJSON_FORCEINLINE Ch Take() { return Base::TakePush(); } + }; + template<unsigned parseFlags, typename InputStream, typename Handler> void ParseNumber(InputStream& is, Handler& handler) { internal::StreamLocalCopy<InputStream> copy(is); - NumberStream<InputStream, (parseFlags & kParseFullPrecisionFlag) != 0> s(*this, copy.s); + NumberStream<InputStream, + ((parseFlags & kParseNumbersAsStringsFlag) != 0) ? + ((parseFlags & kParseInsituFlag) == 0) : + ((parseFlags & kParseFullPrecisionFlag) != 0), + (parseFlags & kParseNumbersAsStringsFlag) != 0 && + (parseFlags & kParseInsituFlag) == 0> s(*this, copy.s); + + size_t startOffset = s.Tell(); + double d = 0.0; + bool useNanOrInf = false; // Parse minus - bool minus = false; - if (s.Peek() == '-') { - minus = true; - s.Take(); - } + bool minus = Consume(s, '-'); // Parse int: zero / ( digit1-9 *DIGIT ) unsigned i = 0; uint64_t i64 = 0; bool use64bit = false; int significandDigit = 0; - if (s.Peek() == '0') { + if (RAPIDJSON_UNLIKELY(s.Peek() == '0')) { i = 0; s.TakePush(); } - else if (s.Peek() >= '1' && s.Peek() <= '9') { + else if (RAPIDJSON_LIKELY(s.Peek() >= '1' && s.Peek() <= '9')) { i = static_cast<unsigned>(s.TakePush() - '0'); if (minus) - while (s.Peek() >= '0' && s.Peek() <= '9') { - if (i >= 214748364) { // 2^31 = 2147483648 - if (i != 214748364 || s.Peek() > '8') { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i >= 214748364)) { // 2^31 = 2147483648 + if (RAPIDJSON_LIKELY(i != 214748364 || s.Peek() > '8')) { i64 = i; use64bit = true; break; @@ -858,9 +1493,9 @@ private: significandDigit++; } else - while (s.Peek() >= '0' && s.Peek() <= '9') { - if (i >= 429496729) { // 2^32 - 1 = 4294967295 - if (i != 429496729 || s.Peek() > '5') { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i >= 429496729)) { // 2^32 - 1 = 4294967295 + if (RAPIDJSON_LIKELY(i != 429496729 || s.Peek() > '5')) { i64 = i; use64bit = true; break; @@ -870,18 +1505,41 @@ private: significandDigit++; } } + // Parse NaN or Infinity here + else if ((parseFlags & kParseNanAndInfFlag) && RAPIDJSON_LIKELY((s.Peek() == 'I' || s.Peek() == 'N'))) { + if (Consume(s, 'N')) { + if (Consume(s, 'a') && Consume(s, 'N')) { + d = std::numeric_limits<double>::quiet_NaN(); + useNanOrInf = true; + } + } + else if (RAPIDJSON_LIKELY(Consume(s, 'I'))) { + if (Consume(s, 'n') && Consume(s, 'f')) { + d = (minus ? -std::numeric_limits<double>::infinity() : std::numeric_limits<double>::infinity()); + useNanOrInf = true; + + if (RAPIDJSON_UNLIKELY(s.Peek() == 'i' && !(Consume(s, 'i') && Consume(s, 'n') + && Consume(s, 'i') && Consume(s, 't') && Consume(s, 'y')))) { + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + } + } + } + + if (RAPIDJSON_UNLIKELY(!useNanOrInf)) { + RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); + } + } else RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell()); // Parse 64bit int bool useDouble = false; - double d = 0.0; if (use64bit) { - if (minus) - while (s.Peek() >= '0' && s.Peek() <= '9') { - if (i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC)) // 2^63 = 9223372036854775808 - if (i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8') { - d = i64; + if (minus) + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC))) // 2^63 = 9223372036854775808 + if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8')) { + d = static_cast<double>(i64); useDouble = true; break; } @@ -889,10 +1547,10 @@ private: significandDigit++; } else - while (s.Peek() >= '0' && s.Peek() <= '9') { - if (i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999)) // 2^64 - 1 = 18446744073709551615 - if (i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5') { - d = i64; + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999))) // 2^64 - 1 = 18446744073709551615 + if (RAPIDJSON_LIKELY(i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5')) { + d = static_cast<double>(i64); useDouble = true; break; } @@ -903,9 +1561,9 @@ private: // Force double for big integer if (useDouble) { - while (s.Peek() >= '0' && s.Peek() <= '9') { - if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0 - RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell()); + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + if (RAPIDJSON_UNLIKELY(d >= 1.7976931348623157e307)) // DBL_MAX / 10.0 + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); d = d * 10 + (s.TakePush() - '0'); } } @@ -913,11 +1571,10 @@ private: // Parse frac = decimal-point 1*DIGIT int expFrac = 0; size_t decimalPosition; - if (s.Peek() == '.') { - s.Take(); + if (Consume(s, '.')) { decimalPosition = s.Length(); - if (!(s.Peek() >= '0' && s.Peek() <= '9')) + if (RAPIDJSON_UNLIKELY(!(s.Peek() >= '0' && s.Peek() <= '9'))) RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell()); if (!useDouble) { @@ -925,8 +1582,8 @@ private: // Use i64 to store significand in 64-bit architecture if (!use64bit) i64 = i; - - while (s.Peek() >= '0' && s.Peek() <= '9') { + + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path break; else { @@ -937,19 +1594,19 @@ private: } } - d = (double)i64; + d = static_cast<double>(i64); #else // Use double to store significand in 32-bit architecture - d = use64bit ? (double)i64 : (double)i; + d = static_cast<double>(use64bit ? i64 : i); #endif useDouble = true; } - while (s.Peek() >= '0' && s.Peek() <= '9') { + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { if (significandDigit < 17) { d = d * 10.0 + (s.TakePush() - '0'); --expFrac; - if (d > 0.0) + if (RAPIDJSON_LIKELY(d > 0.0)) significandDigit++; } else @@ -961,38 +1618,35 @@ private: // Parse exp = e [ minus / plus ] 1*DIGIT int exp = 0; - if (s.Peek() == 'e' || s.Peek() == 'E') { + if (Consume(s, 'e') || Consume(s, 'E')) { if (!useDouble) { - d = use64bit ? i64 : i; + d = static_cast<double>(use64bit ? i64 : i); useDouble = true; } - s.Take(); bool expMinus = false; - if (s.Peek() == '+') - s.Take(); - else if (s.Peek() == '-') { - s.Take(); + if (Consume(s, '+')) + ; + else if (Consume(s, '-')) expMinus = true; - } - if (s.Peek() >= '0' && s.Peek() <= '9') { - exp = s.Take() - '0'; + if (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = static_cast<int>(s.Take() - '0'); if (expMinus) { - while (s.Peek() >= '0' && s.Peek() <= '9') { - exp = exp * 10 + (s.Take() - '0'); + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = exp * 10 + static_cast<int>(s.Take() - '0'); if (exp >= 214748364) { // Issue #313: prevent overflow exponent - while (s.Peek() >= '0' && s.Peek() <= '9') // Consume the rest of exponent + while (RAPIDJSON_UNLIKELY(s.Peek() >= '0' && s.Peek() <= '9')) // Consume the rest of exponent s.Take(); } } } else { // positive exp int maxExp = 308 - expFrac; - while (s.Peek() >= '0' && s.Peek() <= '9') { - exp = exp * 10 + (s.Take() - '0'); - if (exp > maxExp) - RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell()); + while (RAPIDJSON_LIKELY(s.Peek() >= '0' && s.Peek() <= '9')) { + exp = exp * 10 + static_cast<int>(s.Take() - '0'); + if (RAPIDJSON_UNLIKELY(exp > maxExp)) + RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, startOffset); } } } @@ -1005,34 +1659,63 @@ private: // Finish parsing, call event according to the type of number. bool cont = true; - size_t length = s.Length(); - const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not. - - if (useDouble) { - int p = exp + expFrac; - if (parseFlags & kParseFullPrecisionFlag) - d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp); - else - d = internal::StrtodNormalPrecision(d, p); - cont = handler.Double(minus ? -d : d); - } - else { - if (use64bit) { - if (minus) - cont = handler.Int64(static_cast<int64_t>(~i64 + 1)); - else - cont = handler.Uint64(i64); + if (parseFlags & kParseNumbersAsStringsFlag) { + if (parseFlags & kParseInsituFlag) { + s.Pop(); // Pop stack no matter if it will be used or not. + typename InputStream::Ch* head = is.PutBegin(); + const size_t length = s.Tell() - startOffset; + RAPIDJSON_ASSERT(length <= 0xFFFFFFFF); + // unable to insert the \0 character here, it will erase the comma after this number + const typename TargetEncoding::Ch* const str = reinterpret_cast<typename TargetEncoding::Ch*>(head); + cont = handler.RawNumber(str, SizeType(length), false); } else { - if (minus) - cont = handler.Int(static_cast<int32_t>(~i + 1)); - else - cont = handler.Uint(i); + SizeType numCharsToCopy = static_cast<SizeType>(s.Length()); + StringStream srcStream(s.Pop()); + StackStream<typename TargetEncoding::Ch> dstStream(stack_); + while (numCharsToCopy--) { + Transcoder<UTF8<>, TargetEncoding>::Transcode(srcStream, dstStream); + } + dstStream.Put('\0'); + const typename TargetEncoding::Ch* str = dstStream.Pop(); + const SizeType length = static_cast<SizeType>(dstStream.Length()) - 1; + cont = handler.RawNumber(str, SizeType(length), true); } } - if (!cont) - RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell()); + else { + size_t length = s.Length(); + const char* decimal = s.Pop(); // Pop stack no matter if it will be used or not. + + if (useDouble) { + int p = exp + expFrac; + if (parseFlags & kParseFullPrecisionFlag) + d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp); + else + d = internal::StrtodNormalPrecision(d, p); + + cont = handler.Double(minus ? -d : d); + } + else if (useNanOrInf) { + cont = handler.Double(d); + } + else { + if (use64bit) { + if (minus) + cont = handler.Int64(static_cast<int64_t>(~i64 + 1)); + else + cont = handler.Uint64(i64); + } + else { + if (minus) + cont = handler.Int(static_cast<int32_t>(~i + 1)); + else + cont = handler.Uint(i); + } + } + } + if (RAPIDJSON_UNLIKELY(!cont)) + RAPIDJSON_PARSE_ERROR(kParseErrorTermination, startOffset); } // Parse any JSON value @@ -1045,10 +1728,10 @@ private: case '"': ParseString<parseFlags>(is, handler); break; case '{': ParseObject<parseFlags>(is, handler); break; case '[': ParseArray <parseFlags>(is, handler); break; - default : + default : ParseNumber<parseFlags>(is, handler); break; - + } } @@ -1056,27 +1739,29 @@ private: // States enum IterativeParsingState { - IterativeParsingStartState = 0, - IterativeParsingFinishState, - IterativeParsingErrorState, + IterativeParsingFinishState = 0, // sink states at top + IterativeParsingErrorState, // sink states at top + IterativeParsingStartState, // Object states IterativeParsingObjectInitialState, IterativeParsingMemberKeyState, - IterativeParsingKeyValueDelimiterState, IterativeParsingMemberValueState, - IterativeParsingMemberDelimiterState, IterativeParsingObjectFinishState, // Array states IterativeParsingArrayInitialState, IterativeParsingElementState, - IterativeParsingElementDelimiterState, IterativeParsingArrayFinishState, // Single value state IterativeParsingValueState, - + + // Delimiter states (at bottom) + IterativeParsingElementDelimiterState, + IterativeParsingMemberDelimiterState, + IterativeParsingKeyValueDelimiterState, + cIterativeParsingStateCount }; @@ -1120,9 +1805,9 @@ private: #undef N #undef N16 //!@endcond - - if (sizeof(Ch) == 1 || unsigned(c) < 256) - return (Token)tokenMap[(unsigned char)c]; + + if (sizeof(Ch) == 1 || static_cast<unsigned>(c) < 256) + return static_cast<Token>(tokenMap[static_cast<unsigned char>(c)]); else return NumberToken; } @@ -1130,6 +1815,18 @@ private: RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) { // current state x one lookahead token -> new state static const char G[cIterativeParsingStateCount][kTokenCount] = { + // Finish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Error(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, // Start { IterativeParsingArrayInitialState, // Left bracket @@ -1144,18 +1841,6 @@ private: IterativeParsingValueState, // Null IterativeParsingValueState // Number }, - // Finish(sink state) - { - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState - }, - // Error(sink state) - { - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState - }, // ObjectInitial { IterativeParsingErrorState, // Left bracket @@ -1184,20 +1869,6 @@ private: IterativeParsingErrorState, // Null IterativeParsingErrorState // Number }, - // KeyValueDelimiter - { - IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) - IterativeParsingErrorState, // Right bracket - IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) - IterativeParsingErrorState, // Right curly bracket - IterativeParsingErrorState, // Comma - IterativeParsingErrorState, // Colon - IterativeParsingMemberValueState, // String - IterativeParsingMemberValueState, // False - IterativeParsingMemberValueState, // True - IterativeParsingMemberValueState, // Null - IterativeParsingMemberValueState // Number - }, // MemberValue { IterativeParsingErrorState, // Left bracket @@ -1212,20 +1883,6 @@ private: IterativeParsingErrorState, // Null IterativeParsingErrorState // Number }, - // MemberDelimiter - { - IterativeParsingErrorState, // Left bracket - IterativeParsingErrorState, // Right bracket - IterativeParsingErrorState, // Left curly bracket - IterativeParsingErrorState, // Right curly bracket - IterativeParsingErrorState, // Comma - IterativeParsingErrorState, // Colon - IterativeParsingMemberKeyState, // String - IterativeParsingErrorState, // False - IterativeParsingErrorState, // True - IterativeParsingErrorState, // Null - IterativeParsingErrorState // Number - }, // ObjectFinish(sink state) { IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, @@ -1260,10 +1917,22 @@ private: IterativeParsingErrorState, // Null IterativeParsingErrorState // Number }, + // ArrayFinish(sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, + // Single Value (sink state) + { + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, + IterativeParsingErrorState + }, // ElementDelimiter { IterativeParsingArrayInitialState, // Left bracket(push Element state) - IterativeParsingErrorState, // Right bracket + IterativeParsingArrayFinishState, // Right bracket IterativeParsingObjectInitialState, // Left curly bracket(push Element state) IterativeParsingErrorState, // Right curly bracket IterativeParsingErrorState, // Comma @@ -1274,21 +1943,37 @@ private: IterativeParsingElementState, // Null IterativeParsingElementState // Number }, - // ArrayFinish(sink state) + // MemberDelimiter { - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState + IterativeParsingErrorState, // Left bracket + IterativeParsingErrorState, // Right bracket + IterativeParsingErrorState, // Left curly bracket + IterativeParsingObjectFinishState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberKeyState, // String + IterativeParsingErrorState, // False + IterativeParsingErrorState, // True + IterativeParsingErrorState, // Null + IterativeParsingErrorState // Number }, - // Single Value (sink state) + // KeyValueDelimiter { - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, - IterativeParsingErrorState - } + IterativeParsingArrayInitialState, // Left bracket(push MemberValue state) + IterativeParsingErrorState, // Right bracket + IterativeParsingObjectInitialState, // Left curly bracket(push MemberValue state) + IterativeParsingErrorState, // Right curly bracket + IterativeParsingErrorState, // Comma + IterativeParsingErrorState, // Colon + IterativeParsingMemberValueState, // String + IterativeParsingMemberValueState, // False + IterativeParsingMemberValueState, // True + IterativeParsingMemberValueState, // Null + IterativeParsingMemberValueState // Number + }, }; // End of G - return (IterativeParsingState)G[state][token]; + return static_cast<IterativeParsingState>(G[state][token]); } // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit(). @@ -1365,6 +2050,11 @@ private: case IterativeParsingObjectFinishState: { + // Transit from delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingMemberDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorObjectMissName, is.Tell()); + return IterativeParsingErrorState; + } // Get member count. SizeType c = *stack_.template Pop<SizeType>(1); // If the object is not empty, count the last member. @@ -1390,6 +2080,11 @@ private: case IterativeParsingArrayFinishState: { + // Transit from delimiter is only allowed when trailing commas are enabled + if (!(parseFlags & kParseTrailingCommasFlag) && src == IterativeParsingElementDelimiterState) { + RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorValueInvalid, is.Tell()); + return IterativeParsingErrorState; + } // Get element count. SizeType c = *stack_.template Pop<SizeType>(1); // If the array is not empty, count the last element. @@ -1441,7 +2136,7 @@ private: // Error flag has been set. return; } - + switch (src) { case IterativeParsingStartState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return; case IterativeParsingFinishState: RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return; @@ -1449,49 +2144,60 @@ private: case IterativeParsingMemberDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return; case IterativeParsingMemberKeyState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return; case IterativeParsingMemberValueState: RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return; - case IterativeParsingElementState: RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return; - default: RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell()); return; - } + case IterativeParsingKeyValueDelimiterState: + case IterativeParsingArrayInitialState: + case IterativeParsingElementDelimiterState: RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell()); return; + default: RAPIDJSON_ASSERT(src == IterativeParsingElementState); RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return; + } } + RAPIDJSON_FORCEINLINE bool IsIterativeParsingDelimiterState(IterativeParsingState s) { + return s >= IterativeParsingElementDelimiterState; + } + + RAPIDJSON_FORCEINLINE bool IsIterativeParsingCompleteState(IterativeParsingState s) { + return s <= IterativeParsingErrorState; + } + template <unsigned parseFlags, typename InputStream, typename Handler> ParseResult IterativeParse(InputStream& is, Handler& handler) { parseResult_.Clear(); ClearStackOnExit scope(*this); IterativeParsingState state = IterativeParsingStartState; - + SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); while (is.Peek() != '\0') { Token t = Tokenize(is.Peek()); IterativeParsingState n = Predict(state, t); IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler); - + if (d == IterativeParsingErrorState) { HandleError(state, is); break; } - + state = d; - + // Do not further consume streams if a root JSON has been parsed. if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState) break; - + SkipWhitespaceAndComments<parseFlags>(is); RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_); } - + // Handle the end of file. if (state != IterativeParsingFinishState) HandleError(state, is); - + return parseResult_; } static const size_t kDefaultStackCapacity = 256; //!< Default stack capacity in bytes for storing a single decoded string. internal::Stack<StackAllocator> stack_; //!< A stack for storing decoded string temporarily during non-destructive parsing. ParseResult parseResult_; + IterativeParsingState state_; }; // class GenericReader //! Reader with UTF8 encoding and default allocator. @@ -1499,6 +2205,11 @@ typedef GenericReader<UTF8<>, UTF8<> > Reader; RAPIDJSON_NAMESPACE_END +#ifdef __clang__ +RAPIDJSON_DIAG_POP +#endif + + #ifdef __GNUC__ RAPIDJSON_DIAG_POP #endif |