From 85a771b89d425950fdd51cf758c52885390ab674 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Mon, 5 Feb 2018 23:27:42 -0800 Subject: QCborStreamReader: update to the new TinyCBOR zero-copy string API Change-Id: Iab119b62106d40fb8499fffd1510abe5d8f2722a Reviewed-by: Edward Welbourne --- src/3rdparty/tinycbor/src/cbor.h | 44 ++++++++- src/3rdparty/tinycbor/src/cborerrorstrings.c | 4 +- src/3rdparty/tinycbor/src/cborinternal_p.h | 2 - src/3rdparty/tinycbor/src/cborparser.c | 89 +++++++----------- src/3rdparty/tinycbor/tests/parser/tst_parser.cpp | 11 ++- src/corelib/serialization/qcborstream.cpp | 109 +++++++++++++--------- 6 files changed, 148 insertions(+), 111 deletions(-) (limited to 'src') diff --git a/src/3rdparty/tinycbor/src/cbor.h b/src/3rdparty/tinycbor/src/cbor.h index 7a335d96ae..d7dee0f55b 100644 --- a/src/3rdparty/tinycbor/src/cbor.h +++ b/src/3rdparty/tinycbor/src/cbor.h @@ -166,8 +166,7 @@ typedef enum CborError { CborErrorIllegalType, /* type not allowed here */ CborErrorIllegalNumber, CborErrorIllegalSimpleType, /* types of value less than 32 encoded in two bytes */ - - CborErrorLastStringChunk, /* not really an error */ + CborErrorNoMoreStringChunks, /* parser errors in strict mode parsing only */ CborErrorUnknownSimpleType = 512, @@ -291,11 +290,23 @@ enum CborParserGlobalFlags enum CborParserIteratorFlags { + /* used for all types, but not during string chunk iteration + * (values are static-asserted, don't change) */ CborIteratorFlag_IntegerValueIs64Bit = 0x01, CborIteratorFlag_IntegerValueTooLarge = 0x02, + + /* used only for CborIntegerType */ CborIteratorFlag_NegativeInteger = 0x04, + + /* used only during string iteration */ + CborIteratorFlag_BeforeFirstStringChunk = 0x04, CborIteratorFlag_IteratingStringChunks = 0x08, + + /* used for arrays, maps and strings, including during chunk iteration */ CborIteratorFlag_UnknownLength = 0x10, + + /* used for maps, but must be kept for all types + * (ContainerIsMap value must be CborMapType - CborArrayType) */ CborIteratorFlag_ContainerIsMap = 0x20, CborIteratorFlag_NextIsMapKey = 0x40 }; @@ -496,9 +507,36 @@ CBOR_INLINE_API CborError cbor_value_dup_byte_string(const CborValue *value, uin return _cbor_value_dup_string(value, (void **)buffer, buflen, next); } +CBOR_PRIVATE_API CborError _cbor_value_get_string_chunk_size(const CborValue *value, size_t *len); +CBOR_INLINE_API CborError cbor_value_get_string_chunk_size(const CborValue *value, size_t *len) +{ + assert(value->flags & CborIteratorFlag_IteratingStringChunks); + return _cbor_value_get_string_chunk_size(value, len); +} + +CBOR_INLINE_API bool cbor_value_string_iteration_at_end(const CborValue *value) +{ + size_t dummy; + return cbor_value_get_string_chunk_size(value, &dummy) == CborErrorNoMoreStringChunks; +} + +CBOR_PRIVATE_API CborError _cbor_value_begin_string_iteration(CborValue *value); +CBOR_INLINE_API CborError cbor_value_begin_string_iteration(CborValue *value) +{ + assert(cbor_value_is_text_string(value) || cbor_value_is_byte_string(value)); + assert(!(value->flags & CborIteratorFlag_IteratingStringChunks)); + return _cbor_value_begin_string_iteration(value); +} + +CBOR_PRIVATE_API CborError _cbor_value_finish_string_iteration(CborValue *value); +CBOR_INLINE_API CborError cbor_value_finish_string_iteration(CborValue *value) +{ + assert(cbor_value_string_iteration_at_end(value)); + return _cbor_value_finish_string_iteration(value); +} + CBOR_PRIVATE_API CborError _cbor_value_get_string_chunk(const CborValue *value, const void **bufferptr, size_t *len, CborValue *next); -CBOR_API CborError cbor_value_get_string_chunk_size(CborValue *value, size_t *len); CBOR_INLINE_API CborError cbor_value_get_text_string_chunk(const CborValue *value, const char **bufferptr, size_t *len, CborValue *next) { diff --git a/src/3rdparty/tinycbor/src/cborerrorstrings.c b/src/3rdparty/tinycbor/src/cborerrorstrings.c index 4acda9dd38..1dd8ae25bd 100644 --- a/src/3rdparty/tinycbor/src/cborerrorstrings.c +++ b/src/3rdparty/tinycbor/src/cborerrorstrings.c @@ -119,8 +119,8 @@ const char *cbor_error_string(CborError error) case CborErrorIllegalSimpleType: return _("illegal encoding of simple type smaller than 32"); - case CborErrorLastStringChunk: - return _("no size available: that was the last string chunk"); + case CborErrorNoMoreStringChunks: + return _("no more byte or text strings available"); case CborErrorUnknownSimpleType: return _("unknown simple type"); diff --git a/src/3rdparty/tinycbor/src/cborinternal_p.h b/src/3rdparty/tinycbor/src/cborinternal_p.h index 16c911d26a..9546e44d61 100644 --- a/src/3rdparty/tinycbor/src/cborinternal_p.h +++ b/src/3rdparty/tinycbor/src/cborinternal_p.h @@ -90,8 +90,6 @@ enum { BreakByte = (unsigned)Break | (SimpleTypesType << MajorTypeShift) }; -CBOR_INTERNAL_API CborError CBOR_INTERNAL_API_CC _cbor_value_prepare_string_iteration(CborValue *it); - static inline void copy_current_position(CborValue *dst, const CborValue *src) { // This "if" is here for pedantry only: the two branches should perform diff --git a/src/3rdparty/tinycbor/src/cborparser.c b/src/3rdparty/tinycbor/src/cborparser.c index 3e12c98972..65b0b4b5c8 100644 --- a/src/3rdparty/tinycbor/src/cborparser.c +++ b/src/3rdparty/tinycbor/src/cborparser.c @@ -963,60 +963,45 @@ CborError cbor_value_calculate_string_length(const CborValue *value, size_t *len return _cbor_value_copy_string(value, NULL, len, NULL); } -static inline void prepare_string_iteration(CborValue *it) +CborError _cbor_value_begin_string_iteration(CborValue *it) { + it->flags |= CborIteratorFlag_IteratingStringChunks | + CborIteratorFlag_BeforeFirstStringChunk; if (!cbor_value_is_length_known(it)) { /* chunked string: we're before the first chunk; * advance to the first chunk */ advance_bytes(it, 1); - it->flags |= CborIteratorFlag_IteratingStringChunks; } + + return CborNoError; } -CborError CBOR_INTERNAL_API_CC _cbor_value_prepare_string_iteration(CborValue *it) +CborError _cbor_value_finish_string_iteration(CborValue *it) { - cbor_assert((it->flags & CborIteratorFlag_IteratingStringChunks) == 0); - prepare_string_iteration(it); + if (!cbor_value_is_length_known(it)) + advance_bytes(it, 1); /* skip the Break */ - /* are we at the end? */ - if (!can_read_bytes(it, 1)) - return CborErrorUnexpectedEOF; - return CborNoError; + return preparse_next_value(it); } -static CborError get_string_chunk_size(CborValue *it, size_t *offset, size_t *len) +static CborError get_string_chunk_size(const CborValue *it, size_t *offset, size_t *len) { - /* Possible states: - * length known | iterating | meaning - * no | no | before the first chunk of a chunked string - * yes | no | at a non-chunked string - * no | yes | second or later chunk - * yes | yes | after a non-chunked string - */ - if (it->flags & CborIteratorFlag_IteratingStringChunks) { - /* already iterating */ - if (cbor_value_is_length_known(it)) { - /* if the length was known, it wasn't chunked, so finish iteration */ - *len = 0; - return CborErrorLastStringChunk; - } - } else { - prepare_string_iteration(it); - } + uint8_t descriptor; + size_t bytesNeeded = 1; + + if (cbor_value_is_length_known(it) && (it->flags & CborIteratorFlag_BeforeFirstStringChunk) == 0) + return CborErrorNoMoreStringChunks; /* are we at the end? */ - uint8_t descriptor; if (!read_bytes(it, &descriptor, 0, 1)) return CborErrorUnexpectedEOF; if (descriptor == BreakByte) - return CborErrorLastStringChunk; + return CborErrorNoMoreStringChunks; if ((descriptor & MajorTypeMask) != it->type) return CborErrorIllegalType; /* find the string length */ - size_t bytesNeeded = 1; - descriptor &= SmallValueMask; if (descriptor < Value8Bit) { *len = descriptor; @@ -1047,45 +1032,33 @@ static CborError get_string_chunk_size(CborValue *it, size_t *offset, size_t *le ++bytesNeeded; } - if (*len != (size_t)*len) - return CborErrorDataTooLarge; - *offset = bytesNeeded; return CborNoError; } +CborError _cbor_value_get_string_chunk_size(const CborValue *value, size_t *len) +{ + size_t offset; + return get_string_chunk_size(value, &offset, len); +} + static CborError get_string_chunk(CborValue *it, const void **bufferptr, size_t *len) { size_t offset; CborError err = get_string_chunk_size(it, &offset, len); - if (err == CborErrorLastStringChunk) { - /* last chunk */ - if (!cbor_value_is_length_known(it)) { - /* skip the break byte */ - advance_bytes(it, 1); - } - *bufferptr = NULL; - *len = 0; - return preparse_next_value(it); - } else if (err) { + if (err) return err; - } /* we're good, transfer the string now */ err = transfer_string(it, bufferptr, offset, *len); if (err) return err; - it->flags |= CborIteratorFlag_IteratingStringChunks; + /* we've iterated at least once */ + it->flags &= ~CborIteratorFlag_BeforeFirstStringChunk; return CborNoError; } -CborError cbor_value_get_string_chunk_size(CborValue *value, size_t *len) -{ - size_t offset; - return get_string_chunk_size(value, &offset, len); -} - /** * \fn CborError cbor_value_get_text_string_chunk(const CborValue *value, const char **bufferptr, size_t *len, CborValue *next) * @@ -1168,7 +1141,7 @@ CborError cbor_value_get_string_chunk_size(CborValue *value, size_t *len) */ CborError _cbor_value_get_string_chunk(const CborValue *value, const void **bufferptr, - size_t *len, CborValue *next) + size_t *len, CborValue *next) { CborValue tmp; if (!next) @@ -1216,14 +1189,18 @@ static CborError iterate_string_chunks(const CborValue *value, char *buffer, siz *next = *value; *result = true; + err = _cbor_value_begin_string_iteration(next); + if (err) + return err; + while (1) { size_t newTotal; size_t chunkLen; err = get_string_chunk(next, &ptr, &chunkLen); + if (err == CborErrorNoMoreStringChunks) + break; if (err) return err; - if (!ptr) - break; if (unlikely(add_check_overflow(total, chunkLen, &newTotal))) return CborErrorDataTooLarge; @@ -1242,7 +1219,7 @@ static CborError iterate_string_chunks(const CborValue *value, char *buffer, siz *result = !!func(buffer + total, nul, 1); } *buflen = total; - return CborNoError; + return _cbor_value_finish_string_iteration(next); } /** diff --git a/src/3rdparty/tinycbor/tests/parser/tst_parser.cpp b/src/3rdparty/tinycbor/tests/parser/tst_parser.cpp index 078d03885f..74c480bc51 100644 --- a/src/3rdparty/tinycbor/tests/parser/tst_parser.cpp +++ b/src/3rdparty/tinycbor/tests/parser/tst_parser.cpp @@ -691,18 +691,23 @@ static void chunkedStringTest(const QByteArray &data, const QString &concatenate CborValue copy = value; + err = cbor_value_begin_string_iteration(&value); + QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\""); forever { QString decoded; err = parseOneChunk(&value, &decoded); - QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\""); - - if (decoded.isEmpty()) + if (err == CborErrorNoMoreStringChunks) break; // last chunk + QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\""); + QVERIFY2(!chunks.isEmpty(), "Too many chunks"); QString expected = chunks.takeFirst(); QCOMPARE(decoded, expected); } + + err = cbor_value_finish_string_iteration(&value); + QVERIFY2(!err, QByteArray("Got error \"") + cbor_error_string(err) + "\""); QVERIFY2(chunks.isEmpty(), "Too few chunks"); // compare to the concatenated data diff --git a/src/corelib/serialization/qcborstream.cpp b/src/corelib/serialization/qcborstream.cpp index 7173bea734..04c8da15aa 100644 --- a/src/corelib/serialization/qcborstream.cpp +++ b/src/corelib/serialization/qcborstream.cpp @@ -1847,11 +1847,6 @@ public: IdealIoBufferSize = 256 }; - struct ChunkParameters { - qsizetype offset; - qsizetype size; - }; - QIODevice *device; QByteArray buffer; QStack containerStack; @@ -1941,12 +1936,12 @@ public: lastError = { QCborError::Code(err) }; } - void updateBufferAfterString(ChunkParameters params) + void updateBufferAfterString(qsizetype offset, qsizetype size) { Q_ASSERT(device); - bufferStart += params.offset; - qsizetype newStart = bufferStart + params.size; + bufferStart += offset; + qsizetype newStart = bufferStart + size; qsizetype remainingInBuffer = buffer.size() - newStart; if (remainingInBuffer <= 0) { @@ -1962,7 +1957,7 @@ public: bufferStart = 0; } - ChunkParameters getStringChunkParameters(); + bool ensureStringIteration(); }; void qt_cbor_stream_set_error(QCborStreamReaderPrivate *d, QCborError error) @@ -2022,30 +2017,16 @@ static CborError qt_cbor_decoder_transfer_string(void *token, const void **userp return total > avail ? CborErrorUnexpectedEOF : CborNoError; } -QCborStreamReaderPrivate::ChunkParameters QCborStreamReaderPrivate::getStringChunkParameters() +bool QCborStreamReaderPrivate::ensureStringIteration() { - size_t len; - const void *content = &len; // set to any non-null value - CborError err; - -#if 1 - // Using internal TinyCBOR API! - err = _cbor_value_get_string_chunk(¤tElement, &content, &len, ¤tElement); -#else - // the above is effectively the same as: - if (cbor_value_is_byte_string(¤tElement)) - err = cbor_value_get_byte_string_chunk(¤tElement, reinterpret_cast(&content), - &len, ¤tElement); - else - err = cbor_value_get_text_string_chunk(¤tElement, reinterpret_cast(&content), - &len, ¤tElement); -#endif + if (currentElement.flags & CborIteratorFlag_IteratingStringChunks) + return true; - if (err) - handleError(err); - else - lastError = {}; - return { qintptr(content), err ? -1 : qsizetype(len) }; + CborError err = cbor_value_begin_string_iteration(¤tElement); + if (!err) + return true; + handleError(err); + return false; } /*! @@ -2731,9 +2712,12 @@ QCborStreamReader::StringResult QCborStreamReader::_readByteArray_he */ qsizetype QCborStreamReader::_currentStringChunkSize() const { + if (!d->ensureStringIteration()) + return -1; + size_t len; CborError err = cbor_value_get_string_chunk_size(&d->currentElement, &len); - if (err == CborErrorLastStringChunk) + if (err == CborErrorNoMoreStringChunks) return 0; // not a real error else if (err) d->handleError(err); @@ -2783,28 +2767,63 @@ qsizetype QCborStreamReader::_currentStringChunkSize() const QCborStreamReader::StringResult QCborStreamReader::readStringChunk(char *ptr, qsizetype maxlen) { - auto params = d->getStringChunkParameters(); + CborError err; + size_t len; + const void *content; QCborStreamReader::StringResult result; result.data = 0; result.status = Error; - if (params.offset == 0) { - d->preread(); - preparse(); - result.status = EndOfString; + d->lastError = {}; + if (!d->ensureStringIteration()) return result; - } - if (params.size < 0) + +#if 1 + // Using internal TinyCBOR API! + err = _cbor_value_get_string_chunk(&d->currentElement, &content, &len, &d->currentElement); +#else + // the above is effectively the same as: + if (cbor_value_is_byte_string(¤tElement)) + err = cbor_value_get_byte_string_chunk(&d->currentElement, reinterpret_cast(&content), + &len, &d->currentElement); + else + err = cbor_value_get_text_string_chunk(&d->currentElement, reinterpret_cast(&content), + &len, &d->currentElement); +#endif + + // Range check: using implementation-defined behavior in converting an + // unsigned value out of range of the destination signed type (same as + // "len > size_t(std::numeric_limits::max())", but generates + // better code with ICC and MSVC). + if (!err && qsizetype(len) < 0) + err = CborErrorDataTooLarge; + + if (err) { + if (err == CborErrorNoMoreStringChunks) { + d->preread(); + err = cbor_value_finish_string_iteration(&d->currentElement); + result.status = EndOfString; + } + if (err) + d->handleError(err); + else + preparse(); return result; + } // Read the chunk into the user's buffer. - qsizetype toRead = qMin(maxlen, params.size); - qsizetype left = params.size - maxlen; qint64 actuallyRead; + qptrdiff offset = qptrdiff(content); + qsizetype toRead = qsizetype(len); + qsizetype left = toRead - maxlen; + if (left < 0) + left = 0; // buffer bigger than string + else + toRead = maxlen; // buffer smaller than string if (d->device) { // This first skip can't fail because we've already read this many bytes. - d->device->skip(d->bufferStart + params.offset); + d->device->skip(d->bufferStart + qptrdiff(content)); actuallyRead = d->device->read(ptr, toRead); if (actuallyRead != toRead) { @@ -2820,11 +2839,11 @@ QCborStreamReader::readStringChunk(char *ptr, qsizetype maxlen) return result; } - d->updateBufferAfterString(params); + d->updateBufferAfterString(offset, len); } else { actuallyRead = toRead; - memcpy(ptr, d->buffer.constData() + d->bufferStart + params.offset, toRead); - d->bufferStart += QByteArray::size_type(params.offset + params.size); + memcpy(ptr, d->buffer.constData() + d->bufferStart + offset, toRead); + d->bufferStart += QByteArray::size_type(offset + len); } d->preread(); -- cgit v1.2.3