summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2020-11-06 23:48:49 -0800
committerThiago Macieira <thiago.macieira@intel.com>2020-12-07 22:54:38 +0000
commitbab2cd1125a21885bea97079219031ab45861826 (patch)
treedc686c22217fdcc7b9379750de8a6b1c4aabcc97
parentb4d7908d5a623bd0024d290eee6e2226a627542d (diff)
QCborStreamReader: move the UTF-8 decoding into readStringChunk
This allows us to decode long UTF-8 strings in chunks, instead of allocating a big block of the size of the UTF-8 source and then another for the full UTF-16 content. Pick-to: 5.15 6.0 Task-number: QTBUG-88253 Change-Id: I7b9b97ae9b32412abdc6fffd16452a47b1036ef3 Reviewed-by: Allan Sandfeld Jensen <allan.jensen@qt.io>
-rw-r--r--src/corelib/serialization/qcborstreamreader.cpp146
1 files changed, 109 insertions, 37 deletions
diff --git a/src/corelib/serialization/qcborstreamreader.cpp b/src/corelib/serialization/qcborstreamreader.cpp
index d097979bda..ef2ec08b88 100644
--- a/src/corelib/serialization/qcborstreamreader.cpp
+++ b/src/corelib/serialization/qcborstreamreader.cpp
@@ -48,6 +48,7 @@
#include <qiodevice.h>
#include <qdebug.h>
#include <qstack.h>
+#include <qvarlengtharray.h>
QT_BEGIN_NAMESPACE
@@ -671,18 +672,23 @@ public:
union {
char *ptr;
QByteArray *array;
+ QString *string;
};
- enum { ByteArray = -1 };
+ enum { ByteArray = -1, String = -3 };
qsizetype maxlen_or_type;
ReadStringChunk(char *ptr, qsizetype maxlen) : ptr(ptr), maxlen_or_type(maxlen) {}
ReadStringChunk(QByteArray *array) : array(array), maxlen_or_type(ByteArray) {}
+ ReadStringChunk(QString *str) : string(str), maxlen_or_type(String) {}
+ bool isString() const { return maxlen_or_type == String; }
bool isByteArray() const { return maxlen_or_type == ByteArray; }
bool isPlainPointer() const { return maxlen_or_type >= 0; }
};
static QCborStreamReader::StringResultCode appendStringChunk(QCborStreamReader &reader, QByteArray *data);
QCborStreamReader::StringResult<qsizetype> readStringChunk(ReadStringChunk params);
+ qsizetype readStringChunk_byte(ReadStringChunk params, qsizetype len);
+ qsizetype readStringChunk_unicode(ReadStringChunk params, qsizetype utf8len);
bool ensureStringIteration();
};
@@ -1354,29 +1360,17 @@ bool QCborStreamReader::leaveContainer()
*/
QCborStreamReader::StringResult<QString> QCborStreamReader::_readString_helper()
{
- auto r = _readByteArray_helper();
QCborStreamReader::StringResult<QString> result;
+ auto r = d->readStringChunk(&result.data);
result.status = r.status;
-
- if (r.status == Ok) {
- // See QUtf8::convertToUnicode() a detailed explanation of why this
- // conversion uses the same number of words or less.
- CborError err = CborNoError;
- if (r.data.size() > MaxStringSize) {
- err = CborErrorDataTooLarge;
- } else {
- QStringConverter::State cs(QStringConverter::Flag::Stateless);
- result.data = QUtf8::convertToUnicode(r.data, &cs);
- if (cs.invalidChars != 0 || cs.remainingChars != 0)
- err = CborErrorInvalidUtf8TextString;
- }
-
- if (err) {
- d->handleError(err);
- result.data.clear();
- result.status = Error;
- }
+ if (r.status == Error) {
+ result.data.clear();
+ } else {
+ Q_ASSERT(r.data == result.data.length());
+ if (r.status == EndOfString && lastError() == QCborError::NoError)
+ preparse();
}
+
return result;
}
@@ -1547,12 +1541,41 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
return result;
}
- // Read the chunk into the user's buffer.
- qint64 actuallyRead;
qptrdiff offset = qptrdiff(content);
+ bufferStart += offset;
+ if (device) {
+ // This first skip can't fail because we've already read this many bytes.
+ device->skip(bufferStart);
+ }
+
+ if (params.isString()) {
+ // readString()
+ result.data = readStringChunk_unicode(params, qsizetype(len));
+ } else {
+ // readByteArray() or readStringChunk()
+ result.data = readStringChunk_byte(params, qsizetype(len));
+ }
+
+ if (result.data < 0)
+ return result; // error
+
+ if (device)
+ updateBufferAfterString(0, len);
+ else
+ bufferStart += len;
+
+ preread();
+ result.status = QCborStreamReader::Ok;
+ return result;
+}
+
+inline qsizetype
+QCborStreamReaderPrivate::readStringChunk_byte(ReadStringChunk params, qsizetype len)
+{
+ qint64 actuallyRead;
qsizetype toRead = qsizetype(len);
qsizetype left = 0; // bytes from the chunk not copied to the user buffer, to discard
- char *ptr;
+ char *ptr = nullptr;
if (params.isPlainPointer()) {
left = toRead - params.maxlen_or_type;
@@ -1567,7 +1590,7 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
auto newSize = oldSize;
if (add_overflow<decltype(newSize)>(oldSize, toRead, &newSize)) {
handleError(CborErrorDataTooLarge);
- return result;
+ return -1;
}
try {
params.array->resize(newSize);
@@ -1576,15 +1599,13 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
// compatibility with Qt 5; in Qt 6, we could consider everything
// to be OOM.
handleError(newSize > MaxByteArraySize ? CborErrorDataTooLarge: CborErrorOutOfMemory);
- return result;
+ return -1;
}
ptr = const_cast<char *>(params.array->constData()) + oldSize;
}
if (device) {
- // This first skip can't fail because we've already read this many bytes.
- device->skip(bufferStart + qptrdiff(content));
actuallyRead = device->read(ptr, toRead);
if (actuallyRead != toRead) {
@@ -1597,20 +1618,71 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
if (actuallyRead < 0) {
handleError(CborErrorIO);
- return result;
+ return -1;
}
-
- updateBufferAfterString(offset, len);
} else {
actuallyRead = toRead;
- memcpy(ptr, buffer.constData() + bufferStart + offset, toRead);
- bufferStart += QByteArray::size_type(offset + len);
+ memcpy(ptr, buffer.constData() + bufferStart, toRead);
}
- preread();
- result.data = actuallyRead;
- result.status = QCborStreamReader::Ok;
- return result;
+ return actuallyRead;
+}
+
+inline qsizetype
+QCborStreamReaderPrivate::readStringChunk_unicode(ReadStringChunk params, qsizetype utf8len)
+{
+ // See QUtf8::convertToUnicode() a detailed explanation of why this
+ // conversion uses the same number of words or less.
+ QChar *begin = nullptr;
+ if (params.isString()) {
+ try {
+ params.string->resize(utf8len);
+ } catch (const std::bad_alloc &) {
+ if (utf8len > MaxStringSize)
+ handleError(CborErrorDataTooLarge);
+ else
+ handleError(CborErrorOutOfMemory);
+ return -1;
+ }
+
+ begin = const_cast<QChar *>(params.string->constData());
+ }
+
+ QChar *ptr = begin;
+ QStringConverter::State cs(QStringConverter::Flag::Stateless);
+ if (device == nullptr) {
+ // Easy case: we can decode straight from the buffer we already have
+ ptr = QUtf8::convertToUnicode(ptr, { buffer.constData() + bufferStart, utf8len }, &cs);
+ } else {
+ // read in chunks, to avoid creating large, intermediate buffers
+ constexpr qsizetype StringChunkSize = 16384;
+ qsizetype chunkSize = qMin(StringChunkSize, utf8len);
+ QVarLengthArray<char> chunk(chunkSize);
+
+ cs = { QStringConverter::Flag::ConvertInitialBom };
+ while (utf8len > 0 && cs.invalidChars == 0) {
+ qsizetype toRead = qMin(chunkSize, utf8len);
+ qint64 actuallyRead = device->read(chunk.data(), toRead);
+ if (actuallyRead == toRead)
+ ptr = QUtf8::convertToUnicode(ptr, { chunk.data(), toRead }, &cs);
+
+ if (actuallyRead != toRead) {
+ handleError(CborErrorIO);
+ return -1;
+ }
+ utf8len -= toRead;
+ }
+ }
+
+ if (cs.invalidChars != 0 || cs.remainingChars != 0) {
+ handleError(CborErrorInvalidUtf8TextString);
+ return -1;
+ }
+
+ qsizetype size = ptr - begin;
+ if (params.isString())
+ params.string->truncate(size);
+ return size;
}
QT_END_NAMESPACE