1 files changed, 255 insertions, 79 deletions
diff --git a/src/corelib/serialization/qcborstreamreader.cpp b/src/corelib/serialization/qcborstreamreader.cpp
index 8e42efb0cc..863c24534a 100644
--- a/src/corelib/serialization/qcborstreamreader.cpp
+++ b/src/corelib/serialization/qcborstreamreader.cpp
@@ -1,48 +1,11 @@
-/****************************************************************************
-**
-** Copyright (C) 2020 Intel Corporation.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2020 Intel Corporation.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
 
 #include "qcborstreamreader.h"
 
 #define CBOR_NO_ENCODER_API
 #include <private/qcborcommon_p.h>
 
-#include <private/qbytearray_p.h>
 #include <private/qnumeric_p.h>
 #include <private/qstringconverter_p.h>
 #include <qiodevice.h>
@@ -65,6 +28,7 @@ static CborError qt_cbor_decoder_transfer_string(void *token, const void **userp
 
 QT_WARNING_PUSH
 QT_WARNING_DISABLE_MSVC(4334) // '<<': result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?)
+QT_WARNING_DISABLE_GCC("-Wimplicit-fallthrough")
 
 #include <cborparser.c>
 
@@ -72,13 +36,11 @@ QT_WARNING_POP
 
 static CborError _cbor_value_dup_string(const CborValue *, void **, size_t *, CborValue *)
 {
-    Q_UNREACHABLE();
-    return CborErrorInternalError;
+    Q_UNREACHABLE_RETURN(CborErrorInternalError);
 }
 [[maybe_unused]] static CborError cbor_value_get_half_float_as_float(const CborValue *, float *)
 {
-    Q_UNREACHABLE();
-    return CborErrorInternalError;
+    Q_UNREACHABLE_RETURN(CborErrorInternalError);
 }
 
 // confirm our constants match TinyCBOR's
@@ -98,6 +60,7 @@ static_assert(int(QCborStreamReader::Invalid) == CborInvalidType);
    \class QCborStreamReader
    \inmodule QtCore
    \ingroup cbor
+   \ingroup qtserialization
    \reentrant
    \since 5.12
 
@@ -109,7 +72,7 @@ static_assert(int(QCborStreamReader::Invalid) == CborInvalidType);
    Representation, a very compact form of binary data encoding that is
    compatible with JSON. It was created by the IETF Constrained RESTful
    Environments (CoRE) WG, which has used it in many new RFCs. It is meant to
-   be used alongside the \l{https://tools.ietf.org/html/rfc7252}{CoAP
+   be used alongside the \l{RFC 7252}{CoAP
    protocol}.
 
    QCborStreamReader provides a StAX-like API, similar to that of
@@ -187,7 +150,9 @@ static_assert(int(QCborStreamReader::Invalid) == CborInvalidType);
    parsing from a QByteArray, or reparse(), if it is instead reading directly
    a the QIDOevice that now has more data available (see setDevice()).
 
-   \sa QCborStreamWriter, QCborValue, QXmlStreamReader
+   \sa QCborStreamWriter, QCborValue, QXmlStreamReader,
+       {Parsing and displaying CBOR data}, {Serialization Converter},
+       {Saving and Loading a Game}
  */
 
 /*!
@@ -565,7 +530,7 @@ public:
     CborValue currentElement;
     QCborError lastError = {};
 
-    QByteArray::size_type bufferStart;
+    QByteArray::size_type bufferStart = 0;
     bool corrupt = false;
 
     QCborStreamReaderPrivate(const QByteArray &data)
@@ -653,21 +618,24 @@ public:
             QByteArray *array;
             QString *string;
         };
-        enum { ByteArray = -1, String = -3 };
+        enum Type { ByteArray = -1, String = -3, Utf8String = -5 };
         qsizetype maxlen_or_type;
 
         ReadStringChunk(char *ptr, qsizetype maxlen) : ptr(ptr), maxlen_or_type(maxlen) {}
-        ReadStringChunk(QByteArray *array) : array(array), maxlen_or_type(ByteArray) {}
+        ReadStringChunk(QByteArray *array, Type type = ByteArray) : array(array), maxlen_or_type(type) {}
         ReadStringChunk(QString *str) : string(str), maxlen_or_type(String) {}
         bool isString() const { return maxlen_or_type == String; }
+        bool isUtf8String() const { return maxlen_or_type == Utf8String; }
         bool isByteArray() const { return maxlen_or_type == ByteArray; }
         bool isPlainPointer() const { return maxlen_or_type >= 0; }
     };
 
     static QCborStreamReader::StringResultCode appendStringChunk(QCborStreamReader &reader, QByteArray *data);
+    bool readFullString(ReadStringChunk params);
     QCborStreamReader::StringResult<qsizetype> readStringChunk(ReadStringChunk params);
     qsizetype readStringChunk_byte(ReadStringChunk params, qsizetype len);
     qsizetype readStringChunk_unicode(ReadStringChunk params, qsizetype utf8len);
+    qsizetype readStringChunk_utf8(ReadStringChunk params, qsizetype utf8len);
     bool ensureStringIteration();
 };
 
@@ -717,7 +685,7 @@ static CborError qt_cbor_decoder_transfer_string(void *token, const void **userp
     // (otherwise, we'd lose the length information)
     qsizetype total;
     if (len > size_t(std::numeric_limits<QByteArray::size_type>::max())
-            || add_overflow<qsizetype>(offset, len, &total))
+            || qAddOverflow<qsizetype>(offset, len, &total))
         return CborErrorDataTooLarge;
 
     // our string transfer is just saving the offset to the userptr
@@ -787,7 +755,7 @@ inline void QCborStreamReader::preparse()
    \sa addData(), isValid()
  */
 QCborStreamReader::QCborStreamReader()
-    : QCborStreamReader(QByteArray())
+    : d(new QCborStreamReaderPrivate({})), type_(Invalid)
 {
 }
 
@@ -830,7 +798,7 @@ QCborStreamReader::QCborStreamReader(const QByteArray &data)
 
    Creates a QCborStreamReader object that will parse the CBOR stream found by
    reading from \a device. QCborStreamReader does not take ownership of \a
-   device, so it must remain valid until this oject is destroyed.
+   device, so it must remain valid until this object is destroyed.
  */
 QCborStreamReader::QCborStreamReader(QIODevice *device)
     : d(new QCborStreamReaderPrivate(device))
@@ -964,7 +932,7 @@ void QCborStreamReader::reset()
 
    \sa isValid()
  */
-QCborError QCborStreamReader::lastError()
+QCborError QCborStreamReader::lastError() const
 {
     return d->lastError;
 }
@@ -1005,7 +973,7 @@ QCborStreamReader::Type QCborStreamReader::parentContainerType() const
 {
     if (d->containerStack.isEmpty())
         return Invalid;
-    return Type(cbor_value_get_type(&qAsConst(d->containerStack).top()));
+    return Type(cbor_value_get_type(&std::as_const(d->containerStack).top()));
 }
 
 /*!
@@ -1325,16 +1293,20 @@ bool QCborStreamReader::leaveContainer()
 
    Decodes one string chunk from the CBOR string and returns it. This function
    is used for both regular and chunked string contents, so the caller must
-   always loop around calling this function, even if isLengthKnown() has
+   always loop around calling this function, even if isLengthKnown()
    is true. The typical use of this function is as follows:
 
    \snippet code/src_corelib_serialization_qcborstream.cpp 27
 
+   The readAllString() function implements the above loop and some extra checks.
+
+//! [string-no-type-conversions]
    This function does not perform any type conversions, including from integers
    or from byte arrays. Therefore, it may only be called if isString() returned
    true; calling it in any other condition is an error.
+//! [string-no-type-conversions]
 
-   \sa readByteArray(), isString(), readStringChunk()
+   \sa readAllString(), readByteArray(), isString(), readStringChunk()
  */
 QCborStreamReader::StringResult<QString> QCborStreamReader::_readString_helper()
 {
@@ -1344,7 +1316,41 @@ QCborStreamReader::StringResult<QString> QCborStreamReader::_readString_helper()
     if (r.status == Error) {
         result.data.clear();
     } else {
-        Q_ASSERT(r.data == result.data.length());
+        Q_ASSERT(r.data == result.data.size());
+        if (r.status == EndOfString && lastError() == QCborError::NoError)
+            preparse();
+    }
+
+    return result;
+}
+
+/*!
+   \fn QCborStreamReader::StringResult<QByteArray> QCborStreamReader::readUtf8String()
+   \since 6.7
+
+   Decodes one string chunk from the CBOR string and returns it. This function
+   is used for both regular and chunked string contents, so the caller must
+   always loop around calling this function, even if isLengthKnown() is true.
+   The typical use of this function mirrors that of readString(), shown below:
+
+   \snippet code/src_corelib_serialization_qcborstream.cpp 27
+
+   The readAllUtf8String() function implements the above loop and some extra checks.
+
+    \include qcborstreamreader.cpp string-no-type-conversions
+
+   \sa readAllUtf8String(), readAllString(), readByteArray(), isString(), readStringChunk()
+ */
+QCborStreamReader::StringResult<QByteArray> QCborStreamReader::_readUtf8String_helper()
+{
+    using P = QCborStreamReaderPrivate::ReadStringChunk;
+    QCborStreamReader::StringResult<QByteArray> result;
+    auto r = d->readStringChunk(P{ &result.data, P::Utf8String });
+    result.status = r.status;
+    if (r.status == Error) {
+        result.data.clear();
+    } else {
+        Q_ASSERT(r.data == result.data.size());
         if (r.status == EndOfString && lastError() == QCborError::NoError)
             preparse();
     }
@@ -1357,16 +1363,20 @@ QCborStreamReader::StringResult<QString> QCborStreamReader::_readString_helper()
 
    Decodes one byte array chunk from the CBOR string and returns it. This
    function is used for both regular and chunked contents, so the caller must
-   always loop around calling this function, even if isLengthKnown() has
+   always loop around calling this function, even if isLengthKnown()
    is true. The typical use of this function is as follows:
 
    \snippet code/src_corelib_serialization_qcborstream.cpp 28
 
+   The readAllByteArray() function implements the above loop and some extra checks.
+
+//! [bytearray-no-type-conversions]
    This function does not perform any type conversions, including from integers
    or from strings. Therefore, it may only be called if isByteArray() is true;
    calling it in any other condition is an error.
+//! [bytearray-no-type-conversions]
 
-   \sa readString(), isByteArray(), readStringChunk()
+   \sa readAllByteArray(), readString(), isByteArray(), readStringChunk()
  */
 QCborStreamReader::StringResult<QByteArray> QCborStreamReader::_readByteArray_helper()
 {
@@ -1376,7 +1386,7 @@ QCborStreamReader::StringResult<QByteArray> QCborStreamReader::_readByteArray_he
     if (r.status == Error) {
         result.data.clear();
     } else {
-        Q_ASSERT(r.data == result.data.length());
+        Q_ASSERT(r.data == result.data.size());
         if (r.status == EndOfString && lastError() == QCborError::NoError)
             preparse();
     }
@@ -1415,6 +1425,147 @@ qsizetype QCborStreamReader::_currentStringChunkSize() const
     return -1;
 }
 
+bool QCborStreamReaderPrivate::readFullString(ReadStringChunk params)
+{
+    auto r = readStringChunk(params);
+    while (r.status == QCborStreamReader::Ok) {
+        // Ok: a chunk was read into params' destination; keep appending until the status changes
+        r = readStringChunk(params);
+    }
+
+    bool ok = r.status == QCborStreamReader::EndOfString; // any other final status counts as failure
+    Q_ASSERT(ok == !lastError); // invariant: a clean end-of-string and a recorded error never coexist
+    return ok;
+}
+
+/*!
+    \fn QCborStreamReader::readAllString()
+    \since 6.7
+
+    Decodes the current text string and returns it. If the string is chunked,
+    this function will iterate over all chunks and concatenate them. If an
+    error happens, this function returns a default-constructed QString(), but
+    that may not be distinguishable from certain empty text strings. Instead,
+    check lastError() to determine if an error has happened.
+
+    \include qcborstreamreader.cpp string-no-type-conversions
+
+//! [note-not-restartable]
+    \note This function cannot be resumed. That is, this function should not
+    be used in contexts where the CBOR data may still be received, for example
+    from a socket or pipe. It should only be used when the full data has
+    already been received and is available in the input QByteArray or
+    QIODevice.
+//! [note-not-restartable]
+
+    \sa readString(), readStringChunk(), isString(), readAllByteArray()
+ */
+/*!
+    \fn QCborStreamReader::readAndAppendToString(QString &dst)
+    \since 6.7
+
+    Decodes the current text string and appends to \a dst. If the string is
+    chunked, this function will iterate over all chunks and concatenate them.
+    If an error happens during decoding, other chunks that could be decoded
+    successfully may have been written to \a dst nonetheless. Returns \c true
+    if the decoding happened without errors, \c false otherwise.
+
+    \include qcborstreamreader.cpp string-no-type-conversions
+
+    \include qcborstreamreader.cpp note-not-restartable
+
+    \sa readString(), readStringChunk(), isString(), readAndAppendToByteArray()
+ */
+bool QCborStreamReader::_readAndAppendToString_helper(QString &dst)
+{
+    bool ok = d->readFullString(&dst); // a QString* selects ReadStringChunk's String mode
+    if (ok)
+        preparse(); // only after a complete read; NOTE(review): preparse() is defined outside this diff
+    return ok;
+}
+
+/*!
+    \fn QCborStreamReader::readAllUtf8String()
+    \since 6.7
+
+    Decodes the current text string and returns it. If the string is chunked,
+    this function will iterate over all chunks and concatenate them. If an
+    error happens, this function returns a default-constructed QByteArray(), but
+    that may not be distinguishable from certain empty text strings. Instead,
+    check lastError() to determine if an error has happened.
+
+    \include qcborstreamreader.cpp string-no-type-conversions
+
+    \include qcborstreamreader.cpp note-not-restartable
+
+    \sa readUtf8String(), readString(), readStringChunk(), isString(), readAllByteArray()
+ */
+/*!
+    \fn QCborStreamReader::readAndAppendToUtf8String(QByteArray &dst)
+    \since 6.7
+
+    Decodes the current text string and appends to \a dst. If the string is
+    chunked, this function will iterate over all chunks and concatenate them.
+    If an error happens during decoding, other chunks that could be decoded
+    successfully may have been written to \a dst nonetheless. Returns \c true
+    if the decoding happened without errors, \c false otherwise.
+
+    \include qcborstreamreader.cpp string-no-type-conversions
+
+    \include qcborstreamreader.cpp note-not-restartable
+
+    \sa readUtf8String(), readString(), readStringChunk(), isString(), readAndAppendToByteArray()
+ */
+bool QCborStreamReader::_readAndAppendToUtf8String_helper(QByteArray &dst)
+{
+    using P = QCborStreamReaderPrivate::ReadStringChunk;
+    bool ok = d->readFullString({ &dst, P::Utf8String }); // Utf8String mode: bytes go to dst and are UTF-8 validated
+    if (ok)
+        preparse(); // only after a complete read; NOTE(review): preparse() is defined outside this diff
+    return ok;
+}
+
+/*!
+    \fn QCborStreamReader::readAllByteArray()
+    \since 6.7
+
+    Decodes the current byte string and returns it. If the string is chunked,
+    this function will iterate over all chunks and concatenate them. If an
+    error happens, this function returns a default-constructed QByteArray(),
+    but that may not be distinguishable from certain empty byte strings.
+    Instead, check lastError() to determine if an error has happened.
+
+    \include qcborstreamreader.cpp bytearray-no-type-conversions
+
+    \include qcborstreamreader.cpp note-not-restartable
+
+    \sa readByteArray(), readStringChunk(), isByteArray(), readAllString()
+ */
+
+/*!
+    \fn QCborStreamReader::readAndAppendToByteArray(QByteArray &dst)
+    \since 6.7
+
+    Decodes the current byte string and appends to \a dst. If the string is
+    chunked, this function will iterate over all chunks and concatenate them.
+    If an error happens during decoding, other chunks that could be decoded
+    successfully may have been written to \a dst nonetheless. Returns \c true
+    if the decoding happened without errors, \c false otherwise.
+
+    \include qcborstreamreader.cpp bytearray-no-type-conversions
+
+    \include qcborstreamreader.cpp note-not-restartable
+
+    \sa readByteArray(), readStringChunk(), isByteArray(), readAndAppendToString()
+ */
+bool QCborStreamReader::_readAndAppendToByteArray_helper(QByteArray &dst)
+{
+    bool ok = d->readFullString(&dst); // a QByteArray* with the default Type selects ByteArray mode
+    if (ok)
+        preparse(); // only after a complete read; NOTE(review): preparse() is defined outside this diff
+    return ok;
+}
+
 /*!
     Reads the current string chunk into the buffer pointed to by \a ptr, whose
     size is \a maxlen. This function returns a \l StringResult object, with the
@@ -1487,6 +1638,12 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
     // qt_cbor_decoder_transfer_string() enforces that
     // QIODevice::bytesAvailable() be bigger than the amount we're about to
     // read.
+    //
+    // This is an important security gate: if the CBOR stream is corrupt or
+    // malicious, and has an impossibly large string size, we only go past it
+    // if the transfer to the destination buffer will succeed (modulo QIODevice
+    // I/O failures).
+
 #if 1
     // Using internal TinyCBOR API!
     err = _cbor_value_get_string_chunk(&currentElement, &content, &len, &currentElement);
@@ -1529,6 +1686,8 @@ QCborStreamReaderPrivate::readStringChunk(ReadStringChunk params)
     if (params.isString()) {
         // readString()
         result.data = readStringChunk_unicode(params, qsizetype(len));
+    } else if (params.isUtf8String()) {
+        result.data = readStringChunk_utf8(params, qsizetype(len));
     } else {
         // readByteArray() or readStringChunk()
         result.data = readStringChunk_byte(params, qsizetype(len));
@@ -1575,11 +1734,11 @@ QCborStreamReaderPrivate::readStringChunk_byte(ReadStringChunk params, qsizetype
         else
             toRead = params.maxlen_or_type;     // buffer smaller than string
         ptr = params.ptr;
-    } else if (params.isByteArray()) {
+    } else if (!params.isString()) {
         // See note above on having ensured there is enough incoming data.
         auto oldSize = params.array->size();
         auto newSize = oldSize;
-        if (add_overflow<decltype(newSize)>(oldSize, toRead, &newSize)) {
+        if (qAddOverflow<decltype(newSize)>(oldSize, toRead, &newSize)) {
             handleError(CborErrorDataTooLarge);
             return -1;
         }
@@ -1589,7 +1748,7 @@ QCborStreamReaderPrivate::readStringChunk_byte(ReadStringChunk params, qsizetype
             // the distinction between DataTooLarge and OOM is mostly for
             // compatibility with Qt 5; in Qt 6, we could consider everything
             // to be OOM.
-            handleError(newSize > MaxByteArraySize ? CborErrorDataTooLarge: CborErrorOutOfMemory);
+            handleError(newSize > QByteArray::max_size() ? CborErrorDataTooLarge: CborErrorOutOfMemory);
             return -1;
         }
 
@@ -1622,24 +1781,25 @@ QCborStreamReaderPrivate::readStringChunk_byte(ReadStringChunk params, qsizetype
 inline qsizetype
 QCborStreamReaderPrivate::readStringChunk_unicode(ReadStringChunk params, qsizetype utf8len)
 {
+    Q_ASSERT(params.isString());
+
     // See QUtf8::convertToUnicode() a detailed explanation of why this
     // conversion uses the same number of words or less.
-    QChar *begin = nullptr;
-    if (params.isString()) {
-        QT_TRY {
-            params.string->resize(utf8len);
-        } QT_CATCH (const std::bad_alloc &) {
-            if (utf8len > MaxStringSize)
-                handleError(CborErrorDataTooLarge);
-            else
-                handleError(CborErrorOutOfMemory);
-            return -1;
-        }
-
-        begin = const_cast<QChar *>(params.string->constData());
+    qsizetype currentSize = params.string->size();
+    size_t newSize = size_t(utf8len) + size_t(currentSize); // can't overflow
+    if (utf8len > QString::max_size() || qsizetype(newSize) < 0) {
+        handleError(CborErrorDataTooLarge);
+        return -1;
+    }
+    QT_TRY {
+        params.string->resize(qsizetype(newSize));
+    } QT_CATCH (const std::bad_alloc &) {
+        handleError(CborErrorOutOfMemory);
+        return -1;
     }
 
-    QChar *ptr = begin;
+    QChar *begin = const_cast<QChar *>(params.string->constData());
+    QChar *ptr = begin + currentSize;
     QStringConverter::State cs(QStringConverter::Flag::Stateless);
     if (device == nullptr) {
         // Easy case: we can decode straight from the buffer we already have
@@ -1671,9 +1831,25 @@ QCborStreamReaderPrivate::readStringChunk_unicode(ReadStringChunk params, qsizet
     }
 
     qsizetype size = ptr - begin;
-    if (params.isString())
-        params.string->truncate(size);
-    return size;
+    params.string->truncate(ptr - begin);
+    return size - currentSize;  // how many UTF-16 code units (QChars) we added
+}
+
+inline qsizetype
+QCborStreamReaderPrivate::readStringChunk_utf8(ReadStringChunk params, qsizetype utf8len)
+{
+    qsizetype result = readStringChunk_byte(params, utf8len); // raw transfer into *params.array
+    if (result < 0)
+        return result; // negative result: pass the byte-level failure through unchanged
+
+    // validate the UTF-8 content we've just read (this chunk only, not the whole array)
+    QByteArrayView chunk = *params.array;
+    chunk = chunk.last(result); // the trailing `result` bytes, presumably those this call appended
+    if (QtPrivate::isValidUtf8(chunk))
+        return result;
+
+    handleError(CborErrorInvalidUtf8TextString); // invalid UTF-8 in this chunk: record the error
+    return -1;
+}
 
 QT_END_NAMESPACE