From b19220d17fa66de5ded41690ffff263ee2af5c63 Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Sun, 23 Jun 2019 15:12:57 +0200 Subject: QByteArray: add a strict mode to fromBase64 QByteArray::fromBase64 was liberal in its input, simply skipping over invalid characters. As a side-effect of this, it had no error reporting, meaning it could not be used to convert fromBase64 _and_ validate the input in one go. Add more option flags to make fromBase64 strictly validate its input. Since we want to know whether it has succeeded or not, and the existing fromBase64 overloads do not allow for that, introduce a new function that returns an optional-like datatype. While at it: base64 decoding can be done in-place; add an rvalue overload to enable this use case. [ChangeLog][QtCore][QByteArray] Added the new fromBase64Encoding function. [ChangeLog][QtCore][QByteArray] Added new flags to make fromBase64 / fromBase64Encoding strictly validate their input, instead of skipping over invalid characters. Change-Id: I99cd5f2230f3d62970b28b4cb102913301da6ccd Reviewed-by: Thiago Macieira --- .../snippets/code/src_corelib_tools_qbytearray.cpp | 13 + src/corelib/text/qbytearray.cpp | 300 +++++++++++++++++---- src/corelib/text/qbytearray.h | 60 ++++- 3 files changed, 323 insertions(+), 50 deletions(-) (limited to 'src') diff --git a/src/corelib/doc/snippets/code/src_corelib_tools_qbytearray.cpp b/src/corelib/doc/snippets/code/src_corelib_tools_qbytearray.cpp index 11ab50687d..01f620cf08 100644 --- a/src/corelib/doc/snippets/code/src_corelib_tools_qbytearray.cpp +++ b/src/corelib/doc/snippets/code/src_corelib_tools_qbytearray.cpp @@ -429,6 +429,19 @@ QByteArray::fromBase64("PHA+SGVsbG8/PC9wPg==", QByteArray::Base64Encoding); // r QByteArray::fromBase64("PHA-SGVsbG8_PC9wPg==", QByteArray::Base64UrlEncoding); // returns "

Hello?

" //! [44bis] +//! [44ter] +void process(const QByteArray &); + +if (auto result = QByteArray::fromBase64Encoding(encodedData)) + process(*result); +//! [44ter] + +//! [44quater] +auto result = QByteArray::fromBase64Encoding(encodedData); +if (result.decodingStatus == QByteArray::Base64DecodingStatus::Ok) + process(result.decoded); +//! [44quater] + //! [45] QByteArray text = QByteArray::fromHex("517420697320677265617421"); diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp index 444980e9c0..5a61d7f70d 100644 --- a/src/corelib/text/qbytearray.cpp +++ b/src/corelib/text/qbytearray.cpp @@ -2,6 +2,7 @@ ** ** Copyright (C) 2019 The Qt Company Ltd. ** Copyright (C) 2016 Intel Corporation. +** Copyright (C) 2019 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the QtCore module of the Qt Toolkit. @@ -41,6 +42,7 @@ #include "qbytearray.h" #include "qbytearraymatcher.h" #include "private/qtools_p.h" +#include "qhashfunctions.h" #include "qstring.h" #include "qlist.h" #include "qlocale.h" @@ -1002,10 +1004,20 @@ QByteArray qUncompress(const uchar* data, int nbytes) four. \value OmitTrailingEquals Omits adding the padding equal signs at the end of the encoded data. - - QByteArray::fromBase64() ignores the KeepTrailingEquals and - OmitTrailingEquals options and will not flag errors in case they are - missing or if there are too many of them. + \value IgnoreBase64DecodingErrors When decoding Base64-encoded data, ignores errors + in the input; invalid characters are simply skipped. + This enum value has been added in Qt 5.15. + \value AbortOnBase64DecodingErrors When decoding Base64-encoded data, stops at the first + decoding error. + This enum value has been added in Qt 5.15. + + QByteArray::fromBase64Encoding() and QByteArray::fromBase64() + ignore the KeepTrailingEquals and OmitTrailingEquals options. If + the IgnoreBase64DecodingErrors option is specified, they will not + flag errors in case trailing equal signs are missing or if there + are too many of them. If instead the AbortOnBase64DecodingErrors is + specified, then the input must either have no padding or have the + correct amount of equal signs. */ /*! \fn QByteArray::iterator QByteArray::begin() @@ -4513,7 +4525,140 @@ QByteArray &QByteArray::setRawData(const char *data, uint size) return *this; } +namespace { +struct fromBase64_helper_result { + qsizetype decodedLength; + QByteArray::Base64DecodingStatus status; +}; + +fromBase64_helper_result fromBase64_helper(const char *input, qsizetype inputSize, + char *output /* may alias input */, + QByteArray::Base64Options options) +{ + fromBase64_helper_result result{ 0, QByteArray::Base64DecodingStatus::Ok }; + + unsigned int buf = 0; + int nbits = 0; + + qsizetype offset = 0; + for (qsizetype i = 0; i < inputSize; ++i) { + int ch = input[i]; + int d; + + if (ch >= 'A' && ch <= 'Z') { + d = ch - 'A'; + } else if (ch >= 'a' && ch <= 'z') { + d = ch - 'a' + 26; + } else if (ch >= '0' && ch <= '9') { + d = ch - '0' + 52; + } else if (ch == '+' && (options & QByteArray::Base64UrlEncoding) == 0) { + d = 62; + } else if (ch == '-' && (options & QByteArray::Base64UrlEncoding) != 0) { + d = 62; + } else if (ch == '/' && (options & QByteArray::Base64UrlEncoding) == 0) { + d = 63; + } else if (ch == '_' && (options & QByteArray::Base64UrlEncoding) != 0) { + d = 63; + } else { + if (options & QByteArray::AbortOnBase64DecodingErrors) { + if (ch == '=') { + // can have 1 or 2 '=' signs, in both cases padding base64Size to + // a multiple of 4. Any other case is illegal. + if ((inputSize % 4) != 0) { + result.status = QByteArray::Base64DecodingStatus::IllegalInputLength; + return result; + } else if ((i == inputSize - 1) || + (i == inputSize - 2 && input[++i] == '=')) { + d = -1; // ... and exit the loop, normally + } else { + result.status = QByteArray::Base64DecodingStatus::IllegalPadding; + return result; + } + } else { + result.status = QByteArray::Base64DecodingStatus::IllegalCharacter; + return result; + } + } else { + d = -1; + } + } + + if (d != -1) { + buf = (buf << 6) | d; + nbits += 6; + if (nbits >= 8) { + nbits -= 8; + Q_ASSERT(offset < i); + output[offset++] = buf >> nbits; + buf &= (1 << nbits) - 1; + } + } + } + + result.decodedLength = offset; + return result; +} +} // anonymous namespace + /*! + \fn QByteArray::FromBase64Result QByteArray::fromBase64Encoding(QByteArray &&base64, Base64Options options) + \fn QByteArray::FromBase64Result QByteArray::fromBase64Encoding(const QByteArray &base64, Base64Options options) + \since 5.15 + \overload + + Decodes the Base64 array \a base64, using the options + defined by \a options. If \a options contains \c{IgnoreBase64DecodingErrors} + (the default), the input is not checked for validity; invalid + characters in the input are skipped, enabling the decoding process to + continue with subsequent characters. If \a options contains + \c{AbortOnBase64DecodingErrors}, then decoding will stop at the first + invalid character. + + For example: + + \snippet code/src_corelib_tools_qbytearray.cpp 44ter + + The algorithm used to decode Base64-encoded data is defined in \l{RFC 4648}. + + Returns a QByteArrayFromBase64Result object, containing the decoded + data and a flag telling whether decoding was successful. If the + \c{AbortOnBase64DecodingErrors} option was passed and the input + data was invalid, it is unspecified what the decoded data contains. + + \sa toBase64() +*/ +QByteArray::FromBase64Result QByteArray::fromBase64Encoding(QByteArray &&base64, Base64Options options) +{ + // try to avoid a detach when calling data(), as it would over-allocate + // (we need less space when decoding than the one required by the full copy) + if (base64.isDetached()) { + const auto base64result = fromBase64_helper(base64.data(), + base64.size(), + base64.data(), // in-place + options); + base64.truncate(int(base64result.decodedLength)); + return { std::move(base64), base64result.status }; + } + + return fromBase64Encoding(base64, options); +} + + +QByteArray::FromBase64Result QByteArray::fromBase64Encoding(const QByteArray &base64, Base64Options options) +{ + const auto base64Size = base64.size(); + QByteArray result((base64Size * 3) / 4, Qt::Uninitialized); + const auto base64result = fromBase64_helper(base64.data(), + base64Size, + const_cast(result.constData()), + options); + result.truncate(int(base64result.decodedLength)); + return { std::move(result), base64result.status }; +} + +/*! + \overload + Returns a decoded copy of the Base64 array \a base64. Input is not checked for validity; invalid characters in the input are skipped, enabling the decoding process to continue with subsequent characters. @@ -4524,21 +4669,28 @@ QByteArray &QByteArray::setRawData(const char *data, uint size) The algorithm used to decode Base64-encoded data is defined in \l{RFC 4648}. - \sa toBase64() + \note The fromBase64Encoding() function is recommended in new code. + + \sa toBase64(), fromBase64Encoding() */ QByteArray QByteArray::fromBase64(const QByteArray &base64) { - return fromBase64(base64, Base64Encoding); + if (auto result = fromBase64Encoding(base64, Base64Encoding)) + return std::move(result.decoded); + return QByteArray(); } /*! \since 5.2 \overload - Returns a decoded copy of the Base64 array \a base64, using the alphabet - defined by \a options. Input is not checked for validity; invalid + Returns a decoded copy of the Base64 array \a base64, using the options + defined by \a options. If \a options contains \c{IgnoreBase64DecodingErrors} + (the default), the input is not checked for validity; invalid characters in the input are skipped, enabling the decoding process to - continue with subsequent characters. + continue with subsequent characters. If \a options contains + \c{AbortOnBase64DecodingErrors}, then decoding will stop at the first + invalid character. For example: @@ -4546,49 +4698,18 @@ QByteArray QByteArray::fromBase64(const QByteArray &base64) The algorithm used to decode Base64-encoded data is defined in \l{RFC 4648}. - \sa toBase64() + Returns the decoded data, or, if the \c{AbortOnBase64DecodingErrors} + option was passed and the input data was invalid, an empty byte array. + + \note The fromBase64Encoding() function is recommended in new code. + + \sa toBase64(), fromBase64Encoding() */ QByteArray QByteArray::fromBase64(const QByteArray &base64, Base64Options options) { - unsigned int buf = 0; - int nbits = 0; - QByteArray tmp((base64.size() * 3) / 4, Qt::Uninitialized); - - int offset = 0; - for (int i = 0; i < base64.size(); ++i) { - int ch = base64.at(i); - int d; - - if (ch >= 'A' && ch <= 'Z') - d = ch - 'A'; - else if (ch >= 'a' && ch <= 'z') - d = ch - 'a' + 26; - else if (ch >= '0' && ch <= '9') - d = ch - '0' + 52; - else if (ch == '+' && (options & Base64UrlEncoding) == 0) - d = 62; - else if (ch == '-' && (options & Base64UrlEncoding) != 0) - d = 62; - else if (ch == '/' && (options & Base64UrlEncoding) == 0) - d = 63; - else if (ch == '_' && (options & Base64UrlEncoding) != 0) - d = 63; - else - d = -1; - - if (d != -1) { - buf = (buf << 6) | d; - nbits += 6; - if (nbits >= 8) { - nbits -= 8; - tmp[offset++] = buf >> nbits; - buf &= (1 << nbits) - 1; - } - } - } - - tmp.truncate(offset); - return tmp; + if (auto result = fromBase64Encoding(base64, options)) + return std::move(result.decoded); + return QByteArray(); } /*! @@ -5008,5 +5129,86 @@ void warn(WarningType w, EmittingClass c) } // namespace DeprecatedRefClassBehavior } // namespace QtPrivate +/*! + \class QByteArray::FromBase64Result + \inmodule QtCore + \ingroup tools + \since 5.15 + + \brief The QByteArray::FromBase64Result class holds the result of + a call to QByteArray::fromBase64Encoding. + + Objects of this class can be used to check whether the conversion + was successful, and if so, retrieve the decoded QByteArray. The + conversion operators defined for QByteArray::FromBase64Result make + its usage straightforward: + + \snippet code/src_corelib_tools_qbytearray.cpp 44ter + + In alternative, it is possible to access the conversion status + and the decoded data directly: + + \snippet code/src_corelib_tools_qbytearray.cpp 44quater + + \sa QByteArray::fromBase64 +*/ + +/*! + \variable QByteArray::FromBase64Result::decoded + + Contains the decoded byte array. +*/ + +/*! + \variable QByteArray::FromBase64Result::decodingStatus + + Contains whether the decoding was successful, expressed as a value + of type QByteArray::Base64DecodingStatus. +*/ + +/*! + \fn QByteArray::FromBase64Result::operator bool() const + + Returns whether the decoding was successful. This is equivalent + to checking whether the \c{decodingStatus} member is equal to + QByteArray::Base64DecodingStatus::Ok. +*/ + +/*! + \fn QByteArray::FromBase64Result::operator QByteArray() const + + Returns the decoded byte array. +*/ + +/*! + \fn bool operator==(const QByteArray::FromBase64Result &lhs, const QByteArray::FromBase64Result &rhs) noexcept + \relates QByteArray::FromBase64Result + + Compares \a lhs and \a rhs for equality. \a lhs and \a rhs are equal + if and only if they contain the same decoding status and, if the + status is QByteArray::Base64DecodingStatus::Ok, if and only if + they contain the same decoded data. +*/ + +/*! + \fn bool operator!=(const QByteArray::FromBase64Result &lhs, const QByteArray::FromBase64Result &rhs) noexcept + \relates QByteArray::FromBase64Result + + Compares \a lhs and \a rhs for inequality. +*/ + +/*! + \relates QByteArray::FromBase64Result + + Returns the hash value for \a key, using + \a seed to seed the calculation. +*/ +uint qHash(const QByteArray::FromBase64Result &key, uint seed) noexcept +{ + QtPrivate::QHashCombine hash; + seed = hash(seed, key.decoded); + seed = hash(seed, static_cast(key.decodingStatus)); + return seed; +} QT_END_NAMESPACE diff --git a/src/corelib/text/qbytearray.h b/src/corelib/text/qbytearray.h index 7c571706d8..4fa17cf58b 100644 --- a/src/corelib/text/qbytearray.h +++ b/src/corelib/text/qbytearray.h @@ -164,10 +164,20 @@ public: Base64UrlEncoding = 1, KeepTrailingEquals = 0, - OmitTrailingEquals = 2 + OmitTrailingEquals = 2, + + IgnoreBase64DecodingErrors = 0, + AbortOnBase64DecodingErrors = 4, }; Q_DECLARE_FLAGS(Base64Options, Base64Option) + enum class Base64DecodingStatus { + Ok, + IllegalInputLength, + IllegalCharacter, + IllegalPadding, + }; + inline QByteArray() noexcept; QByteArray(const char *, int size = -1); QByteArray(int size, char c); @@ -379,6 +389,10 @@ public: Q_REQUIRED_RESULT static QByteArray number(qulonglong, int base = 10); Q_REQUIRED_RESULT static QByteArray number(double, char f = 'g', int prec = 6); Q_REQUIRED_RESULT static QByteArray fromRawData(const char *, int size); + + class FromBase64Result; + Q_REQUIRED_RESULT static FromBase64Result fromBase64Encoding(QByteArray &&base64, Base64Options options = Base64Encoding); + Q_REQUIRED_RESULT static FromBase64Result fromBase64Encoding(const QByteArray &base64, Base64Options options = Base64Encoding); Q_REQUIRED_RESULT static QByteArray fromBase64(const QByteArray &base64, Base64Options options); Q_REQUIRED_RESULT static QByteArray fromBase64(const QByteArray &base64); // ### Qt6 merge with previous Q_REQUIRED_RESULT static QByteArray fromHex(const QByteArray &hexEncoded); @@ -749,6 +763,50 @@ inline QByteArray qUncompress(const QByteArray& data) Q_DECLARE_SHARED(QByteArray) +class QByteArray::FromBase64Result +{ +public: + QByteArray decoded; + QByteArray::Base64DecodingStatus decodingStatus; + + void swap(QByteArray::FromBase64Result &other) noexcept + { + qSwap(decoded, other.decoded); + qSwap(decodingStatus, other.decodingStatus); + } + + explicit operator bool() const noexcept { return decodingStatus == QByteArray::Base64DecodingStatus::Ok; } + +#if defined(Q_COMPILER_REF_QUALIFIERS) && !defined(Q_QDOC) + QByteArray &operator*() & noexcept { return decoded; } + const QByteArray &operator*() const & noexcept { return decoded; } + QByteArray &&operator*() && noexcept { return std::move(decoded); } +#else + QByteArray &operator*() noexcept { return decoded; } + const QByteArray &operator*() const noexcept { return decoded; } +#endif +}; + +Q_DECLARE_SHARED(QByteArray::FromBase64Result) + +inline bool operator==(const QByteArray::FromBase64Result &lhs, const QByteArray::FromBase64Result &rhs) noexcept +{ + if (lhs.decodingStatus != rhs.decodingStatus) + return false; + + if (lhs.decodingStatus == QByteArray::Base64DecodingStatus::Ok && lhs.decoded != rhs.decoded) + return false; + + return true; +} + +inline bool operator!=(const QByteArray::FromBase64Result &lhs, const QByteArray::FromBase64Result &rhs) noexcept +{ + return !operator==(lhs, rhs); +} + +Q_CORE_EXPORT Q_DECL_PURE_FUNCTION uint qHash(const QByteArray::FromBase64Result &key, uint seed = 0) noexcept; + QT_END_NAMESPACE #endif // QBYTEARRAY_H -- cgit v1.2.3