From 07b008425acb1f550c3e6e96e2bd6033fe86976a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A5rten=20Nordheim?= Date: Thu, 9 Apr 2020 13:23:42 +0200 Subject: Privately introducing QDecompressHelper for network purposes To support streaming decompression in QNAM. Will also be used to refactor existing decompression code in QNAM. Task-number: QTBUG-83269 Change-Id: Iecf3e359734163f15686c949f75d41fa4794a00e Reviewed-by: Thiago Macieira --- src/network/access/access.pri | 2 + src/network/access/qdecompresshelper.cpp | 479 +++++++++++++++++++++++++++++++ src/network/access/qdecompresshelper_p.h | 118 ++++++++ 3 files changed, 599 insertions(+) create mode 100644 src/network/access/qdecompresshelper.cpp create mode 100644 src/network/access/qdecompresshelper_p.h (limited to 'src/network/access') diff --git a/src/network/access/access.pri b/src/network/access/access.pri index 083fbbf5fd..4add94a111 100644 --- a/src/network/access/access.pri +++ b/src/network/access/access.pri @@ -87,6 +87,7 @@ qtConfig(http) { include($$PWD/http2/http2.pri) SOURCES += \ + access/qdecompresshelper.cpp \ access/qabstractprotocolhandler.cpp \ access/qhttp2protocolhandler.cpp \ access/qhttpmultipart.cpp \ @@ -101,6 +102,7 @@ qtConfig(http) { access/qhttp2configuration.cpp HEADERS += \ + access/qdecompresshelper_p.h \ access/qabstractprotocolhandler_p.h \ access/qhttp2protocolhandler_p.h \ access/qhttpmultipart.h \ diff --git a/src/network/access/qdecompresshelper.cpp b/src/network/access/qdecompresshelper.cpp new file mode 100644 index 0000000000..b478e220ea --- /dev/null +++ b/src/network/access/qdecompresshelper.cpp @@ -0,0 +1,479 @@ +/**************************************************************************** +** +** Copyright (C) 2020 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtNetwork module of the Qt Toolkit. 
+** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. 
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "qdecompresshelper_p.h"
+
+// NOTE(review): the header names of the four includes below were missing from
+// this copy of the patch and have been reconstructed from the identifiers this
+// file uses - confirm against the upstream commit.
+#include <QtCore/private/qbytearray_p.h>
+#include <QtCore/qiodevice.h>
+
+#include <zlib.h>
+
+#include <array>
+
+QT_BEGIN_NAMESPACE
+namespace {
+// Associates a Content-Encoding token with the decoder it selects.
+struct ContentEncodingMapping
+{
+    char name[8]; // NUL-terminated; "deflate" (7 characters + NUL) is the longest entry
+    QDecompressHelper::ContentEncoding encoding;
+};
+
+// Every encoding name that encodingFromByteArray() recognizes.
+constexpr ContentEncodingMapping contentEncodingMapping[] {
+    { "deflate", QDecompressHelper::Deflate },
+    { "gzip", QDecompressHelper::GZip },
+};
+
+// Looks \a ce up in contentEncodingMapping, ignoring case. Returns
+// QDecompressHelper::None when no entry matches.
+QDecompressHelper::ContentEncoding encodingFromByteArray(const QByteArray &ce) noexcept
+{
+    for (const auto &mapping : contentEncodingMapping) {
+        if (ce.compare(QByteArrayView(mapping.name, strlen(mapping.name)), Qt::CaseInsensitive) == 0)
+            return mapping.encoding;
+    }
+    return QDecompressHelper::None;
+}
+
+// The decoder is stored as a void pointer (decoderPointer); this converts it
+// back to the zlib stream it points to for Deflate and GZip.
+z_stream *toZlibPointer(void *ptr)
+{
+    return static_cast<z_stream *>(ptr);
+}
+}
+
+/*!
+    \internal
+    Returns true if \a encoding (compared case-insensitively) names an
+    encoding this class can decode.
+*/
+bool QDecompressHelper::isSupportedEncoding(const QByteArray &encoding)
+{
+    return encodingFromByteArray(encoding) != QDecompressHelper::None;
+}
+
+/*!
+    \internal
+    Releases the decoder and all buffered data by calling clear().
+*/
+QDecompressHelper::~QDecompressHelper()
+{
+    clear();
+}
+
+/*!
+    \internal
+    Selects the decoder named by \a encoding.
+
+    Returns false, after printing a warning, if an encoding has
+    already been set, if \a encoding is not supported, or if the
+    decoder could not be initialized. Returns true otherwise.
+*/
+bool QDecompressHelper::setEncoding(const QByteArray &encoding)
+{
+    Q_ASSERT(contentEncoding == QDecompressHelper::None);
+    if (contentEncoding != QDecompressHelper::None) {
+        qWarning("Encoding is already set.");
+        return false;
+    }
+    ContentEncoding ce = encodingFromByteArray(encoding);
+    if (ce == None) {
+        qWarning("An unsupported content encoding was selected: %s", encoding.data());
+        return false;
+    }
+    return setEncoding(ce);
+}
+
+/*!
+    \internal
+    \overload
+
+    Creates the decoder for \a ce, which must not be None, and stores
+    it in decoderPointer. If initialization fails a warning is
+    printed, contentEncoding is reset to None and false is returned.
+*/
+bool QDecompressHelper::setEncoding(ContentEncoding ce)
+{
+    Q_ASSERT(contentEncoding == None);
+    contentEncoding = ce;
+    switch (contentEncoding) {
+    case None:
+        Q_UNREACHABLE();
+        break;
+    case Deflate:
+    case GZip: {
+        // Both encodings share one zlib stream; header detection is automatic (see below).
+        z_stream *inflateStream = new z_stream;
+        memset(inflateStream, 0, sizeof(z_stream));
+        // "windowBits can also be greater than 15 for optional gzip decoding.
+        // Add 32 to windowBits to enable zlib and gzip decoding with automatic header detection"
+        // http://www.zlib.net/manual.html
+        if (inflateInit2(inflateStream, MAX_WBITS + 32) != Z_OK) {
+            delete inflateStream;
+            inflateStream = nullptr;
+        }
+        decoderPointer = inflateStream;
+        break;
+    }
+    }
+    if (!decoderPointer) {
+        qWarning("Failed to initialize the decoder.");
+        contentEncoding = QDecompressHelper::None;
+        return false;
+    }
+    return true;
+}
+
+/*!
+    \internal
+
+    Returns true if the QDecompressHelper is measuring the
+    size of the decompressed data.
+
+    \sa setCountingBytesEnabled, uncompressedSize
+*/
+bool QDecompressHelper::isCountingBytes() const
+{
+    return countDecompressed;
+}
+
+/*!
+    \internal
+
+    Enable or disable counting the decompressed size of the data
+    based on \a shouldCount. Enabling this means the data will be
+    decompressed twice (once for counting and once when data is
+    being read).
+
+    \note Can only be called before contentEncoding is set and data
+    is fed to the object.
+
+    \sa isCountingBytes, uncompressedSize
+*/
+void QDecompressHelper::setCountingBytesEnabled(bool shouldCount)
+{
+    // These are a best-effort check to ensure that no data has already been processed before this
+    // gets enabled
+    Q_ASSERT(compressedDataBuffer.byteAmount() == 0);
+    Q_ASSERT(contentEncoding == None);
+    countDecompressed = shouldCount;
+}
+
+/*!
+    \internal
+
+    Returns the amount of uncompressed bytes left.
+
+    \note Since this is only based on the data received
+    so far the final size could be larger.
+
+    \note It is only valid to call this if isCountingBytes()
+    returns true
+
+    \sa isCountingBytes, setCountingBytesEnabled
+*/
+qint64 QDecompressHelper::uncompressedSize() const
+{
+    Q_ASSERT(countDecompressed);
+    return uncompressedBytes;
+}
+
+/*!
+    \internal
+    \overload
+*/
+void QDecompressHelper::feed(const QByteArray &data)
+{
+    // Copy, then defer to the rvalue overload, which takes ownership of the copy.
+    return feed(QByteArray(data));
+}
+
+/*!
+    \internal
+    Give \a data to the QDecompressHelper which will be stored until
+    a read is attempted.
+
+    If \c isCountingBytes() is true then it will decompress immediately
+    before discarding the data, but will count the uncompressed byte
+    size.
+
+    If that counting pass fails, clear() is called and \a data is
+    not stored.
+*/
+void QDecompressHelper::feed(QByteArray &&data)
+{
+    Q_ASSERT(contentEncoding != None);
+    if (!countInternal(data))
+        clear(); // If our counting brother failed then so will we :|
+    else
+        compressedDataBuffer.append(std::move(data));
+}
+
+/*!
+    \internal
+    \overload
+*/
+void QDecompressHelper::feed(const QByteDataBuffer &buffer)
+{
+    Q_ASSERT(contentEncoding != None);
+    if (!countInternal(buffer))
+        clear(); // If our counting brother failed then so will we :|
+    else
+        compressedDataBuffer.append(buffer);
+}
+
+/*!
+    \internal
+    \overload
+*/
+void QDecompressHelper::feed(QByteDataBuffer &&buffer)
+{
+    Q_ASSERT(contentEncoding != None);
+    if (!countInternal(buffer))
+        clear(); // If our counting brother failed then so will we :|
+    else
+        compressedDataBuffer.append(std::move(buffer));
+}
+
+/*!
+    \internal
+    Decompress the data internally and immediately discard the
+    uncompressed data, but count how many bytes were decoded.
+    This lets us know the final size, unfortunately at the cost of
+    increased computation.
+
+    Returns false if the counting helper reports a read error,
+    otherwise true.
+
+    Potential @future improvement:
+    Decompress XX MiB/KiB before starting the count.
+    For smaller files the extra decompression can then be avoided.
+*/
+bool QDecompressHelper::countInternal()
+{
+    Q_ASSERT(countDecompressed);
+    while (countHelper->hasData()) {
+        // Scratch space only: the decoded bytes are counted and then dropped.
+        // NOTE(review): the template arguments were missing from this copy of the
+        // patch; the element type follows from read(char *, ...), the size is
+        // assumed - confirm against the upstream commit.
+        std::array<char, 1024> temp;
+        qsizetype bytesRead = countHelper->read(temp.data(), temp.size());
+        if (bytesRead == -1)
+            return false;
+        uncompressedBytes += bytesRead;
+    }
+    return true;
+}
+
+/*!
+    \internal
+    \overload
+
+    Does nothing and returns true unless counting is enabled.
+    Otherwise creates the counting helper on first use, feeds it a
+    copy of \a data and counts what it decodes.
+*/
+bool QDecompressHelper::countInternal(const QByteArray &data)
+{
+    if (countDecompressed) {
+        if (!countHelper) {
+            // The helper is a second decoder using the same encoding as this one
+            countHelper = std::make_unique<QDecompressHelper>();
+            countHelper->setEncoding(contentEncoding);
+        }
+        countHelper->feed(data);
+        return countInternal();
+    }
+    return true;
+}
+
+/*!
+    \internal
+    \overload
+*/
+bool QDecompressHelper::countInternal(const QByteDataBuffer &buffer)
+{
+    if (countDecompressed) {
+        if (!countHelper) {
+            // The helper is a second decoder using the same encoding as this one
+            countHelper = std::make_unique<QDecompressHelper>();
+            countHelper->setEncoding(contentEncoding);
+        }
+        countHelper->feed(buffer);
+        return countInternal();
+    }
+    return true;
+}
+
+/*!
+    \internal
+    Decodes up to \a maxSize bytes into \a data and returns the number
+    of bytes written.
+
+    Returns -1 if no encoding is set or if decoding fails; on a
+    decoding failure clear() is called, so the object is no longer
+    valid afterwards. Returns 0 if \a maxSize is not positive or if
+    there is nothing to decode.
+
+    When byte counting is enabled the returned amount is subtracted
+    from the value reported by uncompressedSize().
+*/
+qsizetype QDecompressHelper::read(char *data, qsizetype maxSize)
+{
+    if (!isValid())
+        return -1;
+
+    // Without room for output the decoder cannot make progress; it would report
+    // that as an error (and a negative size must never reach it), so treat the
+    // request as a no-op instead of tearing the decoder down.
+    if (maxSize <= 0)
+        return 0;
+
+    qsizetype bytesRead = -1;
+    if (!hasData())
+        return 0;
+
+    switch (contentEncoding) {
+    case None:
+        Q_UNREACHABLE();
+        break;
+    case Deflate:
+    case GZip:
+        bytesRead = readZLib(data, maxSize);
+        break;
+    }
+    if (bytesRead == -1)
+        clear();
+    else if (countDecompressed)
+        uncompressedBytes -= bytesRead;
+    return bytesRead;
+}
+
+/*!
+    \internal
+    Returns true if there are encoded bytes left or there is some
+    indication that the decoder still has data left internally.
+
+    \note Even if this returns true the next call to read() might
+    read 0 bytes. This most likely means the decompression is done.
+*/
+bool QDecompressHelper::hasData() const
+{
+    return encodedBytesAvailable() || decoderHasData;
+}
+
+/*!
+    \internal
+    Returns the number of compressed bytes that have been fed but not
+    yet handed to the decoder.
+*/
+qint64 QDecompressHelper::encodedBytesAvailable() const
+{
+    return compressedDataBuffer.byteAmount();
+}
+
+/*!
+    \internal
+    Returns true if an encoding is currently set, i.e. setEncoding()
+    succeeded and clear() has not been called since.
+*/
+bool QDecompressHelper::isValid() const
+{
+    return contentEncoding != None;
+}
+
+/*!
+    \internal
+    Destroys the decoder, drops all buffered compressed data and
+    resets every setting (encoding, byte counting) to its initial
+    state. isValid() returns false afterwards.
+*/
+void QDecompressHelper::clear()
+{
+    switch (contentEncoding) {
+    case None:
+        break;
+    case Deflate:
+    case GZip: {
+        z_stream *inflateStream = toZlibPointer(decoderPointer);
+        if (inflateStream)
+            inflateEnd(inflateStream);
+        delete inflateStream;
+        break;
+    }
+    }
+    decoderPointer = nullptr;
+    contentEncoding = None;
+
+    compressedDataBuffer.clear();
+    decoderHasData = false;
+
+    countDecompressed = false;
+    countHelper.reset();
+    uncompressedBytes = 0;
+}
+
+/*!
+    \internal
+    Inflates buffered input into \a data, writing at most \a maxSize
+    bytes, and returns the number of bytes written or -1 on error.
+
+    Input that was taken from the buffer but not consumed is put back
+    at the front of the buffer before returning from the end of the
+    function. If the decoder rejects the data, one attempt is made to
+    decode the current input block as raw deflate. When a stream ends
+    and input remains, the decoder is re-initialized and decoding
+    continues with the remaining input.
+*/
+qsizetype QDecompressHelper::readZLib(char *data, const qsizetype maxSize)
+{
+    bool triedRawDeflate = false;
+
+    z_stream *inflateStream = toZlibPointer(decoderPointer);
+    // decoderPointer is reset to nullptr further down when re-initializing the
+    // decoder for a follow-up stream fails, while contentEncoding stays set.
+    // Report an error on the next call instead of dereferencing a null stream.
+    if (!inflateStream)
+        return -1;
+
+    // avail_in/avail_out may be narrower than qsizetype, so input and output
+    // are handed to zlib in pieces no larger than this.
+    static const size_t zlibMaxSize =
+            size_t(std::numeric_limits<decltype(inflateStream->avail_in)>::max());
+
+    QByteArray input;
+    if (!compressedDataBuffer.isEmpty()) {
+        if (zlibMaxSize < size_t(compressedDataBuffer.sizeNextBlock()))
+            input = compressedDataBuffer.read(zlibMaxSize);
+        else
+            input = compressedDataBuffer.read();
+    }
+
+    inflateStream->avail_in = input.size();
+    inflateStream->next_in = reinterpret_cast<Bytef *>(input.data());
+
+    bool bigMaxSize = (zlibMaxSize < size_t(maxSize));
+    qsizetype adjustedAvailableOut = bigMaxSize ? qsizetype(zlibMaxSize) : maxSize;
+    inflateStream->avail_out = adjustedAvailableOut;
+    inflateStream->next_out = reinterpret_cast<Bytef *>(data);
+
+    qsizetype bytesDecoded = 0;
+    do {
+        auto previous_avail_out = inflateStream->avail_out;
+        int ret = inflate(inflateStream, Z_NO_FLUSH);
+        // All negative return codes are errors, in the context of HTTP compression, Z_NEED_DICT is
+        // also an error.
+        // in the case where we get Z_DATA_ERROR this could be because we received raw deflate
+        // compressed data.
+        if (ret == Z_DATA_ERROR && !triedRawDeflate) {
+            inflateEnd(inflateStream);
+            triedRawDeflate = true;
+            inflateStream->zalloc = Z_NULL;
+            inflateStream->zfree = Z_NULL;
+            inflateStream->opaque = Z_NULL;
+            inflateStream->avail_in = 0;
+            inflateStream->next_in = Z_NULL;
+            // Negative windowBits selects raw deflate without a zlib/gzip header
+            int ret = inflateInit2(inflateStream, -MAX_WBITS);
+            if (ret != Z_OK) {
+                return -1;
+            } else {
+                // Start over from the beginning of the current input block
+                inflateStream->avail_in = input.size();
+                inflateStream->next_in = reinterpret_cast<Bytef *>(input.data());
+                continue;
+            }
+        } else if (ret < 0 || ret == Z_NEED_DICT) {
+            return -1;
+        }
+        bytesDecoded += qsizetype(previous_avail_out - inflateStream->avail_out);
+        if (ret == Z_STREAM_END) {
+
+            // If there's more data after the stream then this is probably composed of multiple
+            // streams.
+            if (inflateStream->avail_in != 0) {
+                inflateEnd(inflateStream);
+                // Remember where we were in the input; these are restored after the re-init
+                Bytef *next_in = inflateStream->next_in;
+                uInt avail_in = inflateStream->avail_in;
+                inflateStream->zalloc = Z_NULL;
+                inflateStream->zfree = Z_NULL;
+                inflateStream->opaque = Z_NULL;
+                if (inflateInit2(inflateStream, MAX_WBITS + 32) != Z_OK) {
+                    delete inflateStream;
+                    decoderPointer = nullptr;
+                    // Failed to reinitialize, so we'll just return what we have
+                    return bytesDecoded;
+                } else {
+                    inflateStream->next_in = next_in;
+                    inflateStream->avail_in = avail_in;
+                    // Keep going to handle the other cases below
+                }
+            } else {
+                // No extra data, stream is at the end. We're done.
+                return bytesDecoded;
+            }
+        }
+
+        if (bigMaxSize && inflateStream->avail_out == 0) {
+            // Need to adjust the next_out and avail_out parameters since we reached the end
+            // of the current range
+            bigMaxSize = (zlibMaxSize < size_t(maxSize - bytesDecoded));
+            inflateStream->avail_out = bigMaxSize ? qsizetype(zlibMaxSize) : maxSize - bytesDecoded;
+            inflateStream->next_out = reinterpret_cast<Bytef *>(data + bytesDecoded);
+        }
+
+        if (inflateStream->avail_in == 0 && inflateStream->avail_out > 0
+            && !compressedDataBuffer.isEmpty()) {
+            // Grab the next input!
+            if (zlibMaxSize < size_t(compressedDataBuffer.sizeNextBlock()))
+                input = compressedDataBuffer.read(zlibMaxSize);
+            else
+                input = compressedDataBuffer.read();
+            inflateStream->avail_in = input.size();
+            inflateStream->next_in = reinterpret_cast<Bytef *>(input.data());
+        }
+    } while (inflateStream->avail_out > 0 && inflateStream->avail_in > 0);
+
+    if (inflateStream->avail_in) {
+        // Some input was left unused; move back to the buffer
+        input = input.right(inflateStream->avail_in);
+        compressedDataBuffer.prepend(input);
+    }
+
+    return bytesDecoded;
+}
+
+QT_END_NAMESPACE
diff --git a/src/network/access/qdecompresshelper_p.h b/src/network/access/qdecompresshelper_p.h
new file mode 100644
index 0000000000..5252925862
--- /dev/null
+++ b/src/network/access/qdecompresshelper_p.h
@@ -0,0 +1,118 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtNetwork module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ + +#ifndef DECOMPRESS_HELPER_P_H +#define DECOMPRESS_HELPER_P_H + +// +// W A R N I N G +// ------------- +// +// This file is not part of the Qt API. It exists for the convenience +// of the Network Access API. This header file may change from +// version to version without notice, or even be removed. +// +// We mean it. 
+//
+
+// NOTE(review): the header names of the three includes below were missing from
+// this copy of the patch and have been reconstructed from the identifiers this
+// header uses (QByteDataBuffer, std::unique_ptr) - confirm against the
+// upstream commit.
+#include <QtNetwork/private/qtnetworkglobal_p.h>
+#include <QtCore/private/qbytedata_p.h>
+
+#include <memory>
+
+QT_BEGIN_NAMESPACE
+
+class QIODevice;
+
+// Streaming decompressor for "deflate" and "gzip" encoded data: compressed
+// bytes are handed over with feed() and decoded on demand by read().
+class Q_AUTOTEST_EXPORT QDecompressHelper
+{
+public:
+    enum ContentEncoding {
+        None, // no encoding set; isValid() returns false
+        Deflate,
+        GZip,
+    };
+
+    QDecompressHelper() = default;
+    ~QDecompressHelper();
+
+    // Selects the decoder by name; returns false if one is already set, the
+    // name is unsupported or the decoder cannot be initialized.
+    bool setEncoding(const QByteArray &contentEncoding);
+
+    // Optional tracking of the decompressed size. Must be enabled before the
+    // encoding is set and before any data is fed.
+    bool isCountingBytes() const;
+    void setCountingBytesEnabled(bool shouldCount);
+
+    // Only valid while isCountingBytes() returns true.
+    qint64 uncompressedSize() const;
+
+    bool hasData() const;
+    // Store compressed input until it is read.
+    void feed(const QByteArray &data);
+    void feed(QByteArray &&data);
+    void feed(const QByteDataBuffer &buffer);
+    void feed(QByteDataBuffer &&buffer);
+    // Decodes up to maxSize bytes into data; returns the number of bytes
+    // written or -1 on error.
+    qsizetype read(char *data, qsizetype maxSize);
+
+    bool isValid() const;
+
+    // Destroys the decoder and resets the object to its initial state.
+    void clear();
+
+    static bool isSupportedEncoding(const QByteArray &encoding);
+
+private:
+    bool countInternal();
+    bool countInternal(const QByteArray &data);
+    bool countInternal(const QByteDataBuffer &buffer);
+
+    bool setEncoding(ContentEncoding ce);
+    qint64 encodedBytesAvailable() const;
+
+    qsizetype readZLib(char *data, qsizetype maxSize);
+
+    QByteDataBuffer compressedDataBuffer; // fed but not yet decoded input
+    bool decoderHasData = false;
+
+    bool countDecompressed = false;
+    // Second decoder used only to count decoded bytes; created on first use.
+    std::unique_ptr<QDecompressHelper> countHelper;
+    qint64 uncompressedBytes = 0;
+
+    ContentEncoding contentEncoding = None;
+
+    // Type-erased decoder state; a z_stream for Deflate and GZip.
+    void *decoderPointer = nullptr;
+};
+
+QT_END_NAMESPACE
+
+#endif // DECOMPRESS_HELPER_P_H
--
cgit v1.2.3