diff options
author | Mårten Nordheim <marten.nordheim@qt.io> | 2020-07-31 17:18:17 +0200 |
---|---|---|
committer | Mårten Nordheim <marten.nordheim@qt.io> | 2021-08-04 19:31:52 +0200 |
commit | b41d5f62932b4c833d3162cc22bc07172559b1ec (patch) | |
tree | 1c6ed247a3ea8a4097b35d2b136fafeecfa7528c /src | |
parent | 5889985c8c8e8cc676de4480ad95979287860b96 (diff) |
QDecompressHelper: Skip double-decompression if download is small
To retain backwards compatibility with some QNetworkReply usage,
namely connecting solely to finished-signal and allocating a buffer to
read into, but without storing the entire decompressed file in memory
until read, we may decompress the file twice.
With this patch users can now avoid this double decompression if the
amount of buffered data stays below 10 MiB. This means any file smaller
than 10 MiB will never need to be decompressed twice to know the size of
it. On top of that, if the data is handled as it arrives (e.g. in
readyRead) and the buffer is kept below 10 MiB it won't need to
decompress twice either.
This is active as long as "countDecompressed" is true, though it
currently always is in QNetworkAccessManager, with a future goal to make
it possible to control with public API, since it requires the user to
potentially adapt their usage of QNetworkReply.
In this patch we also stop tracking the amount of unhandled uncompressed
bytes (uncompressedBytes) in favor of tracking the total amount of bytes
which has been read() by the user of QDecompressHelper (totalBytesRead),
since we can more intuitively work out the total amount of unread bytes
using this value.
Change-Id: Ie3d8d6e39a18343fcf9b610f45c7fe7e4cd4e474
Reviewed-by: Timur Pocheptsov <timur.pocheptsov@qt.io>
Diffstat (limited to 'src')
-rw-r--r-- | src/network/access/qdecompresshelper.cpp | 94 | ||||
-rw-r--r-- | src/network/access/qdecompresshelper_p.h | 6 |
2 files changed, 82 insertions, 18 deletions
diff --git a/src/network/access/qdecompresshelper.cpp b/src/network/access/qdecompresshelper.cpp index 451684bf1b..f3227efe8a 100644 --- a/src/network/access/qdecompresshelper.cpp +++ b/src/network/access/qdecompresshelper.cpp @@ -237,7 +237,13 @@ void QDecompressHelper::setCountingBytesEnabled(bool shouldCount) qint64 QDecompressHelper::uncompressedSize() const { Q_ASSERT(countDecompressed); - return uncompressedBytes; + // Use the 'totalUncompressedBytes' from the countHelper if it exceeds the amount of bytes + // that we know about. + auto totalUncompressed = + countHelper && countHelper->totalUncompressedBytes > totalUncompressedBytes + ? countHelper->totalUncompressedBytes + : totalUncompressedBytes; + return totalUncompressed - totalBytesRead; } /*! @@ -262,10 +268,9 @@ void QDecompressHelper::feed(QByteArray &&data) { Q_ASSERT(contentEncoding != None); totalCompressedBytes += data.size(); - if (!countInternal(data)) + compressedDataBuffer.append(std::move(data)); + if (!countInternal(compressedDataBuffer[compressedDataBuffer.bufferCount() - 1])) clear(); // If our counting brother failed then so will we :| - else - compressedDataBuffer.append(std::move(data)); } /*! @@ -276,10 +281,9 @@ void QDecompressHelper::feed(const QByteDataBuffer &buffer) { Q_ASSERT(contentEncoding != None); totalCompressedBytes += buffer.byteAmount(); + compressedDataBuffer.append(buffer); if (!countInternal(buffer)) clear(); // If our counting brother failed then so will we :| - else - compressedDataBuffer.append(buffer); } /*! @@ -290,10 +294,10 @@ void QDecompressHelper::feed(QByteDataBuffer &&buffer) { Q_ASSERT(contentEncoding != None); totalCompressedBytes += buffer.byteAmount(); - if (!countInternal(buffer)) + const QByteDataBuffer copy(buffer); + compressedDataBuffer.append(std::move(buffer)); + if (!countInternal(copy)) clear(); // If our counting brother failed then so will we :| - else - compressedDataBuffer.append(std::move(buffer)); } /*! 
@@ -303,19 +307,34 @@ void QDecompressHelper::feed(QByteDataBuffer &&buffer) This lets us know the final size, unfortunately at the cost of increased computation. - Potential @future improvement: - Decompress XX MiB/KiB before starting the count. - For smaller files the extra decompression can then be avoided. + To save on some of the computation we will store the data until + we reach \c MaxDecompressedDataBufferSize stored. In this case the + "penalty" is completely removed from users who read the data on + readyRead rather than waiting for it all to be received. And + any file smaller than \c MaxDecompressedDataBufferSize will + avoid this issue as well. */ bool QDecompressHelper::countInternal() { Q_ASSERT(countDecompressed); + while (hasDataInternal() + && decompressedDataBuffer.byteAmount() < MaxDecompressedDataBufferSize) { + const qsizetype toRead = 256 * 1024; + QByteArray buffer(toRead, Qt::Uninitialized); + qsizetype bytesRead = readInternal(buffer.data(), buffer.size()); + if (bytesRead == -1) + return false; + buffer.truncate(bytesRead); + decompressedDataBuffer.append(std::move(buffer)); + } + if (!hasDataInternal()) + return true; // handled all the data so far, just return + while (countHelper->hasData()) { std::array<char, 1024> temp; qsizetype bytesRead = countHelper->read(temp.data(), temp.size()); if (bytesRead == -1) return false; - uncompressedBytes += bytesRead; } return true; } @@ -358,13 +377,45 @@ bool QDecompressHelper::countInternal(const QByteDataBuffer &buffer) qsizetype QDecompressHelper::read(char *data, qsizetype maxSize) { + if (maxSize <= 0) + return 0; + if (!isValid()) return -1; - qsizetype bytesRead = -1; if (!hasData()) return 0; + qsizetype cachedRead = 0; + if (!decompressedDataBuffer.isEmpty()) { + cachedRead = decompressedDataBuffer.read(data, maxSize); + data += cachedRead; + maxSize -= cachedRead; + } + + qsizetype bytesRead = readInternal(data, maxSize); + if (bytesRead == -1) + return -1; + totalBytesRead += bytesRead 
+ cachedRead; + return bytesRead + cachedRead; +} + +/*! + \internal + Like read() but without attempting to read the + cached/already-decompressed data. +*/ +qsizetype QDecompressHelper::readInternal(char *data, qsizetype maxSize) +{ + Q_ASSERT(isValid()); + + if (maxSize <= 0) + return 0; + + if (!hasDataInternal()) + return 0; + + qsizetype bytesRead = -1; switch (contentEncoding) { case None: Q_UNREACHABLE(); @@ -382,8 +433,6 @@ qsizetype QDecompressHelper::read(char *data, qsizetype maxSize) } if (bytesRead == -1) clear(); - else if (countDecompressed) - uncompressedBytes -= bytesRead; totalUncompressedBytes += bytesRead; if (isPotentialArchiveBomb()) @@ -450,6 +499,16 @@ bool QDecompressHelper::isPotentialArchiveBomb() const */ bool QDecompressHelper::hasData() const { + return hasDataInternal() || !decompressedDataBuffer.isEmpty(); +} + +/*! + \internal + Like hasData() but internally the buffer of decompressed data is + not interesting. +*/ +bool QDecompressHelper::hasDataInternal() const +{ return encodedBytesAvailable() || decoderHasData; } @@ -497,11 +556,12 @@ void QDecompressHelper::clear() contentEncoding = None; compressedDataBuffer.clear(); + decompressedDataBuffer.clear(); decoderHasData = false; countDecompressed = false; countHelper.reset(); - uncompressedBytes = 0; + totalBytesRead = 0; totalUncompressedBytes = 0; totalCompressedBytes = 0; } diff --git a/src/network/access/qdecompresshelper_p.h b/src/network/access/qdecompresshelper_p.h index 33241e14f1..b0b60b2119 100644 --- a/src/network/access/qdecompresshelper_p.h +++ b/src/network/access/qdecompresshelper_p.h @@ -98,6 +98,8 @@ public: private: bool isPotentialArchiveBomb() const; + bool hasDataInternal() const; + qsizetype readInternal(char *data, qsizetype maxSize); bool countInternal(); bool countInternal(const QByteArray &data); @@ -111,16 +113,18 @@ private: qsizetype readZstandard(char *data, qsizetype maxSize); QByteDataBuffer compressedDataBuffer; + QByteDataBuffer 
decompressedDataBuffer; + const qsizetype MaxDecompressedDataBufferSize = 10 * 1024 * 1024; bool decoderHasData = false; bool countDecompressed = false; std::unique_ptr<QDecompressHelper> countHelper; - qint64 uncompressedBytes = 0; // Used for calculating the ratio qint64 archiveBombCheckThreshold = 10 * 1024 * 1024; qint64 totalUncompressedBytes = 0; qint64 totalCompressedBytes = 0; + qint64 totalBytesRead = 0; ContentEncoding contentEncoding = None; |