summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qbytearray.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qbytearray.cpp')
-rw-r--r--src/corelib/text/qbytearray.cpp337
1 files changed, 224 insertions, 113 deletions
diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp
index 825a431820..adf2a8de9a 100644
--- a/src/corelib/text/qbytearray.cpp
+++ b/src/corelib/text/qbytearray.cpp
@@ -41,6 +41,7 @@
#include "qbytearray.h"
#include "qbytearraymatcher.h"
+#include "qendian.h"
#include "private/qtools_p.h"
#include "qhashfunctions.h"
#include "qlist.h"
@@ -532,67 +533,212 @@ quint16 qChecksum(QByteArrayView data, Qt::ChecksumType standard)
The default value is -1, which specifies zlib's default
compression.
-//![compress-limit-note]
- \note The maximum size of data that this function can consume is limited by
- what the platform's \c{unsigned long} can represent (a Zlib limitation).
- That means that data > 4GiB can be compressed and decompressed on a 64-bit
- Unix system, but not on a 64-bit Windows system. Portable code should
- therefore avoid using qCompress()/qUncompress() to compress more than 4GiB
- of input.
-//![compress-limit-note]
-
\sa qUncompress(const QByteArray &data)
*/
-/*! \relates QByteArray
+/*!
+ \fn QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel)
+ \relates QByteArray
\overload
Compresses the first \a nbytes of \a data at compression level
\a compressionLevel and returns the compressed data in a new byte array.
-
- \include qbytearray.cpp compress-limit-note
*/
#ifndef QT_NO_COMPRESS
+using CompressSizeHint_t = quint32; // 32-bit BE, historically
+
+enum class ZLibOp : bool { Compression, Decompression };
+
+Q_DECL_COLD_FUNCTION
+static const char *zlibOpAsString(ZLibOp op)
+{
+ switch (op) {
+ case ZLibOp::Compression: return "qCompress";
+ case ZLibOp::Decompression: return "qUncompress";
+ }
+ Q_UNREACHABLE();
+ return nullptr;
+}
+
+Q_DECL_COLD_FUNCTION
+static QByteArray zlibError(ZLibOp op, const char *what)
+{
+ qWarning("%s: %s", zlibOpAsString(op), what);
+ return QByteArray();
+}
+
+Q_DECL_COLD_FUNCTION
+static QByteArray dataIsNull(ZLibOp op)
+{
+ return zlibError(op, "Data is null");
+}
+
+Q_DECL_COLD_FUNCTION
+static QByteArray lengthIsNegative(ZLibOp op)
+{
+ return zlibError(op, "Input length is negative");
+}
+
+Q_DECL_COLD_FUNCTION
+static QByteArray tooMuchData(ZLibOp op)
+{
+ return zlibError(op, "Not enough memory");
+}
+
+Q_DECL_COLD_FUNCTION
+static QByteArray invalidCompressedData()
+{
+ return zlibError(ZLibOp::Decompression, "Input data is corrupted");
+}
+
+Q_DECL_COLD_FUNCTION
+static QByteArray unexpectedZlibError(ZLibOp op, int err, const char *msg)
+{
+ qWarning("%s unexpected zlib error: %s (%d)",
+ zlibOpAsString(op),
+ msg ? msg : "",
+ err);
+ return QByteArray();
+}
+
+template <typename Init, typename ProcessChunk, typename Deinit>
+static QByteArray xxflate(ZLibOp op, QArrayDataPointer<char> out, QByteArrayView input,
+ Init init,
+ ProcessChunk processChunk,
+ Deinit deinit)
+{
+ if (out.data() == nullptr) // allocation failed
+ return tooMuchData(op);
+ qsizetype capacity = out.allocatedCapacity();
+
+ const auto initalSize = out.size;
+
+ z_stream zs = {};
+ zs.next_in = reinterpret_cast<uchar *>(const_cast<char *>(input.data())); // 1980s C API...
+ if (const int err = init(&zs); err != Z_OK)
+ return unexpectedZlibError(op, err, zs.msg);
+ const auto sg = qScopeGuard([&] { deinit(&zs); });
+
+ using ZlibChunkSize_t = decltype(zs.avail_in);
+ static_assert(!std::is_signed_v<ZlibChunkSize_t>);
+ static_assert(std::is_same_v<ZlibChunkSize_t, decltype(zs.avail_out)>);
+ constexpr auto MaxChunkSize = std::numeric_limits<ZlibChunkSize_t>::max();
+ [[maybe_unused]]
+ constexpr auto MaxStatisticsSize = std::numeric_limits<decltype(zs.total_out)>::max();
+
+ size_t inputLeft = size_t(input.size());
+
+ int res;
+ do {
+ Q_ASSERT(out.freeSpaceAtBegin() == 0); // ensure prepend optimization stays out of the way
+ Q_ASSERT(capacity == out.allocatedCapacity());
+
+ if (zs.avail_out == 0) {
+ Q_ASSERT(size_t(out.size) - initalSize > MaxStatisticsSize || // total_out overflow
+ size_t(out.size) - initalSize == zs.total_out);
+ Q_ASSERT(out.size <= capacity);
+
+ qsizetype avail_out = capacity - out.size;
+ if (avail_out == 0) {
+ out->reallocateAndGrow(QArrayData::GrowsAtEnd, 1); // grow to next natural capacity
+ if (out.data() == nullptr) // reallocation failed
+ return tooMuchData(op);
+ capacity = out.allocatedCapacity();
+ avail_out = capacity - out.size;
+ }
+ zs.next_out = reinterpret_cast<uchar *>(out.data()) + out.size;
+ zs.avail_out = size_t(avail_out) > size_t(MaxChunkSize) ? MaxChunkSize
+ : ZlibChunkSize_t(avail_out);
+ out.size += zs.avail_out;
+
+ Q_ASSERT(zs.avail_out > 0);
+ }
+
+ if (zs.avail_in == 0) {
+ // zs.next_in is kept up-to-date by processChunk(), so nothing to do
+ zs.avail_in = inputLeft > MaxChunkSize ? MaxChunkSize : ZlibChunkSize_t(inputLeft);
+ inputLeft -= zs.avail_in;
+ }
+
+ res = processChunk(&zs, inputLeft);
+ } while (res == Z_OK);
+
+ switch (res) {
+ case Z_STREAM_END:
+ out.size -= zs.avail_out;
+ Q_ASSERT(size_t(out.size) - initalSize > MaxStatisticsSize || // total_out overflow
+ size_t(out.size) - initalSize == zs.total_out);
+ Q_ASSERT(out.size <= out.allocatedCapacity());
+ out.data()[out.size] = '\0';
+ return QByteArray(std::move(out));
+
+ case Z_MEM_ERROR:
+ return tooMuchData(op);
+
+ case Z_BUF_ERROR:
+ Q_UNREACHABLE(); // cannot happen - we supply a buffer that can hold the result,
+ // or else error out early
+
+ case Z_DATA_ERROR: // can only happen on decompression
+ Q_ASSERT(op == ZLibOp::Decompression);
+ return invalidCompressedData();
+
+ default:
+ return unexpectedZlibError(op, res, zs.msg);
+ }
+}
+
QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel)
{
+ constexpr qsizetype HeaderSize = sizeof(CompressSizeHint_t);
if (nbytes == 0) {
- return QByteArray(4, '\0');
- }
- if (!data) {
- qWarning("qCompress: Data is null");
- return QByteArray();
+ return QByteArray(HeaderSize, '\0');
}
+ if (!data)
+ return dataIsNull(ZLibOp::Compression);
+
+ if (nbytes < 0)
+ return lengthIsNegative(ZLibOp::Compression);
+
if (compressionLevel < -1 || compressionLevel > 9)
compressionLevel = -1;
- ulong len = nbytes + nbytes / 100 + 13;
- QByteArray bazip;
- int res;
- do {
- bazip.resize(len + 4);
- res = ::compress2((uchar*)bazip.data()+4, &len, data, nbytes, compressionLevel);
-
- switch (res) {
- case Z_OK:
- bazip.resize(len + 4);
- bazip[0] = (nbytes & 0xff000000) >> 24;
- bazip[1] = (nbytes & 0x00ff0000) >> 16;
- bazip[2] = (nbytes & 0x0000ff00) >> 8;
- bazip[3] = (nbytes & 0x000000ff);
- break;
- case Z_MEM_ERROR:
- qWarning("qCompress: Z_MEM_ERROR: Not enough memory");
- bazip.resize(0);
- break;
- case Z_BUF_ERROR:
- len *= 2;
- break;
+ QArrayDataPointer out = [&] {
+ constexpr qsizetype SingleAllocLimit = 256 * 1024; // the maximum size for which we use
+ // zlib's compressBound() to guarantee
+ // the output buffer size is sufficient
+ // to hold result
+ qsizetype capacity = HeaderSize;
+ if (nbytes < SingleAllocLimit) {
+ // use maximum size
+ capacity += compressBound(uLong(nbytes)); // cannot overflow (both times)!
+ return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity)};
}
- } while (res == Z_BUF_ERROR);
- return bazip;
+ // for larger buffers, assume it compresses optimally, and
+ // grow geometrically from there:
+ constexpr qsizetype MaxCompressionFactor = 1024; // max theoretical factor is 1032
+ // cf. http://www.zlib.org/zlib_tech.html,
+ // but use a nearby power-of-two (faster)
+ capacity += std::max(qsizetype(compressBound(uLong(SingleAllocLimit))),
+ nbytes / MaxCompressionFactor);
+ return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity, QArrayData::Grow)};
+ }();
+
+ if (out.data() == nullptr) // allocation failed
+ return tooMuchData(ZLibOp::Compression);
+
+ qToBigEndian(qt_saturate<CompressSizeHint_t>(nbytes), out.data());
+ out.size = HeaderSize;
+
+ return xxflate(ZLibOp::Compression, std::move(out), {data, nbytes},
+ [=] (z_stream *zs) { return deflateInit(zs, compressionLevel); },
+ [] (z_stream *zs, size_t inputLeft) {
+ return deflate(zs, inputLeft ? Z_NO_FLUSH : Z_FINISH);
+ },
+ [] (z_stream *zs) { deflateEnd(zs); });
}
#endif
@@ -614,98 +760,63 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel)
data that was compressed using zlib, you first need to prepend a four
byte header to the byte array containing the data. The header must
contain the expected length (in bytes) of the uncompressed data,
- expressed as an unsigned, big-endian, 32-bit integer.
+ expressed as an unsigned, big-endian, 32-bit integer. This number is
+ just a hint for the initial size of the output buffer size,
+ though. If the indicated size is too small to hold the result, the
+ output buffer size will still be increased until either the output
+ fits or the system runs out of memory. So, despite the 32-bit
+ header, this function, on 64-bit platforms, can produce more than
+ 4GiB of output.
-//![uncompress-limit-note]
- \note The maximum size of data that this function can produce is limited by
- what the platform's \c{unsigned long} can represent (a Zlib limitation).
- That means that data > 4GiB can be compressed and decompressed on a 64-bit
- Unix system, but not on a 64-bit Windows system. Portable code should
- therefore avoid using qCompress()/qUncompress() to compress more than 4GiB
- of input.
-//![uncompress-limit-note]
+ \note In Qt versions prior to Qt 6.5, more than 2GiB of data
+ worked unreliably; in Qt versions prior to Qt 6.0, not at all.
\sa qCompress()
*/
#ifndef QT_NO_COMPRESS
-static QByteArray invalidCompressedData()
-{
- qWarning("qUncompress: Input data is corrupted");
- return QByteArray();
-}
-
/*! \relates QByteArray
\overload
Uncompresses the first \a nbytes of \a data and returns a new byte
array with the uncompressed data.
-
- \include qbytearray.cpp uncompress-limit-note
*/
QByteArray qUncompress(const uchar* data, qsizetype nbytes)
{
- if (!data) {
- qWarning("qUncompress: Data is null");
- return QByteArray();
- }
- if (nbytes <= 4) {
- if (nbytes < 4 || (data[0]!=0 || data[1]!=0 || data[2]!=0 || data[3]!=0))
- qWarning("qUncompress: Input data is corrupted");
- return QByteArray();
- }
- size_t expectedSize = size_t((data[0] << 24) | (data[1] << 16) |
- (data[2] << 8) | (data[3] ));
- size_t len = qMax(expectedSize, 1ul);
- constexpr size_t maxPossibleSize = MaxAllocSize - sizeof(QByteArray::Data);
- if (Q_UNLIKELY(len >= maxPossibleSize)) {
- // QByteArray does not support that huge size anyway.
- return invalidCompressedData();
- }
+ if (!data)
+ return dataIsNull(ZLibOp::Decompression);
- QByteArray::DataPointer d(QByteArray::Data::allocate(len));
- if (Q_UNLIKELY(d.data() == nullptr))
- return invalidCompressedData();
+ if (nbytes < 0)
+ return lengthIsNegative(ZLibOp::Decompression);
- forever {
- const auto alloc = len;
- int res = ::uncompress((uchar*)d.data(), reinterpret_cast<uLongf*>(&len),
- data+4, nbytes-4);
-
- switch (res) {
- case Z_OK: {
- Q_ASSERT(len <= alloc);
- Q_UNUSED(alloc);
- d.data()[len] = '\0';
- d.size = len;
- return QByteArray(d);
- }
+ constexpr qsizetype HeaderSize = sizeof(CompressSizeHint_t);
+ if (nbytes < HeaderSize)
+ return invalidCompressedData();
- case Z_MEM_ERROR:
- qWarning("qUncompress: Z_MEM_ERROR: Not enough memory");
- return QByteArray();
-
- case Z_BUF_ERROR:
- static_assert(maxPossibleSize <= (std::numeric_limits<decltype(len)>::max)() / 2,
- "oops, next line may overflow");
- len *= 2;
- if (Q_UNLIKELY(len >= maxPossibleSize)) {
- // QByteArray does not support that huge size anyway.
- return invalidCompressedData();
- } else {
- // grow the block
- d->reallocate(d->allocatedCapacity()*2, QArrayData::Grow);
- if (Q_UNLIKELY(d.data() == nullptr))
- return invalidCompressedData();
- }
- continue;
+ const auto expectedSize = qFromBigEndian<CompressSizeHint_t>(data);
+ if (nbytes == HeaderSize) {
+ if (expectedSize != 0)
+ return invalidCompressedData();
+ return QByteArray();
+ }
- case Z_DATA_ERROR:
- qWarning("qUncompress: Z_DATA_ERROR: Input data is corrupted");
- return QByteArray();
- }
+ constexpr auto MaxDecompressedSize = size_t(MaxByteArraySize);
+ if constexpr (MaxDecompressedSize < std::numeric_limits<CompressSizeHint_t>::max()) {
+ if (expectedSize > MaxDecompressedSize)
+ return tooMuchData(ZLibOp::Decompression);
}
+
+ // expectedSize may be truncated, so always use at least nbytes
+ // (larger by at most 1%, according to zlib docs)
+ qsizetype capacity = std::max(qsizetype(expectedSize), // cannot overflow!
+ nbytes);
+
+ QArrayDataPointer d(QTypedArrayData<char>::allocate(capacity, QArrayData::KeepSize));
+ return xxflate(ZLibOp::Decompression, std::move(d), {data + HeaderSize, nbytes - HeaderSize},
+ [] (z_stream *zs) { return inflateInit(zs); },
+ [] (z_stream *zs, size_t) { return inflate(zs, Z_NO_FLUSH); },
+ [] (z_stream *zs) { inflateEnd(zs); });
}
#endif