diff options
Diffstat (limited to 'src/corelib/text/qbytearray.cpp')
-rw-r--r-- | src/corelib/text/qbytearray.cpp | 337 |
1 files changed, 224 insertions, 113 deletions
diff --git a/src/corelib/text/qbytearray.cpp b/src/corelib/text/qbytearray.cpp index 825a431820..adf2a8de9a 100644 --- a/src/corelib/text/qbytearray.cpp +++ b/src/corelib/text/qbytearray.cpp @@ -41,6 +41,7 @@ #include "qbytearray.h" #include "qbytearraymatcher.h" +#include "qendian.h" #include "private/qtools_p.h" #include "qhashfunctions.h" #include "qlist.h" @@ -532,67 +533,212 @@ quint16 qChecksum(QByteArrayView data, Qt::ChecksumType standard) The default value is -1, which specifies zlib's default compression. -//![compress-limit-note] - \note The maximum size of data that this function can consume is limited by - what the platform's \c{unsigned long} can represent (a Zlib limitation). - That means that data > 4GiB can be compressed and decompressed on a 64-bit - Unix system, but not on a 64-bit Windows system. Portable code should - therefore avoid using qCompress()/qUncompress() to compress more than 4GiB - of input. -//![compress-limit-note] - \sa qUncompress(const QByteArray &data) */ -/*! \relates QByteArray +/*! + \fn QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel) + \relates QByteArray \overload Compresses the first \a nbytes of \a data at compression level \a compressionLevel and returns the compressed data in a new byte array. - - \include qbytearray.cpp compress-limit-note */ #ifndef QT_NO_COMPRESS +using CompressSizeHint_t = quint32; // 32-bit BE, historically + +enum class ZLibOp : bool { Compression, Decompression }; + +Q_DECL_COLD_FUNCTION +static const char *zlibOpAsString(ZLibOp op) +{ + switch (op) { + case ZLibOp::Compression: return "qCompress"; + case ZLibOp::Decompression: return "qUncompress"; + } + Q_UNREACHABLE(); + return nullptr; +} + +Q_DECL_COLD_FUNCTION +static QByteArray zlibError(ZLibOp op, const char *what) +{ + qWarning("%s: %s", zlibOpAsString(op), what); + return QByteArray(); +} + +Q_DECL_COLD_FUNCTION +static QByteArray dataIsNull(ZLibOp op) +{ + return zlibError(op, "Data is null"); +} + +Q_DECL_COLD_FUNCTION +static QByteArray lengthIsNegative(ZLibOp op) +{ + return zlibError(op, "Input length is negative"); +} + +Q_DECL_COLD_FUNCTION +static QByteArray tooMuchData(ZLibOp op) +{ + return zlibError(op, "Not enough memory"); +} + +Q_DECL_COLD_FUNCTION +static QByteArray invalidCompressedData() +{ + return zlibError(ZLibOp::Decompression, "Input data is corrupted"); +} + +Q_DECL_COLD_FUNCTION +static QByteArray unexpectedZlibError(ZLibOp op, int err, const char *msg) +{ + qWarning("%s unexpected zlib error: %s (%d)", + zlibOpAsString(op), + msg ? msg : "", + err); + return QByteArray(); +} + +template <typename Init, typename ProcessChunk, typename Deinit> +static QByteArray xxflate(ZLibOp op, QArrayDataPointer<char> out, QByteArrayView input, + Init init, + ProcessChunk processChunk, + Deinit deinit) +{ + if (out.data() == nullptr) // allocation failed + return tooMuchData(op); + qsizetype capacity = out.allocatedCapacity(); + + const auto initalSize = out.size; + + z_stream zs = {}; + zs.next_in = reinterpret_cast<uchar *>(const_cast<char *>(input.data())); // 1980s C API... + if (const int err = init(&zs); err != Z_OK) + return unexpectedZlibError(op, err, zs.msg); + const auto sg = qScopeGuard([&] { deinit(&zs); }); + + using ZlibChunkSize_t = decltype(zs.avail_in); + static_assert(!std::is_signed_v<ZlibChunkSize_t>); + static_assert(std::is_same_v<ZlibChunkSize_t, decltype(zs.avail_out)>); + constexpr auto MaxChunkSize = std::numeric_limits<ZlibChunkSize_t>::max(); + [[maybe_unused]] + constexpr auto MaxStatisticsSize = std::numeric_limits<decltype(zs.total_out)>::max(); + + size_t inputLeft = size_t(input.size()); + + int res; + do { + Q_ASSERT(out.freeSpaceAtBegin() == 0); // ensure prepend optimization stays out of the way + Q_ASSERT(capacity == out.allocatedCapacity()); + + if (zs.avail_out == 0) { + Q_ASSERT(size_t(out.size) - initalSize > MaxStatisticsSize || // total_out overflow + size_t(out.size) - initalSize == zs.total_out); + Q_ASSERT(out.size <= capacity); + + qsizetype avail_out = capacity - out.size; + if (avail_out == 0) { + out->reallocateAndGrow(QArrayData::GrowsAtEnd, 1); // grow to next natural capacity + if (out.data() == nullptr) // reallocation failed + return tooMuchData(op); + capacity = out.allocatedCapacity(); + avail_out = capacity - out.size; + } + zs.next_out = reinterpret_cast<uchar *>(out.data()) + out.size; + zs.avail_out = size_t(avail_out) > size_t(MaxChunkSize) ? MaxChunkSize + : ZlibChunkSize_t(avail_out); + out.size += zs.avail_out; + + Q_ASSERT(zs.avail_out > 0); + } + + if (zs.avail_in == 0) { + // zs.next_in is kept up-to-date by processChunk(), so nothing to do + zs.avail_in = inputLeft > MaxChunkSize ? MaxChunkSize : ZlibChunkSize_t(inputLeft); + inputLeft -= zs.avail_in; + } + + res = processChunk(&zs, inputLeft); + } while (res == Z_OK); + + switch (res) { + case Z_STREAM_END: + out.size -= zs.avail_out; + Q_ASSERT(size_t(out.size) - initalSize > MaxStatisticsSize || // total_out overflow + size_t(out.size) - initalSize == zs.total_out); + Q_ASSERT(out.size <= out.allocatedCapacity()); + out.data()[out.size] = '\0'; + return QByteArray(std::move(out)); + + case Z_MEM_ERROR: + return tooMuchData(op); + + case Z_BUF_ERROR: + Q_UNREACHABLE(); // cannot happen - we supply a buffer that can hold the result, + // or else error out early + + case Z_DATA_ERROR: // can only happen on decompression + Q_ASSERT(op == ZLibOp::Decompression); + return invalidCompressedData(); + + default: + return unexpectedZlibError(op, res, zs.msg); + } +} + QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel) { + constexpr qsizetype HeaderSize = sizeof(CompressSizeHint_t); if (nbytes == 0) { - return QByteArray(4, '\0'); - } - if (!data) { - qWarning("qCompress: Data is null"); - return QByteArray(); + return QByteArray(HeaderSize, '\0'); } + if (!data) + return dataIsNull(ZLibOp::Compression); + + if (nbytes < 0) + return lengthIsNegative(ZLibOp::Compression); + if (compressionLevel < -1 || compressionLevel > 9) compressionLevel = -1; - ulong len = nbytes + nbytes / 100 + 13; - QByteArray bazip; - int res; - do { - bazip.resize(len + 4); - res = ::compress2((uchar*)bazip.data()+4, &len, data, nbytes, compressionLevel); - - switch (res) { - case Z_OK: - bazip.resize(len + 4); - bazip[0] = (nbytes & 0xff000000) >> 24; - bazip[1] = (nbytes & 0x00ff0000) >> 16; - bazip[2] = (nbytes & 0x0000ff00) >> 8; - bazip[3] = (nbytes & 0x000000ff); - break; - case Z_MEM_ERROR: - qWarning("qCompress: Z_MEM_ERROR: Not enough memory"); - bazip.resize(0); - break; - case Z_BUF_ERROR: - len *= 2; - break; + QArrayDataPointer out = [&] { + constexpr qsizetype SingleAllocLimit = 256 * 1024; // the maximum size for which we use + // zlib's compressBound() to guarantee + // the output buffer size is sufficient + // to hold result + qsizetype capacity = HeaderSize; + if (nbytes < SingleAllocLimit) { + // use maximum size + capacity += compressBound(uLong(nbytes)); // cannot overflow (both times)! + return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity)}; } - } while (res == Z_BUF_ERROR); - return bazip; + // for larger buffers, assume it compresses optimally, and + // grow geometrically from there: + constexpr qsizetype MaxCompressionFactor = 1024; // max theoretical factor is 1032 + // cf. http://www.zlib.org/zlib_tech.html, + // but use a nearby power-of-two (faster) + capacity += std::max(qsizetype(compressBound(uLong(SingleAllocLimit))), + nbytes / MaxCompressionFactor); + return QArrayDataPointer{QTypedArrayData<char>::allocate(capacity, QArrayData::Grow)}; + }(); + + if (out.data() == nullptr) // allocation failed + return tooMuchData(ZLibOp::Compression); + + qToBigEndian(qt_saturate<CompressSizeHint_t>(nbytes), out.data()); + out.size = HeaderSize; + + return xxflate(ZLibOp::Compression, std::move(out), {data, nbytes}, + [=] (z_stream *zs) { return deflateInit(zs, compressionLevel); }, + [] (z_stream *zs, size_t inputLeft) { + return deflate(zs, inputLeft ? Z_NO_FLUSH : Z_FINISH); + }, + [] (z_stream *zs) { deflateEnd(zs); }); } #endif @@ -614,98 +760,63 @@ QByteArray qCompress(const uchar* data, qsizetype nbytes, int compressionLevel) data that was compressed using zlib, you first need to prepend a four byte header to the byte array containing the data. The header must contain the expected length (in bytes) of the uncompressed data, - expressed as an unsigned, big-endian, 32-bit integer. + expressed as an unsigned, big-endian, 32-bit integer. This number is + just a hint for the initial size of the output buffer size, + though. If the indicated size is too small to hold the result, the + output buffer size will still be increased until either the output + fits or the system runs out of memory. So, despite the 32-bit + header, this function, on 64-bit platforms, can produce more than + 4GiB of output. -//![uncompress-limit-note] - \note The maximum size of data that this function can produce is limited by - what the platform's \c{unsigned long} can represent (a Zlib limitation). - That means that data > 4GiB can be compressed and decompressed on a 64-bit - Unix system, but not on a 64-bit Windows system. Portable code should - therefore avoid using qCompress()/qUncompress() to compress more than 4GiB - of input. -//![uncompress-limit-note] + \note In Qt versions prior to Qt 6.5, more than 2GiB of data + worked unreliably; in Qt versions prior to Qt 6.0, not at all. \sa qCompress() */ #ifndef QT_NO_COMPRESS -static QByteArray invalidCompressedData() -{ - qWarning("qUncompress: Input data is corrupted"); - return QByteArray(); -} - /*! \relates QByteArray \overload Uncompresses the first \a nbytes of \a data and returns a new byte array with the uncompressed data. - - \include qbytearray.cpp uncompress-limit-note */ QByteArray qUncompress(const uchar* data, qsizetype nbytes) { - if (!data) { - qWarning("qUncompress: Data is null"); - return QByteArray(); - } - if (nbytes <= 4) { - if (nbytes < 4 || (data[0]!=0 || data[1]!=0 || data[2]!=0 || data[3]!=0)) - qWarning("qUncompress: Input data is corrupted"); - return QByteArray(); - } - size_t expectedSize = size_t((data[0] << 24) | (data[1] << 16) | - (data[2] << 8) | (data[3] )); - size_t len = qMax(expectedSize, 1ul); - constexpr size_t maxPossibleSize = MaxAllocSize - sizeof(QByteArray::Data); - if (Q_UNLIKELY(len >= maxPossibleSize)) { - // QByteArray does not support that huge size anyway. - return invalidCompressedData(); - } + if (!data) + return dataIsNull(ZLibOp::Decompression); - QByteArray::DataPointer d(QByteArray::Data::allocate(len)); - if (Q_UNLIKELY(d.data() == nullptr)) - return invalidCompressedData(); + if (nbytes < 0) + return lengthIsNegative(ZLibOp::Decompression); - forever { - const auto alloc = len; - int res = ::uncompress((uchar*)d.data(), reinterpret_cast<uLongf*>(&len), - data+4, nbytes-4); - - switch (res) { - case Z_OK: { - Q_ASSERT(len <= alloc); - Q_UNUSED(alloc); - d.data()[len] = '\0'; - d.size = len; - return QByteArray(d); - } + constexpr qsizetype HeaderSize = sizeof(CompressSizeHint_t); + if (nbytes < HeaderSize) + return invalidCompressedData(); - case Z_MEM_ERROR: - qWarning("qUncompress: Z_MEM_ERROR: Not enough memory"); - return QByteArray(); - - case Z_BUF_ERROR: - static_assert(maxPossibleSize <= (std::numeric_limits<decltype(len)>::max)() / 2, - "oops, next line may overflow"); - len *= 2; - if (Q_UNLIKELY(len >= maxPossibleSize)) { - // QByteArray does not support that huge size anyway. - return invalidCompressedData(); - } else { - // grow the block - d->reallocate(d->allocatedCapacity()*2, QArrayData::Grow); - if (Q_UNLIKELY(d.data() == nullptr)) - return invalidCompressedData(); - } - continue; + const auto expectedSize = qFromBigEndian<CompressSizeHint_t>(data); + if (nbytes == HeaderSize) { + if (expectedSize != 0) + return invalidCompressedData(); + return QByteArray(); + } - case Z_DATA_ERROR: - qWarning("qUncompress: Z_DATA_ERROR: Input data is corrupted"); - return QByteArray(); - } + constexpr auto MaxDecompressedSize = size_t(MaxByteArraySize); + if constexpr (MaxDecompressedSize < std::numeric_limits<CompressSizeHint_t>::max()) { + if (expectedSize > MaxDecompressedSize) + return tooMuchData(ZLibOp::Decompression); } + + // expectedSize may be truncated, so always use at least nbytes + // (larger by at most 1%, according to zlib docs) + qsizetype capacity = std::max(qsizetype(expectedSize), // cannot overflow! + nbytes); + + QArrayDataPointer d(QTypedArrayData<char>::allocate(capacity, QArrayData::KeepSize)); + return xxflate(ZLibOp::Decompression, std::move(d), {data + HeaderSize, nbytes - HeaderSize}, + [] (z_stream *zs) { return inflateInit(zs); }, + [] (z_stream *zs, size_t) { return inflate(zs, Z_NO_FLUSH); }, + [] (z_stream *zs) { inflateEnd(zs); }); } #endif |