1 files changed, 447 insertions, 47 deletions
diff --git a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
index a346615e39..ed3f91ac94 100644
--- a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
+++ b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
@@ -1,17 +1,34 @@
 // Copyright (C) 2021 The Qt Company Ltd.
 // Copyright (C) 2016 Intel Corporation.
-// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only
 
 #include <QTest>
 
 #include <QtCore/private/qglobal_p.h>
 #include <qstringconverter.h>
+#include <private/qstringconverter_p.h>
 #include <qthreadpool.h>
 
 #include <array>
+#include <numeric>
 
 using namespace Qt::StringLiterals;
 
+QT_BEGIN_NAMESPACE
+namespace QTest {
+template <typename T>
+char *toString(const std::optional<T> &opt)
+{
+    if (opt)
+        return QTest::toString(*opt);
+    else
+        return qstrdup("std::nullopt");
+}
+} // namespace QTest
+QT_END_NAMESPACE
+
+using QTest::toString;
+
 static constexpr bool IsBigEndian = QSysInfo::ByteOrder == QSysInfo::BigEndian;
 enum CodecLimitation {
     AsciiOnly,
@@ -130,6 +147,10 @@ private slots:
     void roundtrip_data();
     void roundtrip();
 
+    void convertL1U8();
+
+    void convertL1U16();
+
 #if QT_CONFIG(icu)
     void roundtripIcu_data();
     void roundtripIcu();
@@ -172,6 +193,20 @@ private slots:
 
     void encodingForHtml_data();
     void encodingForHtml();
+
+    void availableCodesAreAvailable();
+
+#ifdef Q_OS_WIN
+    // On all other systems local 8-bit encoding is UTF-8
+    void fromLocal8Bit_data();
+    void fromLocal8Bit();
+    void fromLocal8Bit_special_cases();
+    void fromLocal8Bit_2GiB();
+    void toLocal8Bit_data();
+    void toLocal8Bit();
+    void toLocal8Bit_special_cases();
+    void toLocal8Bit_2GiB();
+#endif
 };
 
 void tst_QStringConverter::constructByName()
@@ -246,8 +281,8 @@ void tst_QStringConverter::invalidConverter()
 
         decoder.resetState();
         QVERIFY(!decoder.hasError());
-        QChar buffer[100];
-        QChar *position = decoder.appendToBuffer(buffer, "Even more");
+        char16_t buffer[100];
+        char16_t *position = decoder.appendToBuffer(buffer, "Even more");
         QCOMPARE(position, buffer);
         QVERIFY(decoder.hasError());
     }
@@ -349,6 +384,33 @@ void tst_QStringConverter::convertUtf8CharByChar()
     QCOMPARE(reencoded, ba);
 }
 
+void tst_QStringConverter::convertL1U16()
+{
+    const QLatin1StringView latin1("some plain latin1 text");
+    const QString qstr(latin1);
+
+    QStringDecoder decoder(QStringConverter::Latin1);
+    QVERIFY(decoder.isValid());
+    QString uniString = decoder(latin1);
+    QCOMPARE(uniString, qstr);
+    QCOMPARE(latin1, uniString.toLatin1());
+
+    // do it again (using .decode())
+    uniString = decoder.decode(latin1);
+    QCOMPARE(uniString, qstr);
+    QCOMPARE(latin1, uniString.toLatin1());
+
+    QStringEncoder encoder(QStringConverter::Latin1);
+    QByteArray reencoded = encoder(uniString);
+    QCOMPARE(reencoded, QByteArrayView(latin1));
+    QCOMPARE(reencoded, uniString.toLatin1());
+
+    // do it again (using .encode())
+    reencoded = encoder.encode(uniString);
+    QCOMPARE(reencoded, QByteArrayView(latin1));
+    QCOMPARE(reencoded, uniString.toLatin1());
+}
+
 void tst_QStringConverter::roundtrip_data()
 {
     QTest::addColumn<QStringView>("utf16");
@@ -427,6 +489,18 @@ void tst_QStringConverter::roundtrip()
     QCOMPARE(decoded, uniString);
 }
 
+void tst_QStringConverter::convertL1U8()
+{
+    {
+        std::array<char, 256> latin1;
+        std::iota(latin1.data(), latin1.data() + latin1.size(), uchar(0));
+        std::array<char, 512> utf8;
+        auto out = QUtf8::convertFromLatin1(utf8.data(), QLatin1StringView{latin1.data(), latin1.size()});
+        QCOMPARE(QString::fromLatin1(latin1.data(), latin1.size()),
+                 QString::fromUtf8(utf8.data(), out - utf8.data()));
+    }
+}
+
 #if QT_CONFIG(icu)
 
 void tst_QStringConverter::roundtripIcu_data()
@@ -497,11 +571,10 @@ void tst_QStringConverter::charByCharConsistency_data()
 
 void tst_QStringConverter::charByCharConsistency()
 {
-    QFETCH(QStringView, source);
-    QFETCH(QByteArray, codec);
+    QFETCH(const QStringView, source);
+    QFETCH(const QByteArray, codec);
 
-    {
-        QStringEncoder encoder(codec);
+    const auto check = [&](QStringEncoder encoder){
         if (!encoder.isValid())
             QSKIP("Unsupported codec");
 
@@ -512,19 +585,28 @@ void tst_QStringConverter::charByCharConsistency()
             stepByStepConverted += encoder.encode(codeUnit);
         }
         QCOMPARE(stepByStepConverted, fullyConverted);
-    }
+    };
+
+    check(QStringEncoder(codec));
+    if (QTest::currentTestResolved()) return;
+
+    check(QStringEncoder(codec, QStringConverter::Flag::ConvertInvalidToNull));
+    if (QTest::currentTestResolved()) return;
+
+    // moved codecs also work:
 
     {
-        QStringEncoder encoder(codec, QStringConverter::Flag::ConvertInvalidToNull);
+        QStringEncoder dec(codec);
+        check(std::move(dec));
+    }
+    if (QTest::currentTestResolved()) return;
 
-        QByteArray fullyConverted = encoder.encode(source);
-        encoder.resetState();
-        QByteArray stepByStepConverted;
-        for (const auto& codeUnit: source) {
-            stepByStepConverted += encoder.encode(codeUnit);
-        }
-        QCOMPARE(stepByStepConverted, fullyConverted);
+    {
+        QStringEncoder dec(codec, QStringConverter::Flag::ConvertInvalidToNull);
+        check(std::move(dec));
     }
+    if (QTest::currentTestResolved()) return;
+
 }
 
 void tst_QStringConverter::byteByByteConsistency_data()
@@ -541,11 +623,10 @@ void tst_QStringConverter::byteByByteConsistency_data()
 
 void tst_QStringConverter::byteByByteConsistency()
 {
-    QFETCH(QByteArray, source);
-    QFETCH(QByteArray, codec);
+    QFETCH(const QByteArray, source);
+    QFETCH(const QByteArray, codec);
 
-    {
-        QStringDecoder decoder(codec);
+    const auto check = [&](QStringDecoder decoder) {
         if (!decoder.isValid())
             QSKIP("Unsupported codec");
 
@@ -558,23 +639,28 @@ void tst_QStringConverter::byteByByteConsistency()
             stepByStepConverted += decoder.decode(singleChar);
         }
         QCOMPARE(stepByStepConverted, fullyConverted);
-    }
+    };
+
+    check(QStringDecoder(codec));
+    if (QTest::currentTestResolved()) return;
+
+    check(QStringDecoder(codec, QStringConverter::Flag::ConvertInvalidToNull));
+    if (QTest::currentTestResolved()) return;
+
+    // moved codecs also work:
 
     {
-        QStringDecoder decoder(codec, QStringConverter::Flag::ConvertInvalidToNull);
-        if (!decoder.isValid())
-            QSKIP("Unsupported codec");
+        QStringDecoder dec(codec);
+        check(std::move(dec));
+    }
+    if (QTest::currentTestResolved()) return;
 
-        QString fullyConverted = decoder.decode(source);
-        decoder.resetState();
-        QString stepByStepConverted;
-        for (const auto& byte: source) {
-            QByteArray singleChar;
-            singleChar.append(byte);
-            stepByStepConverted += decoder.decode(singleChar);
-        }
-        QCOMPARE(stepByStepConverted, fullyConverted);
+    {
+        QStringDecoder dec(codec, QStringConverter::Flag::ConvertInvalidToNull);
+        check(std::move(dec));
     }
+    if (QTest::currentTestResolved()) return;
+
 }
 
 void tst_QStringConverter::statefulPieceWise()
@@ -2154,25 +2240,42 @@ void tst_QStringConverter::encodingForName_data()
     QTest::addColumn<QByteArray>("name");
     QTest::addColumn<std::optional<QStringConverter::Encoding>>("encoding");
 
-    QTest::newRow("UTF-8") << QByteArray("UTF-8") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
-    QTest::newRow("utf8") << QByteArray("utf8") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
-    QTest::newRow("Utf-8") << QByteArray("Utf-8") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8);
-    QTest::newRow("UTF-16") << QByteArray("UTF-16") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf16);
-    QTest::newRow("UTF-16le") << QByteArray("UTF-16le") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf16LE);
-    QTest::newRow("ISO-8859-1") << QByteArray("ISO-8859-1") << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1);
-    QTest::newRow("ISO8859-1") << QByteArray("ISO8859-1") << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1);
-    QTest::newRow("iso8859-1") << QByteArray("iso8859-1") << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1);
-    QTest::newRow("latin1") << QByteArray("latin1") << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1);
-    QTest::newRow("latin2") << QByteArray("latin2") << std::optional<QStringConverter::Encoding>();
-    QTest::newRow("latin15") << QByteArray("latin15") << std::optional<QStringConverter::Encoding>();
+    auto row = [](const char *name, std::optional<QStringConverter::Encoding> expected = std::nullopt) {
+        auto protect = [](auto p) { return p ? *p ? p : "<empty>" : "<nullptr>"; };
+        QTest::addRow("%s", protect(name)) << QByteArray(name) << expected;
+    };
+
+    row("UTF-8",       QStringConverter::Utf8);
+    row("utf8",        QStringConverter::Utf8);
+    row("Utf-8",       QStringConverter::Utf8);
+    row("UTF-16",      QStringConverter::Utf16);
+    row("UTF-16le",    QStringConverter::Utf16LE);
+    row("ISO-8859-1",  QStringConverter::Latin1);
+    row("ISO8859-1",   QStringConverter::Latin1);
+    row("iso8859-1",   QStringConverter::Latin1);
+    row("latin1",      QStringConverter::Latin1);
+    row("latin-1_-",   QStringConverter::Latin1);
+    row("latin_1-_",   QStringConverter::Latin1);
+    row("-_latin-1",   QStringConverter::Latin1);
+    row("_-latin_1",   QStringConverter::Latin1);
+
+    // failures:
+    row(nullptr);
+    row("");
+    row("latin2");
+    row("latin42");
+    row(" latin1");    // spaces are significant
+    row("\tlatin1");   // HTs are significant
 }
 
 void tst_QStringConverter::encodingForName()
 {
-    QFETCH(QByteArray, name);
-    QFETCH(std::optional<QStringConverter::Encoding>, encoding);
+    QFETCH(const QByteArray, name);
+    QFETCH(const std::optional<QStringConverter::Encoding>, encoding);
 
-    auto e = QStringConverter::encodingForName(name);
+    const auto *ptr = name.isNull() ? nullptr : name.data();
+
+    const auto e = QStringConverter::encodingForName(ptr);
     QCOMPARE(e, encoding);
 }
 
@@ -2381,6 +2484,13 @@ void tst_QStringConverter::encodingForHtml()
     }
 }
 
+void tst_QStringConverter::availableCodesAreAvailable()
+{
+    auto codecs = QStringConverter::availableCodecs();
+    for (const auto &codecName: codecs)
+        QVERIFY(QStringEncoder(codecName.toLatin1()).isValid());
+}
+
 class LoadAndConvert: public QRunnable
 {
 public:
@@ -2411,6 +2521,10 @@ void tst_QStringConverter::initTestCase()
 
 void tst_QStringConverter::threadSafety()
 {
+#if defined(Q_OS_WASM)
+    QSKIP("This test misbehaves on WASM. Investigation needed (QTBUG-110067)");
+#endif
+
     QThreadPool::globalInstance()->setMaxThreadCount(12);
 
     QList<QString> res;
@@ -2426,6 +2540,292 @@ void tst_QStringConverter::threadSafety()
         QCOMPARE(b, QString::fromLatin1("abcdefghijklmonpqrstufvxyz"));
 }
 
+#ifdef Q_OS_WIN
+void tst_QStringConverter::fromLocal8Bit_data()
+{
+    QTest::addColumn<QByteArray>("eightBit");
+    QTest::addColumn<QString>("utf16");
+    QTest::addColumn<quint32>("codePage");
+
+    constexpr uint WINDOWS_1252 = 1252u;
+    QTest::newRow("windows-1252") << "Hello, world!"_ba << u"Hello, world!"_s << WINDOWS_1252;
+    constexpr uint SHIFT_JIS = 932u;
+    // Mostly two byte characters, but the comma is a single byte character (0xa4)
+    QTest::newRow("shiftJIS")
+            << "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd\xa4\x90\xa2\x8a\x45\x81\x49"_ba
+            << u"こんにちは､世界！"_s << SHIFT_JIS;
+
+    constexpr uint GB_18030 = 54936u;
+    QTest::newRow("GB-18030") << "\xc4\xe3\xba\xc3\xca\xc0\xbd\xe7\xa3\xa1"_ba << u"你好世界！"_s
+                              << GB_18030;
+}
+
+void tst_QStringConverter::fromLocal8Bit()
+{
+    QFETCH(const QByteArray, eightBit);
+    QFETCH(const QString, utf16);
+    QFETCH(const quint32, codePage);
+
+    QStringConverter::State state;
+
+    QString result = QLocal8Bit::convertToUnicode_sys(eightBit, codePage, &state);
+    QCOMPARE(result, utf16);
+    QCOMPARE(state.remainingChars, 0);
+
+    result.clear();
+    state.clear();
+    for (char c : eightBit)
+        result += QLocal8Bit::convertToUnicode_sys({&c, 1}, codePage, &state);
+    QCOMPARE(result, utf16);
+    QCOMPARE(state.remainingChars, 0);
+
+    result.clear();
+    state.clear();
+    // Decode the full string again, this time without state
+    state.flags |= QStringConverter::Flag::Stateless;
+    result = QLocal8Bit::convertToUnicode_sys(eightBit, codePage, &state);
+    QCOMPARE(result, utf16);
+    QCOMPARE(state.remainingChars, 0);
+}
+
+void tst_QStringConverter::fromLocal8Bit_special_cases()
+{
+    QStringConverter::State state;
+    constexpr uint SHIFT_JIS = 932u;
+    // Decode a 2-octet character, but only provide 1 octet at first:
+    QString result = QLocal8Bit::convertToUnicode_sys("\x82", SHIFT_JIS, &state);
+    QCOMPARE(result, QString());
+    QVERIFY(result.isNull());
+    QCOMPARE_GT(state.remainingChars, 0);
+    // Then provide the second octet:
+    result = QLocal8Bit::convertToUnicode_sys("\xb1", SHIFT_JIS, &state);
+    QCOMPARE(result, u"こ");
+    QCOMPARE(state.remainingChars, 0);
+
+    // And without state:
+    result.clear();
+    QStringConverter::State statelessState;
+    statelessState.flags |= QStringConverter::Flag::Stateless;
+    result = QLocal8Bit::convertToUnicode_sys("\x82", SHIFT_JIS, &statelessState);
+    result += QLocal8Bit::convertToUnicode_sys("\xb1", SHIFT_JIS, &statelessState);
+    // 0xb1 is a valid single-octet character in Shift-JIS, so the output
+    // isn't really what you would expect:
+    QCOMPARE(result, QString(QChar::ReplacementCharacter) + u'ｱ');
+    QCOMPARE(statelessState.remainingChars, 0);
+
+    // Now try a 3-octet UTF-8 sequence:
+    result.clear();
+    state.clear();
+    constexpr uint UTF8 = 65001u;
+    // First the first 2 octets:
+    result = QLocal8Bit::convertToUnicode_sys("\xe4\xbd", UTF8, &state);
+    QCOMPARE(result, QString());
+    QVERIFY(result.isNull());
+    QCOMPARE_GT(state.remainingChars, 0);
+    // Then provide the remaining octet:
+    result = QLocal8Bit::convertToUnicode_sys("\xa0", UTF8, &state);
+    QCOMPARE(result, u"你");
+    QCOMPARE(state.remainingChars, 0);
+
+    // Now the same, but there is an incomplete sequence at the start
+    result.clear();
+    state.clear();
+    result = QLocal8Bit::convertToUnicode_sys("\xe4\xe4\xbd", UTF8, &state);
+    QCOMPARE(result, QString());
+    QVERIFY(result.isNull());
+    // Remaining octet (and a '.' to force it to discard something from the
+    // internal state which is currently limited to 4 octets):
+    result += QLocal8Bit::convertToUnicode_sys("\xa0.", UTF8, &state);
+    QCOMPARE(result, QChar::ReplacementCharacter + u"你."_s);
+    QCOMPARE(state.remainingChars, 0);
+
+    // Test QTBUG-118834, which is failing
+    result.clear();
+    state.clear();
+    result = QLocal8Bit::convertToUnicode_sys("\xe4\xe4\xbd", UTF8, &state);
+    QCOMPARE(result, QString());
+    QVERIFY(result.isNull());
+    // Remaining octet:
+    result += QLocal8Bit::convertToUnicode_sys("\xa0", UTF8, &state);
+    QEXPECT_FAIL("", "QTBUG-118834: We don't output anything because it's "
+                     "within the size of our internal state, and we cannot "
+                     "signal that it needs to be drained.", Continue);
+    QCOMPARE(result, QChar::ReplacementCharacter + u"你"_s);
+    QEXPECT_FAIL("", "QTBUG-118834: As above", Continue);
+    QCOMPARE(state.remainingChars, 0);
+
+    // Now try a 4-octet GB 18030 sequence:
+    result.clear();
+    state.clear();
+    constexpr uint GB_18030 = 54936u;
+    const char sequence[] = "\x95\x32\x90\x31";
+    // Repeat the sequence multiple times to test handling of exhaustion of
+    // internal buffer
+    QByteArray repeated = QByteArray(sequence).repeated(2049);
+    QByteArrayView octets = QByteArrayView(repeated);
+    result = QLocal8Bit::convertToUnicode_sys(octets.first(2), GB_18030, &state);
+    QCOMPARE(result, QString());
+    QVERIFY(result.isNull());
+    QCOMPARE_GT(state.remainingChars, 0);
+    // Then provide one more octet:
+    result = QLocal8Bit::convertToUnicode_sys(octets.sliced(2, 1), GB_18030, &state);
+    QCOMPARE(result, QString());
+    QVERIFY(result.isNull());
+    QCOMPARE_GT(state.remainingChars, 0);
+    // Then provide the last octet + the rest of the string
+    result = QLocal8Bit::convertToUnicode_sys(octets.sliced(3), GB_18030, &state);
+    QCOMPARE(result.first(2), u"𠂇");
+    QCOMPARE(state.remainingChars, 0);
+}
+
+void tst_QStringConverter::fromLocal8Bit_2GiB()
+{
+#if QT_POINTER_SIZE == 4
+    QSKIP("This test is only relevant for 64-bit builds");
+#else
+    qsizetype size = qsizetype(std::numeric_limits<int>::max()) + 3;
+    QByteArray input;
+    QT_TRY {
+        input.reserve(size);
+    } QT_CATCH (const std::bad_alloc &) {
+        QSKIP("Out of memory");
+    }
+    // fill with '､' - a single octet character in Shift-JIS
+    input.fill('\xa4', std::numeric_limits<int>::max() - 1);
+    // then append 'こ' - a two octet character in Shift-JIS
+    // which is now straddling the 2 GiB boundary
+    input += "\x82\xb1";
+    // then append another two '､', so that our output is also crossing the
+    // 2 GiB boundary
+    input += "\xa4\xa4";
+    QCOMPARE(input.size(), input.capacity());
+    constexpr uint SHIFT_JIS = 932u;
+    QStringConverter::State state;
+    QString result;
+    QT_TRY {
+        result = QLocal8Bit::convertToUnicode_sys(input, SHIFT_JIS, &state);
+    } QT_CATCH (const std::bad_alloc &) {
+        QSKIP("Out of memory");
+    }
+    QCOMPARE(result.size(), size - 1); // The 2-octet character is only 1 code unit in UTF-16
+    QCOMPARE(result.last(4), u"､こ､､"); // Check we correctly decoded it
+    QCOMPARE(state.remainingChars, 0); // and there is nothing left in the state
+#endif
+}
+
+void tst_QStringConverter::toLocal8Bit_data()
+{
+    fromLocal8Bit_data();
+}
+
+void tst_QStringConverter::toLocal8Bit()
+{
+    QFETCH(const QByteArray, eightBit);
+    QFETCH(const QString, utf16);
+    QFETCH(const quint32, codePage);
+
+    QStringConverter::State state;
+
+    QByteArray result = QLocal8Bit::convertFromUnicode_sys(utf16, codePage, &state);
+    QCOMPARE(result, eightBit);
+    QCOMPARE(state.remainingChars, 0);
+
+    result.clear();
+    state.clear();
+    for (QChar c : utf16)
+        result += QLocal8Bit::convertFromUnicode_sys(QStringView(&c, 1), codePage, &state);
+    QCOMPARE(result, eightBit);
+    QCOMPARE(state.remainingChars, 0);
+
+    result.clear();
+    state.clear();
+    // Decode the full string again, this time without state
+    state.flags |= QStringConverter::Flag::Stateless;
+    result = QLocal8Bit::convertFromUnicode_sys(utf16, codePage, &state);
+    QCOMPARE(result, eightBit);
+    QCOMPARE(state.remainingChars, 0);
+}
+
+void tst_QStringConverter::toLocal8Bit_special_cases()
+{
+    QStringConverter::State state;
+    // Normally utf8 goes through a different code path, but we can force it here
+    constexpr uint UTF8 = 65001u;
+    // Decode a 2-code unit character, but only provide 1 code unit at first:
+    const char16_t a[] = u"𬽦";
+    QStringView codeUnits = a;
+    QByteArray result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &state);
+    QCOMPARE(result, QString());
+    QVERIFY(result.isNull());
+    QCOMPARE_GT(state.remainingChars, 0);
+    // Then provide the second code unit:
+    result = QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &state);
+    QCOMPARE(result, "\xf0\xac\xbd\xa6"_ba);
+    QCOMPARE(state.remainingChars, 0);
+
+    // Retain compat with the behavior for toLocal8Bit:
+    QCOMPARE(codeUnits.first(1).toLocal8Bit(), "?");
+
+    // QString::toLocal8Bit is already stateless, but test stateless handling
+    // explicitly anyway:
+    result.clear();
+    QStringConverter::State statelessState;
+    statelessState.flags |= QStringConverter::Flag::Stateless;
+    result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &statelessState);
+    result += QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &statelessState);
+    // Windows uses the replacement character for invalid characters:
+    QCOMPARE(result, "\ufffd\ufffd");
+
+    // Now do the same, but the second time we feed in a character, we also
+    // provide many more so the internal stack buffer is not large enough.
+    result.clear();
+    state.clear();
+    QString str = QStringView(a).toString().repeated(2048);
+    codeUnits = str;
+    result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &state);
+    QCOMPARE(result, QString());
+    QVERIFY(result.isNull());
+    QCOMPARE_GT(state.remainingChars, 0);
+    // Then we provide the rest of the string:
+    result = QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &state);
+    QCOMPARE(result.first(4), "\xf0\xac\xbd\xa6"_ba);
+    QCOMPARE(state.remainingChars, 0);
+}
+
+void tst_QStringConverter::toLocal8Bit_2GiB()
+{
+#if QT_POINTER_SIZE == 4
+    QSKIP("This test is only relevant for 64-bit builds");
+#else
+    constexpr qsizetype TwoGiB = qsizetype(std::numeric_limits<int>::max());
+    QString input;
+    QT_TRY {
+        input.reserve(TwoGiB + 1);
+    } QT_CATCH (const std::bad_alloc &) {
+        QSKIP("Out of memory");
+    }
+    // Fill with a single code unit character
+    input.fill(u'.', TwoGiB - 1);
+    // Then append a 2 code unit character, so that the input straddles the 2 GiB
+    // boundary
+    input += u"🙂";
+    QCOMPARE(input.size(), input.capacity());
+    constexpr uint UTF8 = 65001u;
+    QStringConverter::State state;
+    QByteArray result;
+    QT_TRY {
+        result = QLocal8Bit::convertFromUnicode_sys(input, UTF8, &state);
+    } QT_CATCH (const std::bad_alloc &) {
+        QSKIP("Out of memory");
+    }
+    QUtf8StringView rView = result;
+    QCOMPARE(rView.size(), TwoGiB + 3); // The 2 code unit smiley is 4 code units in UTF-8
+    QCOMPARE(rView.last(7), u8"...🙂"); // Check we correctly decoded it
+    QCOMPARE(state.remainingChars, 0); // and there is nothing left in the state
+#endif
+}
+#endif // Q_OS_WIN
+
 struct DontCrashAtExit {
     ~DontCrashAtExit() {
         QStringDecoder decoder(QStringDecoder::Utf8);