1 files changed, 455 insertions, 169 deletions
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 90f277ba61..67c75d708e 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -10,6 +10,8 @@
 #include "private/qstringiterator_p.h"
 #include "private/qtools_p.h"
 #include "qbytearraymatcher.h"
+#include "qcontainertools_impl.h"
+#include <QtCore/qbytearraylist.h>
 
 #if QT_CONFIG(icu)
 #include <unicode/ucnv.h>
@@ -22,9 +24,13 @@
 #include <qt_windows.h>
 #ifndef QT_BOOTSTRAPPED
 #include <QtCore/qvarlengtharray.h>
+#include <QtCore/q20iterator.h>
+#include <QtCore/private/qnumeric_p.h>
 #endif // !QT_BOOTSTRAPPED
 #endif
 
+#include <array>
+
 #if __has_include(<bit>) && __cplusplus > 201703L
 #include <bit>
 #endif
@@ -202,7 +208,7 @@ static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end,
             continue;
 
         uint n = _mm256_movemask_epi8(data);
-        Q_ASSUME(n);
+        Q_ASSERT(n);
 
         // find the next probable ASCII character
         // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
@@ -936,6 +942,7 @@ int QUtf8::compareUtf8(QByteArrayView lhs, QByteArrayView rhs, Qt::CaseSensitivi
     return (end1 > src1) - (end2 > src2);
 }
 
+#ifndef QT_BOOTSTRAPPED
 QByteArray QUtf16::convertFromUnicode(QStringView in, QStringConverter::State *state, DataEndianness endian)
 {
     bool writeBom = !(state->internalState & HeaderDone) && state->flags & QStringConverter::Flag::WriteBom;
@@ -1245,6 +1252,7 @@ QChar *QUtf32::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter:
 
     return out;
 }
+#endif // !QT_BOOTSTRAPPED
 
 #if defined(Q_OS_WIN) && !defined(QT_BOOTSTRAPPED)
 int QLocal8Bit::checkUtf8()
@@ -1252,186 +1260,365 @@ int QLocal8Bit::checkUtf8()
     return GetACP() == CP_UTF8 ? 1 : -1;
 }
 
-static QString convertToUnicodeCharByChar(QByteArrayView in, QStringConverter::State *state)
+QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::State *state)
 {
-    qsizetype length = in.size();
-    const char *chars = in.data();
-
-    Q_ASSERT(state);
-    if (state->flags & QStringConverter::Flag::Stateless) // temporary
-        state = nullptr;
-
-    if (!chars || !length)
-        return QString();
-
-    qsizetype copyLocation = 0;
-    qsizetype extra = 2;
-    if (state && state->remainingChars) {
-        copyLocation = state->remainingChars;
-        extra += copyLocation;
-    }
-    qsizetype newLength = length + extra;
-    char *mbcs = new char[newLength];
-    //ensure that we have a NULL terminated string
-    mbcs[newLength-1] = 0;
-    mbcs[newLength-2] = 0;
-    memcpy(&(mbcs[copyLocation]), chars, length);
-    if (copyLocation) {
-        //copy the last character from the state
-        mbcs[0] = (char)state->state_data[0];
-        state->remainingChars = 0;
-    }
-    const char *mb = mbcs;
-    const char *next = 0;
-    QString s;
-    while ((next = CharNextExA(CP_ACP, mb, 0)) != mb) {
-        wchar_t wc[2] ={0};
-        int charlength = int(next - mb); // always just a few bytes
-        int len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS, mb, charlength, wc, 2);
-        if (len>0) {
-            s.append(QChar(wc[0]));
-        } else {
-            int r = GetLastError();
-            //check if the character being dropped is the last character
-            if (r == ERROR_NO_UNICODE_TRANSLATION && mb == (mbcs+newLength -3) && state) {
-                state->remainingChars = 1;
-                state->state_data[0] = (char)*mb;
-            }
-        }
-        mb = next;
-    }
-    delete [] mbcs;
-    return s;
+    return convertToUnicode_sys(in, CP_ACP, state);
 }
 
-
-QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, QStringConverter::State *state)
+QString QLocal8Bit::convertToUnicode_sys(QByteArrayView in, quint32 codePage,
+                                         QStringConverter::State *state)
 {
-    qsizetype length = in.size();
-
-    Q_ASSERT(length < INT_MAX); // ### FIXME
     const char *mb = in.data();
-    int mblen = length;
+    qsizetype mblen = in.size();
+
+    Q_ASSERT(state);
+    qsizetype &invalidChars = state->invalidChars;
+    using Flag = QStringConverter::Flag;
+    const bool useNullForReplacement = !!(state->flags & Flag::ConvertInvalidToNull);
+    const char16_t replacementCharacter = useNullForReplacement ? QChar::Null
+                                                                : QChar::ReplacementCharacter;
+    if (state->flags & Flag::Stateless) {
+        Q_ASSERT(state->remainingChars == 0);
+        state = nullptr;
+    }
 
     if (!mb || !mblen)
         return QString();
 
-    QVarLengthArray<wchar_t, 4096> wc(4096);
-    int len;
+    // Use a local stack-buffer at first to allow us a decently large container
+    // to avoid a lot of resizing, without also returning an overallocated
+    // QString to the user for small strings.
+    // Then we can be fast for small strings and take the hit of extra resizes
+    // and measuring how much storage is needed for large strings.
+    std::array<wchar_t, 4096> buf;
+    wchar_t *out = buf.data();
+    qsizetype outlen = buf.size();
+
     QString sp;
-    bool prepend = false;
-    char state_data = 0;
-    int remainingChars = 0;
-
-    //save the current state information
-    if (state) {
-        state_data = (char)state->state_data[0];
-        remainingChars = state->remainingChars;
-    }
 
-    //convert the pending character (if available)
-    if (state && remainingChars) {
-        char prev[3] = {0};
-        prev[0] = state_data;
-        prev[1] = mb[0];
-        remainingChars = 0;
-        len = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
-                                    prev, 2, wc.data(), wc.length());
-        if (len) {
-            sp.append(QChar(wc[0]));
-            if (mblen == 1) {
-                state->remainingChars = 0;
-                return sp;
-            }
-            prepend = true;
-            mb++;
-            mblen--;
-            wc[0] = 0;
+    // Return a pointer to storage where we have enough space for `size`
+    const auto growOut = [&](qsizetype size) -> std::tuple<wchar_t *, qsizetype> {
+        if (outlen >= size)
+            return {out, outlen};
+        const bool wasStackBuffer = sp.isEmpty();
+        const auto begin = wasStackBuffer ? buf.data() : reinterpret_cast<wchar_t *>(sp.data());
+        const qsizetype offset = qsizetype(std::distance(begin, out));
+        qsizetype newSize = 0;
+        if (Q_UNLIKELY(qAddOverflow(offset, size, &newSize))) {
+            Q_CHECK_PTR(false);
+            return {nullptr, 0};
+        }
+        sp.resize(newSize);
+        auto it = reinterpret_cast<wchar_t *>(sp.data());
+        if (wasStackBuffer)
+            it = std::copy_n(buf.data(), offset, it);
+        else
+            it += offset;
+        return {it, size};
+    };
+
+    // Convert the pending characters (if available)
+    while (state && state->remainingChars && mblen) {
+        QStringConverter::State localState;
+        localState.flags = state->flags;
+        // Use at most 6 characters as a guess for the longest encoded character
+        // in any multibyte encoding.
+        // Even with a total of 2 bytes of overhead that would leave around
+        // 2^(4 * 8) possible characters
+        std::array<char, 6> prev = {0};
+        Q_ASSERT(state->remainingChars <= q20::ssize(state->state_data));
+        qsizetype index = 0;
+        for (; index < state->remainingChars; ++index)
+            prev[index] = state->state_data[index];
+        const qsizetype toCopy = std::min(q20::ssize(prev) - index, mblen);
+        for (qsizetype i = 0; i < toCopy; ++i, ++index)
+            prev[index] = mb[i];
+        mb += toCopy;
+        mblen -= toCopy;
+
+        // Recursing:
+        // Since we are using a clean local state it will try to decode what was
+        // stored in our state + some extra octets from input (`prev`). If some
+        // part fails we will have those characters stored in the local state's
+        // storage, and we can extract those. It may also output some
+        // replacement characters, which we'll count in the invalidChars.
+        // In the best case we only do this once, but we will loop until we have
+        // resolved all the remaining characters or we have run out of new input
+        // in which case we may still have remaining characters.
+        const QString tmp = convertToUnicode_sys(QByteArrayView(prev.data(), index), codePage,
+                                                    &localState);
+        std::tie(out, outlen) = growOut(tmp.size());
+        if (!out)
+            return {};
+        out = std::copy_n(reinterpret_cast<const wchar_t *>(tmp.constData()), tmp.size(), out);
+        outlen -= tmp.size();
+        const qsizetype tail = toCopy - localState.remainingChars;
+        if (tail >= 0) {
+            // Everything left to process comes from `in`, so we can stop
+            // looping. Adjust the window for `in` and unset remainingChars to
+            // signal that we're done.
+            mb -= localState.remainingChars;
+            mblen += localState.remainingChars;
+            localState.remainingChars = 0;
         }
+        state->remainingChars = localState.remainingChars;
+        state->invalidChars += localState.invalidChars;
+        std::copy_n(localState.state_data, state->remainingChars, state->state_data);
     }
 
-    while (!(len=MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED|MB_ERR_INVALID_CHARS,
-                mb, mblen, wc.data(), wc.length()))) {
-        int r = GetLastError();
-        if (r == ERROR_INSUFFICIENT_BUFFER) {
-                const int wclen = MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
-                                    mb, mblen, 0, 0);
-                wc.resize(wclen);
-        } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
-            //find the last non NULL character
-            while (mblen > 1  && !(mb[mblen-1]))
-                mblen--;
-            //check whether,  we hit an invalid character in the middle
-            if ((mblen <= 1) || (remainingChars && state_data))
-                return convertToUnicodeCharByChar(in, state);
-            //Remove the last character and try again...
-            state_data = mb[mblen-1];
-            remainingChars = 1;
-            mblen--;
+    Q_ASSERT(!state || state->remainingChars == 0 || mblen == 0);
+
+    // Need it in this scope, since we try to decrease our window size if we
+    // encounter an error
+    int nextIn = qt_saturate<int>(mblen);
+    while (mblen > 0) {
+        std::tie(out, outlen) = growOut(1); // Need space for at least one character
+        if (!out)
+            return {};
+        const int nextOut = qt_saturate<int>(outlen);
+        int len = MultiByteToWideChar(codePage, MB_ERR_INVALID_CHARS, mb, nextIn, out, nextOut);
+        if (len) {
+            mb += nextIn;
+            mblen -= nextIn;
+            out += len;
+            outlen -= len;
         } else {
-            // Fail.
-            qWarning("MultiByteToWideChar: Cannot convert multibyte text");
-            break;
+            int r = GetLastError();
+            if (r == ERROR_INSUFFICIENT_BUFFER) {
+                const int wclen = MultiByteToWideChar(codePage, 0, mb, nextIn, 0, 0);
+                std::tie(out, outlen) = growOut(wclen);
+                if (!out)
+                    return {};
+            } else if (r == ERROR_NO_UNICODE_TRANSLATION) {
+                // Can't decode the current window, so either store the state,
+                // reduce window size or output a replacement character.
+
+                // Check if we can store all remaining characters in the state
+                // to be used next time we're called:
+                if (state && mblen <= q20::ssize(state->state_data)) {
+                    state->remainingChars = mblen;
+                    std::copy_n(mb, mblen, state->state_data);
+                    mb += mblen;
+                    mblen = 0;
+                    break;
+                }
+
+                // .. if not, try to find the last valid character in the window
+                // and try again with a shrunken window:
+                if (nextIn > 1) {
+                    // There may be some incomplete data at the end of our current
+                    // window, so decrease the window size and try again.
+                    // In the worst case scenario there is gigs of undecodable
+                    // garbage, but what are we supposed to do about that?
+                    const auto it = CharPrevExA(codePage, mb, mb + nextIn, 0);
+                    if (it != mb)
+                        nextIn = int(it - mb);
+                    else
+                        --nextIn;
+                    continue;
+                }
+
+                // Finally, we are forced to output a replacement character for
+                // the first byte in the window:
+                std::tie(out, outlen) = growOut(1);
+                if (!out)
+                    return {};
+                *out = replacementCharacter;
+                ++invalidChars;
+                ++out;
+                --outlen;
+                ++mb;
+                --mblen;
+            } else {
+                // Fail.
+                qWarning("MultiByteToWideChar: Cannot convert multibyte text");
+                break;
+            }
         }
+        nextIn = qt_saturate<int>(mblen);
     }
 
-    if (len <= 0)
-        return QString();
+    if (sp.isEmpty()) {
+        // We must have only used the stack buffer
+        if (out != buf.data()) // else: we return null-string
+            sp = QStringView(buf.data(), out).toString();
+    } else{
+        const auto begin = reinterpret_cast<wchar_t *>(sp.data());
+        sp.truncate(std::distance(begin, out));
+    }
 
-    if (wc[len-1] == 0) // len - 1: we don't want terminator
-        --len;
+    if (sp.size() && sp.back().isNull())
+        sp.chop(1);
 
-    //save the new state information
-    if (state) {
-        state->state_data[0] = (char)state_data;
-        state->remainingChars = remainingChars;
+    if (!state && mblen > 0) {
+        // We have trailing character(s) that could not be converted, and
+        // nowhere to cache them
+        sp.resize(sp.size() + mblen, replacementCharacter);
+        invalidChars += mblen;
     }
-    QString s((QChar*)wc.data(), len);
-    if (prepend) {
-        return sp+s;
-    }
-    return s;
+    return sp;
 }
 
 QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, QStringConverter::State *state)
 {
-    const QChar *ch = in.data();
+    return convertFromUnicode_sys(in, CP_ACP, state);
+}
+
+QByteArray QLocal8Bit::convertFromUnicode_sys(QStringView in, quint32 codePage,
+                                              QStringConverter::State *state)
+{
+    const wchar_t *ch = reinterpret_cast<const wchar_t *>(in.data());
     qsizetype uclen = in.size();
 
-    Q_ASSERT(uclen < INT_MAX); // ### FIXME
     Q_ASSERT(state);
-    Q_UNUSED(state); // ### Fixme
-    if (state->flags & QStringConverter::Flag::Stateless) // temporary
+    // The Windows API has a *boolean* out-parameter that says if a replacement
+    // character was used, but it gives us no way to know _how many_ were used.
+    // Since we cannot simply scan the string for replacement characters
+    // (which is potentially a question mark, and thus a valid character),
+    // we simply do not track the number of invalid characters here.
+    // auto &invalidChars = state->invalidChars;
+
+    using Flag = QStringConverter::Flag;
+    if (state->flags & Flag::Stateless) { // temporary
+        Q_ASSERT(state->remainingChars == 0);
         state = nullptr;
+    }
 
     if (!ch)
         return QByteArray();
     if (uclen == 0)
         return QByteArray("");
-    BOOL used_def;
-    QByteArray mb(4096, 0);
-    int len;
-    while (!(len=WideCharToMultiByte(CP_ACP, 0, (const wchar_t*)ch, uclen,
-                mb.data(), mb.size()-1, 0, &used_def)))
-    {
-        int r = GetLastError();
-        if (r == ERROR_INSUFFICIENT_BUFFER) {
-            mb.resize(1+WideCharToMultiByte(CP_ACP, 0,
-                                (const wchar_t*)ch, uclen,
-                                0, 0, 0, &used_def));
-                // and try again...
+
+    // Use a local stack-buffer at first to allow us a decently large container
+    // to avoid a lot of resizing, without also returning an overallocated
+    // QByteArray to the user for small strings.
+    // Then we can be fast for small strings and take the hit of extra resizes
+    // and measuring how much storage is needed for large strings.
+    std::array<char, 4096> buf;
+    char *out = buf.data();
+    qsizetype outlen = buf.size();
+    QByteArray mb;
+
+    if (state && state->remainingChars > 0) {
+        Q_ASSERT(state->remainingChars == 1);
+        // Let's try to decode the pending character
+        wchar_t wc[2] = { wchar_t(state->state_data[0]), ch[0] };
+        // Check if the second character is a valid low surrogate,
+        // otherwise we'll just decode the first character, for which windows
+        // will output a replacement character.
+        const bool validCodePoint = QChar::isLowSurrogate(wc[1]);
+        int len = WideCharToMultiByte(codePage, 0, wc, validCodePoint ? 2 : 1, out, outlen, nullptr,
+                                      nullptr);
+        if (!len)
+            return {}; // Cannot recover, and I refuse to believe it was a size limitation
+        out += len;
+        outlen -= len;
+        if (validCodePoint) {
+            ++ch;
+            --uclen;
+        }
+        state->remainingChars = 0;
+        state->state_data[0] = 0;
+        if (uclen == 0)
+            return QByteArrayView(buf.data(), len).toByteArray();
+    }
+
+    if (state && QChar::isHighSurrogate(ch[uclen - 1])) {
+        // We can handle a missing low surrogate at the end of the string,
+        // so if there is one, exclude it now and store it in the state.
+        state->remainingChars = 1;
+        state->state_data[0] = ch[uclen - 1];
+        --uclen;
+        if (uclen == 0)
+            return QByteArray();
+    }
+
+    Q_ASSERT(uclen > 0);
+
+    // Return a pointer to storage where we have enough space for `size`
+    const auto growOut = [&](qsizetype size) -> std::tuple<char *, qsizetype> {
+        if (outlen >= size)
+            return {out, outlen};
+        const bool wasStackBuffer = mb.isEmpty();
+        const auto begin = wasStackBuffer ? buf.data() : mb.data();
+        const qsizetype offset = qsizetype(std::distance(begin, out));
+        qsizetype newSize = 0;
+        if (Q_UNLIKELY(qAddOverflow(offset, size, &newSize))) {
+            Q_CHECK_PTR(false);
+            return {nullptr, 0};
+        }
+        mb.resize(newSize);
+        auto it = mb.data();
+        if (wasStackBuffer)
+            it = std::copy_n(buf.data(), offset, it);
+        else
+            it += offset;
+        return {it, size};
+    };
+
+    const auto getNextWindowSize = [&]() {
+        int nextIn = qt_saturate<int>(uclen);
+        // The Windows API has some issues if the current window ends in the
+        // middle of a surrogate pair, so we avoid that:
+        if (nextIn > 1 && QChar::isHighSurrogate(ch[nextIn - 1]))
+            --nextIn;
+        return nextIn;
+    };
+
+    int len = 0;
+    while (uclen > 0) {
+        const int nextIn = getNextWindowSize();
+        std::tie(out, outlen) = growOut(1); // We need at least one byte
+        if (!out)
+            return {};
+        const int nextOut = qt_saturate<int>(outlen);
+        len = WideCharToMultiByte(codePage, 0, ch, nextIn, out, nextOut, nullptr, nullptr);
+        if (len > 0) {
+            ch += nextIn;
+            uclen -= nextIn;
+            out += len;
+            outlen -= len;
         } else {
-            // Fail.  Probably can't happen in fact (dwFlags is 0).
+            int r = GetLastError();
+            if (r == ERROR_INSUFFICIENT_BUFFER) {
+                int neededLength = WideCharToMultiByte(codePage, 0, ch, nextIn, nullptr, 0,
+                                                       nullptr, nullptr);
+                if (neededLength <= 0) {
+                    // Fail. Observed with UTF8 where the input window was max int and ended in an
+                    // incomplete sequence, probably a Windows bug. We try to avoid that from
+                    // happening by reducing the window size in that case. But let's keep this
+                    // branch just in case of other bugs.
+#ifndef QT_NO_DEBUG
+                    r = GetLastError();
+                    fprintf(stderr,
+                            "WideCharToMultiByte: Cannot convert multibyte text (error %d)\n", r);
+#endif // !QT_NO_DEBUG
+                    break;
+                }
+                std::tie(out, outlen) = growOut(neededLength);
+                if (!out)
+                    return {};
+                // and try again...
+            } else {
+                // Fail.  Probably can't happen in fact (dwFlags is 0).
 #ifndef QT_NO_DEBUG
-            // Can't use qWarning(), as it'll recurse to handle %ls
-            fprintf(stderr,
-                    "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n",
-                    r, reinterpret_cast<const wchar_t*>(QString(ch, uclen).utf16()));
+                // Can't use qWarning(), as it'll recurse to handle %ls
+                fprintf(stderr,
+                        "WideCharToMultiByte: Cannot convert multibyte text (error %d): %ls\n", r,
+                        reinterpret_cast<const wchar_t *>(
+                                QStringView(ch, uclen).left(100).toString().utf16()));
 #endif
-            break;
+                break;
+            }
         }
     }
-    mb.resize(len);
+    if (mb.isEmpty()) {
+        // We must have only used the stack buffer
+        if (out != buf.data()) // else: we return null-array
+            mb = QByteArrayView(buf.data(), out).toByteArray();
+    } else {
+        mb.truncate(std::distance(mb.data(), out));
+    }
     return mb;
 }
 #endif
@@ -1462,6 +1649,7 @@ void QStringConverter::State::reset() noexcept
     }
 }
 
+#ifndef QT_BOOTSTRAPPED
 static QChar *fromUtf16(QChar *out, QByteArrayView in, QStringConverter::State *state)
 {
     return QUtf16::convertToUnicode(out, in, state, DetectEndianness);
@@ -1521,6 +1709,7 @@ static char *toUtf32LE(char *out, QStringView in, QStringConverter::State *state
 {
     return QUtf32::convertFromUnicode(out, in, state, LittleEndianness);
 }
+#endif // !QT_BOOTSTRAPPED
 
 char *QLatin1::convertFromUnicode(char *out, QStringView in, QStringConverter::State *state) noexcept
 {
@@ -1562,11 +1751,13 @@ static char *toLocal8Bit(char *out, QStringView in, QStringConverter::State *sta
 static qsizetype fromUtf8Len(qsizetype l) { return l + 1; }
 static qsizetype toUtf8Len(qsizetype l) { return 3*(l + 1); }
 
+#ifndef QT_BOOTSTRAPPED
 static qsizetype fromUtf16Len(qsizetype l) { return l/2 + 2; }
 static qsizetype toUtf16Len(qsizetype l) { return 2*(l + 1); }
 
 static qsizetype fromUtf32Len(qsizetype l) { return l/2 + 2; }
 static qsizetype toUtf32Len(qsizetype l) { return 4*(l + 1); }
+#endif
 
 static qsizetype fromLatin1Len(qsizetype l) { return l + 1; }
 static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
@@ -1599,7 +1790,7 @@ static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
     operation, encoding UTF-16 encoded data (usually in the form of a QString) to
     the requested encoding.
 
-    The supported encodings are:
+    The following encodings are always supported:
 
     \list
     \li UTF-8
@@ -1613,6 +1804,10 @@ static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
     \li The system encoding
     \endlist
 
+    QStringConverter may support more encodings depending on how Qt was
+    compiled. If more codecs are supported, they can be listed using
+    availableCodecs().
+
     \l {QStringConverter}s can be used as follows to convert some encoded
     string to and from UTF-16.
 
@@ -1702,12 +1897,14 @@ static qsizetype toLatin1Len(qsizetype l) { return l + 1; }
 const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringConverter::LastEncoding + 1] =
 {
     { "UTF-8", QUtf8::convertToUnicode, fromUtf8Len, QUtf8::convertFromUnicode, toUtf8Len },
+#ifndef QT_BOOTSTRAPPED
     { "UTF-16", fromUtf16, fromUtf16Len, toUtf16, toUtf16Len },
     { "UTF-16LE", fromUtf16LE, fromUtf16Len, toUtf16LE, toUtf16Len },
     { "UTF-16BE", fromUtf16BE, fromUtf16Len, toUtf16BE, toUtf16Len },
     { "UTF-32", fromUtf32, fromUtf32Len, toUtf32, toUtf32Len },
     { "UTF-32LE", fromUtf32LE, fromUtf32Len, toUtf32LE, toUtf32Len },
     { "UTF-32BE", fromUtf32BE, fromUtf32Len, toUtf32BE, toUtf32Len },
+#endif
     { "ISO-8859-1", QLatin1::convertToUnicode, fromLatin1Len, QLatin1::convertFromUnicode, toLatin1Len },
     { "Locale", fromLocal8Bit, fromUtf8Len, toLocal8Bit, toUtf8Len }
 };
@@ -1715,21 +1912,16 @@ const QStringConverter::Interface QStringConverter::encodingInterfaces[QStringCo
 // match names case insensitive and skipping '-' and '_'
 static bool nameMatch(const char *a, const char *b)
 {
-    while (*a && *b) {
-        if (*a == '-' || *a == '_') {
+    do {
+        while (*a == '-' || *a == '_')
             ++a;
-            continue;
-        }
-        if (*b == '-' || *b == '_') {
+        while (*b == '-' || *b == '_')
             ++b;
-            continue;
-        }
-        if (QtMiscUtils::toAsciiLower(*a) != QtMiscUtils::toAsciiLower(*b))
-            return false;
-        ++a;
-        ++b;
-    }
-    return !*a && !*b;
+        if (!*a && !*b) // end of both strings
+            return true;
+    } while (QtMiscUtils::toAsciiLower(*a++) == QtMiscUtils::toAsciiLower(*b++));
+
+    return false;
 }
 
 
@@ -1784,7 +1976,7 @@ struct QStringConverterICU : QStringConverter
         const void *context;
         ucnv_getToUCallBack(icu_conv, &action, &context);
         if (context != state)
-             ucnv_setToUCallBack(icu_conv, action, &state, nullptr, nullptr, &err);
+             ucnv_setToUCallBack(icu_conv, action, state, nullptr, nullptr, &err);
 
         ucnv_toUnicode(icu_conv, &target, targetLimit, &source, sourceLimit, nullptr, flush, &err);
         // We did reserve enough space:
@@ -1817,7 +2009,7 @@ struct QStringConverterICU : QStringConverter
         const void *context;
         ucnv_getFromUCallBack(icu_conv, &action, &context);
         if (context != state)
-             ucnv_setFromUCallBack(icu_conv, action, &state, nullptr, nullptr, &err);
+             ucnv_setFromUCallBack(icu_conv, action, state, nullptr, nullptr, &err);
 
         ucnv_fromUnicode(icu_conv, &target, targetLimit, &source, sourceLimit, nullptr, flush, &err);
         // We did reserve enough space:
@@ -2032,6 +2224,7 @@ const char *QStringConverter::name() const noexcept
 
     Returns the canonical name of the encoding this QStringConverter can encode or decode.
     Returns a nullptr if the converter is not valid.
+    The returned name is UTF-8 encoded.
 
     \sa isValid()
 */
@@ -2043,9 +2236,13 @@ const char *QStringConverter::name() const noexcept
     \c{std::nullopt} is returned. Such a name may, none the less, be accepted by
     the QStringConverter constructor when Qt is built with ICU, if ICU provides a
     converter with the given name.
+
+    \a name is expected to be UTF-8 encoded.
 */
 std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(const char *name) noexcept
 {
+    if (!name)
+        return std::nullopt;
     for (qsizetype i = 0; i < LastEncoding + 1; ++i) {
         if (nameMatch(encodingInterfaces[i].name, name))
             return QStringConverter::Encoding(i);
@@ -2055,6 +2252,7 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForName(cons
     return std::nullopt;
 }
 
+#ifndef QT_BOOTSTRAPPED
 /*!
    Returns the encoding for the content of \a data if it can be determined.
    \a expectedFirstCharacter can be passed as an additional hint to help determine
@@ -2163,6 +2361,63 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForHtml(QByt
     return Utf8;
 }
 
+static qsizetype availableCodecCount()
+{
+#if !QT_CONFIG(icu)
+    return QStringConverter::Encoding::LastEncoding;
+#else
+    /* icu contains also the names of what Qt provides
+       except for the special Locale one (so add one for it)
+    */
+    return 1 + ucnv_countAvailable();
+#endif
+}
+
+/*!
+    Returns a list of names of supported codecs. The names returned
+    by this function can be passed to QStringEncoder's and
+    QStringDecoder's constructor to create a en- or decoder for
+    the given codec.
+
+    This function may be used to obtain a listing of additional codecs beyond
+    the standard ones. Support for additional codecs requires Qt be compiled
+    with support for the ICU library.
+
+    \note The order of codecs is an internal implementation detail
+    and not guaranteed to be stable.
+ */
+QStringList QStringConverter::availableCodecs()
+{
+    auto availableCodec = [](qsizetype index) -> QString
+    {
+    #if !QT_CONFIG(icu)
+        return QString::fromLatin1(encodingInterfaces[index].name);
+    #else
+        if (index == 0) // "Locale", not provided by icu
+            return QString::fromLatin1(
+                        encodingInterfaces[QStringConverter::Encoding::System].name);
+        // this mirrors the setup we do to set a converters name
+        UErrorCode status = U_ZERO_ERROR;
+        auto icuName = ucnv_getAvailableName(int32_t(index - 1));
+        const char *standardName = ucnv_getStandardName(icuName, "MIME", &status);
+        if (U_FAILURE(status) || !standardName) {
+            status = U_ZERO_ERROR;
+            standardName = ucnv_getStandardName(icuName, "IANA", &status);
+        }
+        if (!standardName)
+            standardName = icuName;
+        return QString::fromLatin1(standardName);
+    #endif
+    };
+
+    qsizetype codecCount = availableCodecCount();
+    QStringList result;
+    result.reserve(codecCount);
+    for (qsizetype i = 0; i < codecCount; ++i)
+        result.push_back(availableCodec(i));
+    return result;
+}
+
 /*!
     Tries to determine the encoding of the HTML in \a data by looking at leading byte
     order marks or a charset specifier in the HTML meta tag and returns a QStringDecoder
@@ -2186,7 +2441,7 @@ QStringDecoder QStringDecoder::decoderForHtml(QByteArrayView data)
 
     return QStringDecoder(Utf8);
 }
-
+#endif // !QT_BOOTSTRAPPED
 
 /*!
     Returns the canonical name for encoding \a e.
@@ -2255,12 +2510,24 @@ const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
 */
 
 /*!
-    \fn QByteArray QStringEncoder::encode(const QString &in)
-    \fn QByteArray QStringEncoder::encode(QStringView in)
-    \fn QByteArray QStringEncoder::operator()(const QString &in)
-    \fn QByteArray QStringEncoder::operator()(QStringView in)
+    \fn constexpr QStringEncoder::QStringEncoder(const QString &name, Flags flags = Flag::Default)
+    \since 6.8
+
+    Creates an encoder object using \a name and \a flags.
+    If \a name is not the name of a known encoding an invalid converter will get created.
 
-    Converts \a in and returns the data as a byte array.
+    \sa isValid()
+*/
+
+/*!
+    \fn QStringEncoder::DecodedData<const QString &> QStringEncoder::encode(const QString &in)
+    \fn QStringEncoder::DecodedData<QStringView> QStringEncoder::encode(QStringView in)
+    \fn QStringEncoder::DecodedData<const QString &> QStringEncoder::operator()(const QString &in)
+    \fn QStringEncoder::DecodedData<QStringView> QStringEncoder::operator()(QStringView in)
+
+    Converts \a in and returns a struct that is implicitly convertible to QByteArray.
+
+    \snippet code/src_corelib_text_qstringconverter.cpp 5
 */
 
 /*!
@@ -2344,12 +2611,25 @@ const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
 */
 
 /*!
-    \fn QString QStringDecoder::operator()(const QByteArray &ba)
-    \fn QString QStringDecoder::decode(const QByteArray &ba)
-    \fn QString QStringDecoder::operator()(QByteArrayView ba)
-    \fn QString QStringDecoder::decode(QByteArrayView ba)
+    \fn constexpr QStringDecoder::QStringDecoder(const QString &name, Flags flags = Flag::Default)
+    \since 6.8
+
+    Creates an decoder object using \a name and \a flags.
+    If \a name is not the name of a known encoding an invalid converter will get created.
 
-    Converts \a ba and returns the data as a QString.
+    \sa isValid()
+*/
+
+/*!
+    \fn QStringDecoder::EncodedData<const QByteArray &> QStringDecoder::operator()(const QByteArray &ba)
+    \fn QStringDecoder::EncodedData<const QByteArray &> QStringDecoder::decode(const QByteArray &ba)
+    \fn QStringDecoder::EncodedData<QByteArrayView> QStringDecoder::operator()(QByteArrayView ba)
+    \fn QStringDecoder::EncodedData<QByteArrayView> QStringDecoder::decode(QByteArrayView ba)
+
+    Converts \a ba and returns a struct that is implicitly convertible to QString.
+
+
+    \snippet code/src_corelib_text_qstringconverter.cpp 4
 */
 
 /*!
@@ -2374,4 +2654,10 @@ const char *QStringConverter::nameForEncoding(QStringConverter::Encoding e)
     \sa requiredSpace
 */
 
+/*!
+    \fn char16_t *QStringDecoder::appendToBuffer(char16_t *out, QByteArrayView in)
+    \since 6.6
+    \overload
+*/
+
 QT_END_NAMESPACE