23 files changed, 1294 insertions, 1239 deletions
diff --git a/qmake/CMakeLists.txt b/qmake/CMakeLists.txt
index d724b44d59..fa7d50f234 100644
--- a/qmake/CMakeLists.txt
+++ b/qmake/CMakeLists.txt
@@ -40,7 +40,6 @@ qt_add_tool(qmake # special case
         ../src/3rdparty/pcre2/src/pcre2_ucp.h
         ../src/3rdparty/pcre2/src/pcre2_valid_utf.c
         ../src/3rdparty/pcre2/src/pcre2_xclass.c
-        ../src/corelib/codecs/qutfcodec.cpp ../src/corelib/codecs/qutfcodec_p.h
         ../src/corelib/global/qendian.cpp # special case
         ../src/corelib/global/qglobal.cpp ../src/corelib/global/qglobal.h
         ../src/corelib/global/qlibraryinfo.cpp
@@ -105,6 +104,7 @@ qt_add_tool(qmake # special case
         ../src/corelib/tools/qringbuffer.cpp # special case
         ../src/corelib/text/qstring.cpp ../src/corelib/text/qstring.h
         ../src/corelib/text/qstringbuilder.cpp ../src/corelib/text/qstringbuilder.h
+        ../src/corelib/text/qstringconverter.cpp ../src/corelib/text/qstringconverter.h ../src/corelib/text/qstringconverter_p.h
         ../src/corelib/text/qstringlist.cpp ../src/corelib/text/qstringlist.h
         ../src/corelib/text/qstringmatcher.h
         ../src/corelib/tools/qvector.h
diff --git a/qmake/Makefile.unix b/qmake/Makefile.unix
index 98d255f2d5..c0b6704351 100644
--- a/qmake/Makefile.unix
+++ b/qmake/Makefile.unix
@@ -17,7 +17,6 @@ OBJS = \
 
 #qt code (please keep in order matching DEPEND_SRC)
 QOBJS = \
-	qutfcodec.o \
 	qendian.o qglobal.o qlogging.o qmalloc.o qnumeric.o qoperatingsystemversion.o qrandom.o \
 	qabstractfileengine.o qbuffer.o qdatastream.o qdebug.o \
 	qdir.o qdiriterator.o \
@@ -32,7 +31,7 @@ QOBJS = \
 	qcalendar.o qgregoriancalendar.o qromancalendar.o \
         qcryptographichash.o qdatetime.o qhash.o \
         qlocale.o qlocale_tools.o qmap.o qregularexpression.o qregexp.o qringbuffer.o \
-	qstringbuilder.o qstring.o qstringlist.o qversionnumber.o \
+        qstringbuilder.o qstring.o qstringconverter.o qstringlist.o qversionnumber.o \
 	qvsnprintf.o qxmlstream.o qxmlutils.o \
         pcre2_auto_possess.o pcre2_chartables.o pcre2_compile.o pcre2_config.o \
         pcre2_context.o pcre2_dfa_match.o pcre2_error.o pcre2_extuni.o \
@@ -74,7 +73,6 @@ DEPEND_SRC = \
 	   $(QMKGENSRC)/win32/msvc_vcxproj.cpp \
 	   $(QMKGENSRC)/win32/winmakefile.cpp \
 	   $(QMKGENSRC)/xmloutput.cpp \
-	   $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp \
 	   $(SOURCE_PATH)/src/corelib/global/qendian.cpp \
 	   $(SOURCE_PATH)/src/corelib/global/qglobal.cpp \
 	   $(SOURCE_PATH)/src/corelib/global/qlibraryinfo.cpp \
@@ -122,6 +120,7 @@ DEPEND_SRC = \
 	   $(SOURCE_PATH)/src/corelib/text/qregularexpression.cpp \
 	   $(SOURCE_PATH)/src/corelib/text/qregexp.cpp \
 	   $(SOURCE_PATH)/src/corelib/text/qstringbuilder.cpp \
+	   $(SOURCE_PATH)/src/corelib/text/qstringconverter.cpp \
 	   $(SOURCE_PATH)/src/corelib/text/qstring.cpp \
 	   $(SOURCE_PATH)/src/corelib/text/qstringlist.cpp \
 	   $(SOURCE_PATH)/src/corelib/text/qvsnprintf.cpp \
@@ -380,15 +379,15 @@ qoperatingsystemversion_darwin.o: $(SOURCE_PATH)/src/corelib/global/qoperatingsy
 qcore_foundation.o: $(SOURCE_PATH)/src/corelib/kernel/qcore_foundation.mm
 	$(CXX) -c -o $@ $(CXXFLAGS) $<
 
-qutfcodec.o: $(SOURCE_PATH)/src/corelib/codecs/qutfcodec.cpp
-	$(CXX) -c -o $@ $(CXXFLAGS) $<
-
 qstring.o: $(SOURCE_PATH)/src/corelib/text/qstring.cpp
 	$(CXX) -c -o $@ $(CXXFLAGS) $<
 
 qstringbuilder.o: $(SOURCE_PATH)/src/corelib/text/qstringbuilder.cpp
 	$(CXX) -c -o $@ $(CXXFLAGS) $<
 
+qstringconverter.o: $(SOURCE_PATH)/src/corelib/text/qstringconverter.cpp
+	$(CXX) -c -o $@ $(CXXFLAGS) $<
+
 qlocale.o: $(SOURCE_PATH)/src/corelib/text/qlocale.cpp
 	$(CXX) -c -o $@ $(CXXFLAGS) $<
 
diff --git a/qmake/Makefile.win32 b/qmake/Makefile.win32
index d3a85c17b2..df47dacd15 100644
--- a/qmake/Makefile.win32
+++ b/qmake/Makefile.win32
@@ -104,8 +104,8 @@ QTOBJS= \
 	qoperatingsystemversion_win.obj \
 	qregexp.obj \
 	qromancalendar.obj \
-	qutfcodec.obj \
 	qstring.obj \
+	qstringconverter.obj \
 	qstringlist.obj \
 	qstringbuilder.obj \
 	qsystemerror.obj \
diff --git a/qmake/qmake.pro b/qmake/qmake.pro
index 243f07ac2c..fcd1c17dcf 100644
--- a/qmake/qmake.pro
+++ b/qmake/qmake.pro
@@ -159,11 +159,11 @@ SOURCES += \
     qsettings.cpp \
     qstring.cpp \
     qstringbuilder.cpp \
+    qstringconverter.cpp \
     qstringlist.cpp \
     qsystemerror.cpp \
     qtemporaryfile.cpp \
     qtextstream.cpp \
-    qutfcodec.cpp \
     quuid.cpp \
     qvariant.cpp \
     qversionnumber.cpp \
@@ -217,12 +217,13 @@ HEADERS += \
     qromancalendar_p.h \
     qstring.h \
     qstringbuilder.h \
+    qstringconverter_p.h \
+    qstringconverter.h \
     qstringlist.h \
     qstringmatcher.h \
     qsystemerror_p.h \
     qtemporaryfile.h \
     qtextstream.h \
-    qutfcodec_p.h \
     quuid.h \
     qvector.h \
     qversionnumber.h \
diff --git a/src/corelib/CMakeLists.txt b/src/corelib/CMakeLists.txt
index ff28b2d20c..710d025caf 100644
--- a/src/corelib/CMakeLists.txt
+++ b/src/corelib/CMakeLists.txt
@@ -169,6 +169,7 @@ qt_add_module(Core
         text/qstring.cpp text/qstring.h
         text/qstring_compat.cpp
         text/qstringalgorithms.h text/qstringalgorithms_p.h
+        text/qstringconverter.cpp text/qstringconverter.h text/qstringconverter_p.h
         text/qstringbuilder.cpp text/qstringbuilder.h
         text/qstringiterator_p.h
         text/qstringlist.cpp text/qstringlist.h
diff --git a/src/corelib/codecs/qutfcodec.cpp b/src/corelib/codecs/qutfcodec.cpp
index a31bfbd218..c518ab1d9c 100644
--- a/src/corelib/codecs/qutfcodec.cpp
+++ b/src/corelib/codecs/qutfcodec.cpp
@@ -48,946 +48,6 @@
 
 QT_BEGIN_NAMESPACE
 
-enum { Endian = 0, Data = 1 };
-
-static const uchar utf8bom[] = { 0xef, 0xbb, 0xbf };
-
-#if (defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)) \
-    || (defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64))
-static Q_ALWAYS_INLINE uint qBitScanReverse(unsigned v) noexcept
-{
-    uint result = qCountLeadingZeroBits(v);
-    // Now Invert the result: clz will count *down* from the msb to the lsb, so the msb index is 31
-    // and the lsb index is 0. The result for _bit_scan_reverse is expected to be the index when
-    // counting up: msb index is 0 (because it starts there), and the lsb index is 31.
-    result ^= sizeof(unsigned) * 8 - 1;
-    return result;
-}
-#endif
-
-#if defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
-static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end)
-{
-    // do sixteen characters at a time
-    for ( ; end - src >= 16; src += 16, dst += 16) {
-#  ifdef __AVX2__
-        __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
-        __m128i data1 = _mm256_castsi256_si128(data);
-        __m128i data2 = _mm256_extracti128_si256(data, 1);
-#  else
-        __m128i data1 = _mm_loadu_si128((const __m128i*)src);
-        __m128i data2 = _mm_loadu_si128(1+(const __m128i*)src);
-#  endif
-
-        // check if everything is ASCII
-        // the highest ASCII value is U+007F
-        // Do the packing directly:
-        // The PACKUSWB instruction has packs a signed 16-bit integer to an unsigned 8-bit
-        // with saturation. That is, anything from 0x0100 to 0x7fff is saturated to 0xff,
-        // while all negatives (0x8000 to 0xffff) get saturated to 0x00. To detect non-ASCII,
-        // we simply do a signed greater-than comparison to 0x00. That means we detect NULs as
-        // "non-ASCII", but it's an acceptable compromise.
-        __m128i packed = _mm_packus_epi16(data1, data2);
-        __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128());
-
-        // store, even if there are non-ASCII characters here
-        _mm_storeu_si128((__m128i*)dst, packed);
-
-        // n will contain 1 bit set per character in [data1, data2] that is non-ASCII (or NUL)
-        ushort n = ~_mm_movemask_epi8(nonAscii);
-        if (n) {
-            // find the next probable ASCII character
-            // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
-            // characters still coming
-            nextAscii = src + qBitScanReverse(n) + 1;
-
-            n = qCountTrailingZeroBits(n);
-            dst += n;
-            src += n;
-            return false;
-        }
-    }
-
-    if (end - src >= 8) {
-        // do eight characters at a time
-        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
-        __m128i packed = _mm_packus_epi16(data, data);
-        __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128());
-
-        // store even non-ASCII
-        _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), packed);
-
-        uchar n = ~_mm_movemask_epi8(nonAscii);
-        if (n) {
-            nextAscii = src + qBitScanReverse(n) + 1;
-            n = qCountTrailingZeroBits(n);
-            dst += n;
-            src += n;
-            return false;
-        }
-    }
-
-    return src == end;
-}
-
-static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
-{
-    // do sixteen characters at a time
-    for ( ; end - src >= 16; src += 16, dst += 16) {
-        __m128i data = _mm_loadu_si128((const __m128i*)src);
-
-#ifdef __AVX2__
-        const int BitSpacing = 2;
-        // load and zero extend to an YMM register
-        const __m256i extended = _mm256_cvtepu8_epi16(data);
-
-        uint n = _mm256_movemask_epi8(extended);
-        if (!n) {
-            // store
-            _mm256_storeu_si256((__m256i*)dst, extended);
-            continue;
-        }
-#else
-        const int BitSpacing = 1;
-
-        // check if everything is ASCII
-        // movemask extracts the high bit of every byte, so n is non-zero if something isn't ASCII
-        uint n = _mm_movemask_epi8(data);
-        if (!n) {
-            // unpack
-            _mm_storeu_si128((__m128i*)dst, _mm_unpacklo_epi8(data, _mm_setzero_si128()));
-            _mm_storeu_si128(1+(__m128i*)dst, _mm_unpackhi_epi8(data, _mm_setzero_si128()));
-            continue;
-        }
-#endif
-
-        // copy the front part that is still ASCII
-        while (!(n & 1)) {
-            *dst++ = *src++;
-            n >>= BitSpacing;
-        }
-
-        // find the next probable ASCII character
-        // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
-        // characters still coming
-        n = qBitScanReverse(n);
-        nextAscii = src + (n / BitSpacing) + 1;
-        return false;
-
-    }
-
-    if (end - src >= 8) {
-        __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
-        uint n = _mm_movemask_epi8(data) & 0xff;
-        if (!n) {
-            // unpack and store
-            _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_unpacklo_epi8(data, _mm_setzero_si128()));
-        } else {
-            while (!(n & 1)) {
-                *dst++ = *src++;
-                n >>= 1;
-            }
-
-            n = qBitScanReverse(n);
-            nextAscii = src + n + 1;
-            return false;
-        }
-    }
-
-    return src == end;
-}
-
-static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
-{
-#ifdef __AVX2__
-    // do 32 characters at a time
-    // (this is similar to simdTestMask in qstring.cpp)
-    const __m256i mask = _mm256_set1_epi8(0x80);
-    for ( ; end - src >= 32; src += 32) {
-        __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
-        if (_mm256_testz_si256(mask, data))
-            continue;
-
-        uint n = _mm256_movemask_epi8(data);
-        Q_ASSUME(n);
-
-        // find the next probable ASCII character
-        // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
-        // characters still coming
-        nextAscii = src + qBitScanReverse(n) + 1;
-
-        // return the non-ASCII character
-        return src + qCountTrailingZeroBits(n);
-    }
-#endif
-
-    // do sixteen characters at a time
-    for ( ; end - src >= 16; src += 16) {
-        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
-
-        // check if everything is ASCII
-        // movemask extracts the high bit of every byte, so n is non-zero if something isn't ASCII
-        uint n = _mm_movemask_epi8(data);
-        if (!n)
-            continue;
-
-        // find the next probable ASCII character
-        // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
-        // characters still coming
-        nextAscii = src + qBitScanReverse(n) + 1;
-
-        // return the non-ASCII character
-        return src + qCountTrailingZeroBits(n);
-    }
-
-    // do four characters at a time
-    for ( ; end - src >= 4; src += 4) {
-        quint32 data = qFromUnaligned<quint32>(src);
-        data &= 0x80808080U;
-        if (!data)
-            continue;
-
-        // We don't try to guess which of the three bytes is ASCII and which
-        // one isn't. The chance that at least two of them are non-ASCII is
-        // better than 75%.
-        nextAscii = src;
-        return src;
-    }
-    nextAscii = end;
-    return src;
-}
-#elif defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64) // vaddv is only available on Aarch64
-static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end)
-{
-    uint16x8_t maxAscii = vdupq_n_u16(0x7f);
-    uint16x8_t mask1 = { 1,      1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
-    uint16x8_t mask2 = vshlq_n_u16(mask1, 1);
-
-    // do sixteen characters at a time
-    for ( ; end - src >= 16; src += 16, dst += 16) {
-        // load 2 lanes (or: "load interleaved")
-        uint16x8x2_t in = vld2q_u16(src);
-
-        // check if any of the elements > 0x7f, select 1 bit per element (element 0 -> bit 0, element 1 -> bit 1, etc),
-        // add those together into a scalar, and merge the scalars.
-        uint16_t nonAscii = vaddvq_u16(vandq_u16(vcgtq_u16(in.val[0], maxAscii), mask1))
-                          | vaddvq_u16(vandq_u16(vcgtq_u16(in.val[1], maxAscii), mask2));
-
-        // merge the two lanes by shifting the values of the second by 8 and inserting them
-        uint16x8_t out = vsliq_n_u16(in.val[0], in.val[1], 8);
-
-        // store, even if there are non-ASCII characters here
-        vst1q_u8(dst, vreinterpretq_u8_u16(out));
-
-        if (nonAscii) {
-            // find the next probable ASCII character
-            // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
-            // characters still coming
-            nextAscii = src + qBitScanReverse(nonAscii) + 1;
-
-            nonAscii = qCountTrailingZeroBits(nonAscii);
-            dst += nonAscii;
-            src += nonAscii;
-            return false;
-        }
-    }
-    return src == end;
-}
-
-static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
-{
-    // do eight characters at a time
-    uint8x8_t msb_mask = vdup_n_u8(0x80);
-    uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
-    for ( ; end - src >= 8; src += 8, dst += 8) {
-        uint8x8_t c = vld1_u8(src);
-        uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
-        if (!n) {
-            // store
-            vst1q_u16(dst, vmovl_u8(c));
-            continue;
-        }
-
-        // copy the front part that is still ASCII
-        while (!(n & 1)) {
-            *dst++ = *src++;
-            n >>= 1;
-        }
-
-        // find the next probable ASCII character
-        // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
-        // characters still coming
-        n = qBitScanReverse(n);
-        nextAscii = src + n + 1;
-        return false;
-
-    }
-    return src == end;
-}
-
-static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
-{
-    // The SIMD code below is untested, so just force an early return until
-    // we've had the time to verify it works.
-    nextAscii = end;
-    return src;
-
-    // do eight characters at a time
-    uint8x8_t msb_mask = vdup_n_u8(0x80);
-    uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 };
-    for ( ; end - src >= 8; src += 8) {
-        uint8x8_t c = vld1_u8(src);
-        uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
-        if (!n)
-            continue;
-
-        // find the next probable ASCII character
-        // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
-        // characters still coming
-        nextAscii = src + qBitScanReverse(n) + 1;
-
-        // return the non-ASCII character
-        return src + qCountTrailingZeroBits(n);
-    }
-    nextAscii = end;
-    return src;
-}
-#else
-static inline bool simdEncodeAscii(uchar *, const ushort *, const ushort *, const ushort *)
-{
-    return false;
-}
-
-static inline bool simdDecodeAscii(ushort *, const uchar *, const uchar *, const uchar *)
-{
-    return false;
-}
-
-static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
-{
-    nextAscii = end;
-    return src;
-}
-#endif
-
-QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len)
-{
-    // create a QByteArray with the worst case scenario size
-    QByteArray result(len * 3, Qt::Uninitialized);
-    uchar *dst = reinterpret_cast<uchar *>(const_cast<char *>(result.constData()));
-    const ushort *src = reinterpret_cast<const ushort *>(uc);
-    const ushort *const end = src + len;
-
-    while (src != end) {
-        const ushort *nextAscii = end;
-        if (simdEncodeAscii(dst, nextAscii, src, end))
-            break;
-
-        do {
-            ushort uc = *src++;
-            int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, dst, src, end);
-            if (res < 0) {
-                // encoding error - append '?'
-                *dst++ = '?';
-            }
-        } while (src < nextAscii);
-    }
-
-    result.truncate(dst - reinterpret_cast<uchar *>(const_cast<char *>(result.constData())));
-    return result;
-}
-
-QByteArray QUtf8::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state)
-{
-    uchar replacement = '?';
-    int rlen = 3*len;
-    int surrogate_high = -1;
-    if (state) {
-        if (state->flags & QTextCodec::ConvertInvalidToNull)
-            replacement = 0;
-        if (!(state->flags & QTextCodec::IgnoreHeader))
-            rlen += 3;
-        if (state->remainingChars)
-            surrogate_high = state->state_data[0];
-    }
-
-
-    QByteArray rstr(rlen, Qt::Uninitialized);
-    uchar *cursor = reinterpret_cast<uchar *>(const_cast<char *>(rstr.constData()));
-    const ushort *src = reinterpret_cast<const ushort *>(uc);
-    const ushort *const end = src + len;
-
-    int invalid = 0;
-    if (state && !(state->flags & QTextCodec::IgnoreHeader)) {
-        // append UTF-8 BOM
-        *cursor++ = utf8bom[0];
-        *cursor++ = utf8bom[1];
-        *cursor++ = utf8bom[2];
-    }
-
-    const ushort *nextAscii = src;
-    while (src != end) {
-        int res;
-        ushort uc;
-        if (surrogate_high != -1) {
-            uc = surrogate_high;
-            surrogate_high = -1;
-            res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end);
-        } else {
-            if (src >= nextAscii && simdEncodeAscii(cursor, nextAscii, src, end))
-                break;
-
-            uc = *src++;
-            res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end);
-        }
-        if (Q_LIKELY(res >= 0))
-            continue;
-
-        if (res == QUtf8BaseTraits::Error) {
-            // encoding error
-            ++invalid;
-            *cursor++ = replacement;
-        } else if (res == QUtf8BaseTraits::EndOfString) {
-            surrogate_high = uc;
-            break;
-        }
-    }
-
-    rstr.resize(cursor - (const uchar*)rstr.constData());
-    if (state) {
-        state->invalidChars += invalid;
-        state->flags |= QTextCodec::IgnoreHeader;
-        state->remainingChars = 0;
-        if (surrogate_high >= 0) {
-            state->remainingChars = 1;
-            state->state_data[0] = surrogate_high;
-        }
-    }
-    return rstr;
-}
-
-QString QUtf8::convertToUnicode(const char *chars, int len)
-{
-    // UTF-8 to UTF-16 always needs the exact same number of words or less:
-    //    UTF-8     UTF-16
-    //   1 byte     1 word
-    //   2 bytes    1 word
-    //   3 bytes    1 word
-    //   4 bytes    2 words (one surrogate pair)
-    // That is, we'll use the full buffer if the input is US-ASCII (1-byte UTF-8),
-    // half the buffer for U+0080-U+07FF text (e.g., Greek, Cyrillic, Arabic) or
-    // non-BMP text, and one third of the buffer for U+0800-U+FFFF text (e.g, CJK).
-    //
-    // The table holds for invalid sequences too: we'll insert one replacement char
-    // per invalid byte.
-    QString result(len, Qt::Uninitialized);
-    QChar *data = const_cast<QChar*>(result.constData()); // we know we're not shared
-    const QChar *end = convertToUnicode(data, chars, len);
-    result.truncate(end - data);
-    return result;
-}
-
-/*!
-    \since 5.7
-    \overload
-
-    Converts the UTF-8 sequence of \a len octets beginning at \a chars to
-    a sequence of QChar starting at \a buffer. The buffer is expected to be
-    large enough to hold the result. An upper bound for the size of the
-    buffer is \a len QChars.
-
-    If, during decoding, an error occurs, a QChar::ReplacementCharacter is
-    written.
-
-    Returns a pointer to one past the last QChar written.
-
-    This function never throws.
-*/
-
-QChar *QUtf8::convertToUnicode(QChar *buffer, const char *chars, int len) noexcept
-{
-    ushort *dst = reinterpret_cast<ushort *>(buffer);
-    const uchar *src = reinterpret_cast<const uchar *>(chars);
-    const uchar *end = src + len;
-
-    // attempt to do a full decoding in SIMD
-    const uchar *nextAscii = end;
-    if (!simdDecodeAscii(dst, nextAscii, src, end)) {
-        // at least one non-ASCII entry
-        // check if we failed to decode the UTF-8 BOM; if so, skip it
-        if (Q_UNLIKELY(src == reinterpret_cast<const uchar *>(chars))
-                && end - src >= 3
-                && Q_UNLIKELY(src[0] == utf8bom[0] && src[1] == utf8bom[1] && src[2] == utf8bom[2])) {
-            src += 3;
-        }
-
-        while (src < end) {
-            nextAscii = end;
-            if (simdDecodeAscii(dst, nextAscii, src, end))
-                break;
-
-            do {
-                uchar b = *src++;
-                int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end);
-                if (res < 0) {
-                    // decoding error
-                    *dst++ = QChar::ReplacementCharacter;
-                }
-            } while (src < nextAscii);
-        }
-    }
-
-    return reinterpret_cast<QChar *>(dst);
-}
-
-QString QUtf8::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state)
-{
-    bool headerdone = false;
-    ushort replacement = QChar::ReplacementCharacter;
-    int invalid = 0;
-    int res;
-    uchar ch = 0;
-
-    // See above for buffer requirements for stateless decoding. However, that
-    // fails if the state is not empty. The following situations can add to the
-    // requirements:
-    //  state contains      chars starts with           requirement
-    //   1 of 2 bytes       valid continuation          0
-    //   2 of 3 bytes       same                        0
-    //   3 bytes of 4       same                        +1 (need to insert surrogate pair)
-    //   1 of 2 bytes       invalid continuation        +1 (need to insert replacement and restart)
-    //   2 of 3 bytes       same                        +1 (same)
-    //   3 of 4 bytes       same                        +1 (same)
-    QString result(len + 1, Qt::Uninitialized);
-
-    ushort *dst = reinterpret_cast<ushort *>(const_cast<QChar *>(result.constData()));
-    const uchar *src = reinterpret_cast<const uchar *>(chars);
-    const uchar *end = src + len;
-
-    if (state) {
-        if (state->flags & QTextCodec::IgnoreHeader)
-            headerdone = true;
-        if (state->flags & QTextCodec::ConvertInvalidToNull)
-            replacement = QChar::Null;
-        if (state->remainingChars) {
-            // handle incoming state first
-            uchar remainingCharsData[4]; // longest UTF-8 sequence possible
-            int remainingCharsCount = state->remainingChars;
-            int newCharsToCopy = qMin<int>(sizeof(remainingCharsData) - remainingCharsCount, end - src);
-
-            memset(remainingCharsData, 0, sizeof(remainingCharsData));
-            memcpy(remainingCharsData, &state->state_data[0], remainingCharsCount);
-            memcpy(remainingCharsData + remainingCharsCount, src, newCharsToCopy);
-
-            const uchar *begin = &remainingCharsData[1];
-            res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(remainingCharsData[0], dst, begin,
-                    static_cast<const uchar *>(remainingCharsData) + remainingCharsCount + newCharsToCopy);
-            if (res == QUtf8BaseTraits::Error || (res == QUtf8BaseTraits::EndOfString && len == 0)) {
-                // special case for len == 0:
-                // if we were supplied an empty string, terminate the previous, unfinished sequence with error
-                ++invalid;
-                *dst++ = replacement;
-            } else if (res == QUtf8BaseTraits::EndOfString) {
-                // if we got EndOfString again, then there were too few bytes in src;
-                // copy to our state and return
-                state->remainingChars = remainingCharsCount + newCharsToCopy;
-                memcpy(&state->state_data[0], remainingCharsData, state->remainingChars);
-                return QString();
-            } else if (!headerdone && res >= 0) {
-                // eat the UTF-8 BOM
-                headerdone = true;
-                if (dst[-1] == 0xfeff)
-                    --dst;
-            }
-
-            // adjust src now that we have maybe consumed a few chars
-            if (res >= 0) {
-                Q_ASSERT(res > remainingCharsCount);
-                src += res - remainingCharsCount;
-            }
-        }
-    }
-
-    // main body, stateless decoding
-    res = 0;
-    const uchar *nextAscii = src;
-    const uchar *start = src;
-    while (res >= 0 && src < end) {
-        if (src >= nextAscii && simdDecodeAscii(dst, nextAscii, src, end))
-            break;
-
-        ch = *src++;
-        res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(ch, dst, src, end);
-        if (!headerdone && res >= 0) {
-            headerdone = true;
-            if (src == start + 3) { // 3 == sizeof(utf8-bom)
-                // eat the UTF-8 BOM (it can only appear at the beginning of the string).
-                if (dst[-1] == 0xfeff)
-                    --dst;
-            }
-        }
-        if (res == QUtf8BaseTraits::Error) {
-            res = 0;
-            ++invalid;
-            *dst++ = replacement;
-        }
-    }
-
-    if (!state && res == QUtf8BaseTraits::EndOfString) {
-        // unterminated UTF sequence
-        *dst++ = QChar::ReplacementCharacter;
-        while (src++ < end)
-            *dst++ = QChar::ReplacementCharacter;
-    }
-
-    result.truncate(dst - (const ushort *)result.unicode());
-    if (state) {
-        state->invalidChars += invalid;
-        if (headerdone)
-            state->flags |= QTextCodec::IgnoreHeader;
-        if (res == QUtf8BaseTraits::EndOfString) {
-            --src; // unread the byte in ch
-            state->remainingChars = end - src;
-            memcpy(&state->state_data[0], src, end - src);
-        } else {
-            state->remainingChars = 0;
-        }
-    }
-    return result;
-}
-
-struct QUtf8NoOutputTraits : public QUtf8BaseTraitsNoAscii
-{
-    struct NoOutput {};
-    static void appendUtf16(const NoOutput &, ushort) {}
-    static void appendUcs4(const NoOutput &, uint) {}
-};
-
-QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len)
-{
-    const uchar *src = reinterpret_cast<const uchar *>(chars);
-    const uchar *end = src + len;
-    const uchar *nextAscii = src;
-    bool isValidAscii = true;
-
-    while (src < end) {
-        if (src >= nextAscii)
-            src = simdFindNonAscii(src, end, nextAscii);
-        if (src == end)
-            break;
-
-        do {
-            uchar b = *src++;
-            if ((b & 0x80) == 0)
-                continue;
-
-            isValidAscii = false;
-            QUtf8NoOutputTraits::NoOutput output;
-            int res = QUtf8Functions::fromUtf8<QUtf8NoOutputTraits>(b, output, src, end);
-            if (res < 0) {
-                // decoding error
-                return { false, false };
-            }
-        } while (src < nextAscii);
-    }
-
-    return { true, isValidAscii };
-}
-
-int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, int u16len)
-{
-    uint uc1, uc2;
-    auto src1 = reinterpret_cast<const uchar *>(utf8);
-    auto end1 = src1 + u8len;
-    QStringIterator src2(utf16, utf16 + u16len);
-
-    while (src1 < end1 && src2.hasNext()) {
-        uchar b = *src1++;
-        uint *output = &uc1;
-        int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
-        if (res < 0) {
-            // decoding error
-            uc1 = QChar::ReplacementCharacter;
-        }
-
-        uc2 = src2.next();
-        if (uc1 != uc2)
-            return int(uc1) - int(uc2);
-    }
-
-    // the shorter string sorts first
-    return (end1 > src1) - int(src2.hasNext());
-}
-
-int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, QLatin1String s)
-{
-    uint uc1;
-    auto src1 = reinterpret_cast<const uchar *>(utf8);
-    auto end1 = src1 + u8len;
-    auto src2 = reinterpret_cast<const uchar *>(s.latin1());
-    auto end2 = src2 + s.size();
-
-    while (src1 < end1 && src2 < end2) {
-        uchar b = *src1++;
-        uint *output = &uc1;
-        int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
-        if (res < 0) {
-            // decoding error
-            uc1 = QChar::ReplacementCharacter;
-        }
-
-        uint uc2 = *src2++;
-        if (uc1 != uc2)
-            return int(uc1) - int(uc2);
-    }
-
-    // the shorter string sorts first
-    return (end1 > src1) - (end2 > src2);
-}
-
-QByteArray QUtf16::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state, DataEndianness e)
-{
-    DataEndianness endian = e;
-    int length =  2*len;
-    if (!state || (!(state->flags & QTextCodec::IgnoreHeader))) {
-        length += 2;
-    }
-    if (e == DetectEndianness) {
-        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
-    }
-
-    QByteArray d;
-    d.resize(length);
-    char *data = d.data();
-    if (!state || !(state->flags & QTextCodec::IgnoreHeader)) {
-        QChar bom(QChar::ByteOrderMark);
-        if (endian == BigEndianness)
-            qToBigEndian(bom.unicode(), data);
-        else
-            qToLittleEndian(bom.unicode(), data);
-        data += 2;
-    }
-    if (endian == BigEndianness)
-        qToBigEndian<ushort>(uc, len, data);
-    else
-        qToLittleEndian<ushort>(uc, len, data);
-
-    if (state) {
-        state->remainingChars = 0;
-        state->flags |= QTextCodec::IgnoreHeader;
-    }
-    return d;
-}
-
-QString QUtf16::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state, DataEndianness e)
-{
-    DataEndianness endian = e;
-    bool half = false;
-    uchar buf = 0;
-    bool headerdone = false;
-    if (state) {
-        headerdone = state->flags & QTextCodec::IgnoreHeader;
-        if (endian == DetectEndianness)
-            endian = (DataEndianness)state->state_data[Endian];
-        if (state->remainingChars) {
-            half = true;
-            buf = state->state_data[Data];
-        }
-    }
-    if (headerdone && endian == DetectEndianness)
-        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
-
-    QString result(len, Qt::Uninitialized); // worst case
-    QChar *qch = (QChar *)result.data();
-    while (len--) {
-        if (half) {
-            QChar ch;
-            if (endian == LittleEndianness) {
-                ch.setRow(*chars++);
-                ch.setCell(buf);
-            } else {
-                ch.setRow(buf);
-                ch.setCell(*chars++);
-            }
-            if (!headerdone) {
-                headerdone = true;
-                if (endian == DetectEndianness) {
-                    if (ch == QChar::ByteOrderSwapped) {
-                        endian = LittleEndianness;
-                    } else if (ch == QChar::ByteOrderMark) {
-                        endian = BigEndianness;
-                    } else {
-                        if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
-                            endian = BigEndianness;
-                        } else {
-                            endian = LittleEndianness;
-                            ch = QChar::fromUcs2((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8));
-                        }
-                        *qch++ = ch;
-                    }
-                } else if (ch != QChar::ByteOrderMark) {
-                    *qch++ = ch;
-                }
-            } else {
-                *qch++ = ch;
-            }
-            half = false;
-        } else {
-            buf = *chars++;
-            half = true;
-        }
-    }
-    result.truncate(qch - result.unicode());
-
-    if (state) {
-        if (headerdone)
-            state->flags |= QTextCodec::IgnoreHeader;
-        state->state_data[Endian] = endian;
-        if (half) {
-            state->remainingChars = 1;
-            state->state_data[Data] = buf;
-        } else {
-            state->remainingChars = 0;
-            state->state_data[Data] = 0;
-        }
-    }
-    return result;
-}
-
-QByteArray QUtf32::convertFromUnicode(const QChar *uc, int len, QTextCodec::ConverterState *state, DataEndianness e)
-{
-    DataEndianness endian = e;
-    int length =  4*len;
-    if (!state || (!(state->flags & QTextCodec::IgnoreHeader))) {
-        length += 4;
-    }
-    if (e == DetectEndianness) {
-        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
-    }
-
-    QByteArray d(length, Qt::Uninitialized);
-    char *data = d.data();
-    if (!state || !(state->flags & QTextCodec::IgnoreHeader)) {
-        if (endian == BigEndianness) {
-            data[0] = 0;
-            data[1] = 0;
-            data[2] = (char)0xfe;
-            data[3] = (char)0xff;
-        } else {
-            data[0] = (char)0xff;
-            data[1] = (char)0xfe;
-            data[2] = 0;
-            data[3] = 0;
-        }
-        data += 4;
-    }
-
-    QStringIterator i(uc, uc + len);
-    if (endian == BigEndianness) {
-        while (i.hasNext()) {
-            uint cp = i.next();
-            qToBigEndian(cp, data);
-            data += 4;
-        }
-    } else {
-        while (i.hasNext()) {
-            uint cp = i.next();
-            qToLittleEndian(cp, data);
-            data += 4;
-        }
-    }
-
-    if (state) {
-        state->remainingChars = 0;
-        state->flags |= QTextCodec::IgnoreHeader;
-    }
-    return d;
-}
-
-QString QUtf32::convertToUnicode(const char *chars, int len, QTextCodec::ConverterState *state, DataEndianness e)
-{
-    DataEndianness endian = e;
-    uchar tuple[4];
-    int num = 0;
-    bool headerdone = false;
-    if (state) {
-        headerdone = state->flags & QTextCodec::IgnoreHeader;
-        if (endian == DetectEndianness) {
-            endian = (DataEndianness)state->state_data[Endian];
-        }
-        num = state->remainingChars;
-        memcpy(tuple, &state->state_data[Data], 4);
-    }
-    if (headerdone && endian == DetectEndianness)
-        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
-
-    QString result;
-    result.resize((num + len) >> 2 << 1); // worst case
-    QChar *qch = (QChar *)result.data();
-
-    const char *end = chars + len;
-    while (chars < end) {
-        tuple[num++] = *chars++;
-        if (num == 4) {
-            if (!headerdone) {
-                headerdone = true;
-                if (endian == DetectEndianness) {
-                    if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BigEndianness) {
-                        endian = LittleEndianness;
-                        num = 0;
-                        continue;
-                    } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LittleEndianness) {
-                        endian = BigEndianness;
-                        num = 0;
-                        continue;
-                    } else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
-                        endian = BigEndianness;
-                    } else {
-                        endian = LittleEndianness;
-                    }
-                } else if (((endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
-                    num = 0;
-                    continue;
-                }
-            }
-            uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
-            for (char16_t c : QChar::fromUcs4(code))
-                *qch++ = c;
-            num = 0;
-        }
-    }
-    result.truncate(qch - result.unicode());
-
-    if (state) {
-        if (headerdone)
-            state->flags |= QTextCodec::IgnoreHeader;
-        state->state_data[Endian] = endian;
-        state->remainingChars = num;
-        memcpy(&state->state_data[Data], tuple, 4);
-    }
-    return result;
-}
-
-QString qFromUtfEncoded(const QByteArray &ba)
-{
-    const int arraySize = ba.size();
-    const uchar *buf = reinterpret_cast<const uchar *>(ba.constData());
-    const uint bom = 0xfeff;
-
-    if (arraySize > 3) {
-        uint uc = qFromUnaligned<uint>(buf);
-        if (uc == qToBigEndian(bom) || uc == qToLittleEndian(bom))
-            return QUtf32::convertToUnicode(ba.constData(), ba.length(), nullptr); // utf-32
-    }
-
-    if (arraySize > 1) {
-        ushort uc = qFromUnaligned<ushort>(buf);
-        if (uc == qToBigEndian(ushort(bom)) || qToLittleEndian(ushort(bom)))
-            return QUtf16::convertToUnicode(ba.constData(), ba.length(), nullptr); // utf-16
-    }
-    return QUtf8::convertToUnicode(ba.constData(), ba.length());
-}
-
 #if QT_CONFIG(textcodec)
 
 QUtf8Codec::~QUtf8Codec()
diff --git a/src/corelib/codecs/qutfcodec_p.h b/src/corelib/codecs/qutfcodec_p.h
index b1c7a23d4f..893a6db8e1 100644
--- a/src/corelib/codecs/qutfcodec_p.h
+++ b/src/corelib/codecs/qutfcodec_p.h
@@ -60,271 +60,11 @@
 #include "QtCore/qtextcodec.h"
 #endif
 
+#include "private/qstringconverter_p.h"
 #include "private/qtextcodec_p.h"
 
 QT_BEGIN_NAMESPACE
 
-struct QUtf8BaseTraits
-{
-    static const bool isTrusted = false;
-    static const bool allowNonCharacters = true;
-    static const bool skipAsciiHandling = false;
-    static const int Error = -1;
-    static const int EndOfString = -2;
-
-    static bool isValidCharacter(uint u)
-    { return int(u) >= 0; }
-
-    static void appendByte(uchar *&ptr, uchar b)
-    { *ptr++ = b; }
-
-    static uchar peekByte(const uchar *ptr, int n = 0)
-    { return ptr[n]; }
-
-    static qptrdiff availableBytes(const uchar *ptr, const uchar *end)
-    { return end - ptr; }
-
-    static void advanceByte(const uchar *&ptr, int n = 1)
-    { ptr += n; }
-
-    static void appendUtf16(ushort *&ptr, ushort uc)
-    { *ptr++ = uc; }
-
-    static void appendUcs4(ushort *&ptr, uint uc)
-    {
-        appendUtf16(ptr, QChar::highSurrogate(uc));
-        appendUtf16(ptr, QChar::lowSurrogate(uc));
-    }
-
-    static ushort peekUtf16(const ushort *ptr, int n = 0)
-    { return ptr[n]; }
-
-    static qptrdiff availableUtf16(const ushort *ptr, const ushort *end)
-    { return end - ptr; }
-
-    static void advanceUtf16(const ushort *&ptr, int n = 1)
-    { ptr += n; }
-
-    // it's possible to output to UCS-4 too
-    static void appendUtf16(uint *&ptr, ushort uc)
-    { *ptr++ = uc; }
-
-    static void appendUcs4(uint *&ptr, uint uc)
-    { *ptr++ = uc; }
-};
-
-struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits
-{
-    static const bool skipAsciiHandling = true;
-};
-
-namespace QUtf8Functions
-{
-    /// returns 0 on success; errors can only happen if \a u is a surrogate:
-    /// Error if \a u is a low surrogate;
-    /// if \a u is a high surrogate, Error if the next isn't a low one,
-    /// EndOfString if we run into the end of the string.
-    template <typename Traits, typename OutputPtr, typename InputPtr> inline
-    int toUtf8(ushort u, OutputPtr &dst, InputPtr &src, InputPtr end)
-    {
-        if (!Traits::skipAsciiHandling && u < 0x80) {
-            // U+0000 to U+007F (US-ASCII) - one byte
-            Traits::appendByte(dst, uchar(u));
-            return 0;
-        } else if (u < 0x0800) {
-            // U+0080 to U+07FF - two bytes
-            // first of two bytes
-            Traits::appendByte(dst, 0xc0 | uchar(u >> 6));
-        } else {
-            if (!QChar::isSurrogate(u)) {
-                // U+0800 to U+FFFF (except U+D800-U+DFFF) - three bytes
-                if (!Traits::allowNonCharacters && QChar::isNonCharacter(u))
-                    return Traits::Error;
-
-                // first of three bytes
-                Traits::appendByte(dst, 0xe0 | uchar(u >> 12));
-            } else {
-                // U+10000 to U+10FFFF - four bytes
-                // need to get one extra codepoint
-                if (Traits::availableUtf16(src, end) == 0)
-                    return Traits::EndOfString;
-
-                ushort low = Traits::peekUtf16(src);
-                if (!QChar::isHighSurrogate(u))
-                    return Traits::Error;
-                if (!QChar::isLowSurrogate(low))
-                    return Traits::Error;
-
-                Traits::advanceUtf16(src);
-                uint ucs4 = QChar::surrogateToUcs4(u, low);
-
-                if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4))
-                    return Traits::Error;
-
-                // first byte
-                Traits::appendByte(dst, 0xf0 | (uchar(ucs4 >> 18) & 0xf));
-
-                // second of four bytes
-                Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f));
-
-                // for the rest of the bytes
-                u = ushort(ucs4);
-            }
-
-            // second to last byte
-            Traits::appendByte(dst, 0x80 | (uchar(u >> 6) & 0x3f));
-        }
-
-        // last byte
-        Traits::appendByte(dst, 0x80 | (u & 0x3f));
-        return 0;
-    }
-
-    inline bool isContinuationByte(uchar b)
-    {
-        return (b & 0xc0) == 0x80;
-    }
-
-    /// returns the number of characters consumed (including \a b) in case of success;
-    /// returns negative in case of error: Traits::Error or Traits::EndOfString
-    template <typename Traits, typename OutputPtr, typename InputPtr> inline
-    int fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end)
-    {
-        int charsNeeded;
-        uint min_uc;
-        uint uc;
-
-        if (!Traits::skipAsciiHandling && b < 0x80) {
-            // US-ASCII
-            Traits::appendUtf16(dst, b);
-            return 1;
-        }
-
-        if (!Traits::isTrusted && Q_UNLIKELY(b <= 0xC1)) {
-            // an UTF-8 first character must be at least 0xC0
-            // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences
-            return Traits::Error;
-        } else if (b < 0xe0) {
-            charsNeeded = 2;
-            min_uc = 0x80;
-            uc = b & 0x1f;
-        } else if (b < 0xf0) {
-            charsNeeded = 3;
-            min_uc = 0x800;
-            uc = b & 0x0f;
-        } else if (b < 0xf5) {
-            charsNeeded = 4;
-            min_uc = 0x10000;
-            uc = b & 0x07;
-        } else {
-            // the last Unicode character is U+10FFFF
-            // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF"
-            // therefore, a byte higher than 0xF4 is not the UTF-8 first byte
-            return Traits::Error;
-        }
-
-        int bytesAvailable = Traits::availableBytes(src, end);
-        if (Q_UNLIKELY(bytesAvailable < charsNeeded - 1)) {
-            // it's possible that we have an error instead of just unfinished bytes
-            if (bytesAvailable > 0 && !isContinuationByte(Traits::peekByte(src, 0)))
-                return Traits::Error;
-            if (bytesAvailable > 1 && !isContinuationByte(Traits::peekByte(src, 1)))
-                return Traits::Error;
-            return Traits::EndOfString;
-        }
-
-        // first continuation character
-        b = Traits::peekByte(src, 0);
-        if (!isContinuationByte(b))
-            return Traits::Error;
-        uc <<= 6;
-        uc |= b & 0x3f;
-
-        if (charsNeeded > 2) {
-            // second continuation character
-            b = Traits::peekByte(src, 1);
-            if (!isContinuationByte(b))
-                return Traits::Error;
-            uc <<= 6;
-            uc |= b & 0x3f;
-
-            if (charsNeeded > 3) {
-                // third continuation character
-                b = Traits::peekByte(src, 2);
-                if (!isContinuationByte(b))
-                    return Traits::Error;
-                uc <<= 6;
-                uc |= b & 0x3f;
-            }
-        }
-
-        // we've decoded something; safety-check it
-        if (!Traits::isTrusted) {
-            if (uc < min_uc)
-                return Traits::Error;
-            if (QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint)
-                return Traits::Error;
-            if (!Traits::allowNonCharacters && QChar::isNonCharacter(uc))
-                return Traits::Error;
-        }
-
-        // write the UTF-16 sequence
-        if (!QChar::requiresSurrogates(uc)) {
-            // UTF-8 decoded and no surrogates are required
-            // detach if necessary
-            Traits::appendUtf16(dst, ushort(uc));
-        } else {
-            // UTF-8 decoded to something that requires a surrogate pair
-            Traits::appendUcs4(dst, uc);
-        }
-
-        Traits::advanceByte(src, charsNeeded - 1);
-        return charsNeeded;
-    }
-}
-
-enum DataEndianness
-{
-    DetectEndianness,
-    BigEndianness,
-    LittleEndianness
-};
-
-struct QUtf8
-{
-    static QChar *convertToUnicode(QChar *, const char *, int) noexcept;
-    static QString convertToUnicode(const char *, int);
-    static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *);
-    static QByteArray convertFromUnicode(const QChar *, int);
-    static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *);
-    struct ValidUtf8Result {
-        bool isValidUtf8;
-        bool isValidAscii;
-    };
-    static ValidUtf8Result isValidUtf8(const char *, qsizetype);
-    static int compareUtf8(const char *, qsizetype, const QChar *, int);
-    static int compareUtf8(const char *, qsizetype, QLatin1String s);
-};
-
-struct QUtf16
-{
-    static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness);
-    static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness);
-};
-
-struct QUtf32
-{
-    static QString convertToUnicode(const char *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness);
-    static QByteArray convertFromUnicode(const QChar *, int, QTextCodec::ConverterState *, DataEndianness = DetectEndianness);
-};
-
-/*
- Converts from different utf encodings looking at a possible byte order mark at the
- beginning of the string. If no BOM exists, utf-8 is assumed.
- */
-QString Q_CORE_EXPORT qFromUtfEncoded(const QByteArray &ba);
-
-
 #if QT_CONFIG(textcodec)
 
 class QUtf8Codec : public QTextCodec {
diff --git a/src/corelib/global/qconfig-bootstrapped.h b/src/corelib/global/qconfig-bootstrapped.h
index 349dfeea1c..6ef4acf503 100644
--- a/src/corelib/global/qconfig-bootstrapped.h
+++ b/src/corelib/global/qconfig-bootstrapped.h
@@ -141,18 +141,17 @@
 #define QT_FEATURE_zstd -1
 #endif
 
+#define QT_FEATURE_textcodec -1
+
 #ifdef QT_BUILD_QMAKE
 #define QT_FEATURE_commandlineparser -1
 #define QT_NO_COMPRESS
 #define QT_JSON_READONLY
 #define QT_FEATURE_settings 1
 #define QT_NO_STANDARDPATHS
-#define QT_FEATURE_textcodec -1
 #else
-#define QT_FEATURE_codecs -1
 #define QT_FEATURE_commandlineparser 1
 #define QT_FEATURE_settings -1
-#define QT_FEATURE_textcodec 1
 #endif
 
 #endif // QT_BOOTSTRAPPED
diff --git a/src/corelib/io/qfilesystemiterator_unix.cpp b/src/corelib/io/qfilesystemiterator_unix.cpp
index ceea3a467c..4bc6b2e31b 100644
--- a/src/corelib/io/qfilesystemiterator_unix.cpp
+++ b/src/corelib/io/qfilesystemiterator_unix.cpp
@@ -42,7 +42,7 @@
 
 #if QT_CONFIG(textcodec)
 #  include <qtextcodec.h>
-#  include <private/qutfcodec_p.h>
+#  include <private/qstringconverter_p.h>
 #endif
 
 #ifndef QT_NO_FILESYSTEMITERATOR
diff --git a/src/corelib/io/qurlrecode.cpp b/src/corelib/io/qurlrecode.cpp
index 1c9d0d1d4b..2788de3b3a 100644
--- a/src/corelib/io/qurlrecode.cpp
+++ b/src/corelib/io/qurlrecode.cpp
@@ -38,7 +38,7 @@
 ****************************************************************************/
 
 #include "qurl.h"
-#include "private/qutfcodec_p.h"
+#include "private/qstringconverter_p.h"
 #include "private/qtools_p.h"
 #include "private/qsimd_p.h"
 
diff --git a/src/corelib/serialization/qcborstreamreader.cpp b/src/corelib/serialization/qcborstreamreader.cpp
index ec385e0629..c49a76aada 100644
--- a/src/corelib/serialization/qcborstreamreader.cpp
+++ b/src/corelib/serialization/qcborstreamreader.cpp
@@ -44,7 +44,7 @@
 
 #include <private/qbytearray_p.h>
 #include <private/qnumeric_p.h>
-#include <private/qutfcodec_p.h>
+#include <private/qstringconverter_p.h>
 #include <qdebug.h>
 #include <qstack.h>
 
diff --git a/src/corelib/serialization/qcborvalue_p.h b/src/corelib/serialization/qcborvalue_p.h
index 1d686f118b..38383c7522 100644
--- a/src/corelib/serialization/qcborvalue_p.h
+++ b/src/corelib/serialization/qcborvalue_p.h
@@ -54,7 +54,7 @@
 #include "qcborvalue.h"
 
 #include <private/qglobal_p.h>
-#include <private/qutfcodec_p.h>
+#include <private/qstringconverter_p.h>
 
 #include <math.h>
 
diff --git a/src/corelib/serialization/qjsonparser.cpp b/src/corelib/serialization/qjsonparser.cpp
index 46d82ea47f..116e7f6995 100644
--- a/src/corelib/serialization/qjsonparser.cpp
+++ b/src/corelib/serialization/qjsonparser.cpp
@@ -44,7 +44,7 @@
 #include <qdebug.h>
 #include "qjsonparser_p.h"
 #include "qjson_p.h"
-#include "private/qutfcodec_p.h"
+#include "private/qstringconverter_p.h"
 #include "private/qcborvalue_p.h"
 #include "private/qnumeric_p.h"
 
diff --git a/src/corelib/serialization/qjsonwriter.cpp b/src/corelib/serialization/qjsonwriter.cpp
index 590b59f09c..8610cdff7e 100644
--- a/src/corelib/serialization/qjsonwriter.cpp
+++ b/src/corelib/serialization/qjsonwriter.cpp
@@ -42,7 +42,7 @@
 #include <qlocale.h>
 #include "qjsonwriter_p.h"
 #include "qjson_p.h"
-#include "private/qutfcodec_p.h"
+#include "private/qstringconverter_p.h"
 #include <private/qnumeric_p.h>
 #include <private/qcborvalue_p.h>
 
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 0682395ebf..68a0f757c8 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -48,7 +48,7 @@
 #if QT_CONFIG(textcodec)
 #include <qtextcodec.h>
 #endif
-#include <private/qutfcodec_p.h>
+#include <private/qstringconverter_p.h>
 #include "qlocale_tools_p.h"
 #include "private/qsimd_p.h"
 #include <qnumeric.h>
diff --git a/src/corelib/text/qstringbuilder.cpp b/src/corelib/text/qstringbuilder.cpp
index 29bd216e80..4e47ba0922 100644
--- a/src/corelib/text/qstringbuilder.cpp
+++ b/src/corelib/text/qstringbuilder.cpp
@@ -38,7 +38,7 @@
 ****************************************************************************/
 
 #include "qstringbuilder.h"
-#include <private/qutfcodec_p.h>
+#include <private/qstringconverter_p.h>
 
 QT_BEGIN_NAMESPACE
 
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index 1f61eee5cb..36567f5106 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -39,10 +39,954 @@
 ****************************************************************************/
 
 #include <qstringconverter.h>
-#include <private/qutfcodec_p.h>
+#include <private/qstringconverter_p.h>
+#include "qendian.h"
+
+#include "private/qsimd_p.h"
+#include "private/qstringiterator_p.h"
 
 QT_BEGIN_NAMESPACE
 
+enum { Endian = 0, Data = 1 };
+
+static const uchar utf8bom[] = { 0xef, 0xbb, 0xbf };
+
+#if (defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)) \
+    || (defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64))
+static Q_ALWAYS_INLINE uint qBitScanReverse(unsigned v) noexcept
+{
+    uint result = qCountLeadingZeroBits(v); // zero bits above the highest set bit
+    // Turn the leading-zero count into a bit index counted up from the least significant bit:
+    // XOR-ing with (bit width - 1) maps a count c in [0, width - 1] to (width - 1 - c), which
+    // is the position of the highest set bit. NOTE(review): v == 0 is presumably not meaningful here.
+    result ^= sizeof(unsigned) * 8 - 1;
+    return result;
+}
+#endif
+
+#if defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
+static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end)
+{
+    // narrow 16-bit code units to bytes, sixteen characters at a time, while they are ASCII
+    for ( ; end - src >= 16; src += 16, dst += 16) {
+#  ifdef __AVX2__
+        __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
+        __m128i data1 = _mm256_castsi256_si128(data);
+        __m128i data2 = _mm256_extracti128_si256(data, 1);
+#  else
+        __m128i data1 = _mm_loadu_si128((const __m128i*)src);
+        __m128i data2 = _mm_loadu_si128(1+(const __m128i*)src);
+#  endif
+
+        // check if everything is ASCII
+        // the highest ASCII value is U+007F
+        // Do the packing directly:
+        // The PACKUSWB instruction packs signed 16-bit integers to unsigned 8-bit ones
+        // with saturation. That is, anything from 0x0100 to 0x7fff is saturated to 0xff,
+        // while all negatives (0x8000 to 0xffff) get saturated to 0x00. To detect non-ASCII,
+        // we simply do a signed greater-than comparison to 0x00. That means we detect NULs as
+        // "non-ASCII", but it's an acceptable compromise.
+        __m128i packed = _mm_packus_epi16(data1, data2);
+        __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128()); // lanes set for bytes 0x01..0x7f; inverted below
+
+        // store, even if there are non-ASCII characters here
+        _mm_storeu_si128((__m128i*)dst, packed);
+
+        // n will contain 1 bit set per character in [data1, data2] that is non-ASCII (or NUL)
+        ushort n = ~_mm_movemask_epi8(nonAscii);
+        if (n) {
+            // find the next probable ASCII character
+            // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
+            // characters still coming
+            nextAscii = src + qBitScanReverse(n) + 1; // one past the last flagged character
+
+            n = qCountTrailingZeroBits(n); // length of the leading run that was not flagged
+            dst += n;
+            src += n;
+            return false; // src now points at the first flagged character
+        }
+    }
+
+    if (end - src >= 8) {
+        // do eight characters at a time
+        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
+        __m128i packed = _mm_packus_epi16(data, data); // both halves of packed hold the same eight bytes
+        __m128i nonAscii = _mm_cmpgt_epi8(packed, _mm_setzero_si128());
+
+        // store even non-ASCII
+        _mm_storel_epi64(reinterpret_cast<__m128i *>(dst), packed);
+
+        uchar n = ~_mm_movemask_epi8(nonAscii); // truncation to uchar keeps the low eight mask bits
+        if (n) {
+            nextAscii = src + qBitScanReverse(n) + 1;
+            n = qCountTrailingZeroBits(n);
+            dst += n;
+            src += n;
+            return false;
+        }
+    }
+
+    return src == end; // note: the 8-wide step above does not advance src/dst when all eight are ASCII
+}
+
+static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
+{
+    // widen bytes to 16-bit code units, sixteen characters at a time, while they are ASCII
+    for ( ; end - src >= 16; src += 16, dst += 16) {
+        __m128i data = _mm_loadu_si128((const __m128i*)src);
+
+#ifdef __AVX2__
+        const int BitSpacing = 2; // movemask below yields two bits per character
+        // load and zero extend to an YMM register
+        const __m256i extended = _mm256_cvtepu8_epi16(data);
+
+        uint n = _mm256_movemask_epi8(extended); // upper byte of every lane is zero, so only even bits can be set
+        if (!n) {
+            // store
+            _mm256_storeu_si256((__m256i*)dst, extended);
+            continue;
+        }
+#else
+        const int BitSpacing = 1; // movemask below yields one bit per character
+
+        // check if everything is ASCII
+        // movemask extracts the high bit of every byte, so n is non-zero if something isn't ASCII
+        uint n = _mm_movemask_epi8(data);
+        if (!n) {
+            // unpack
+            _mm_storeu_si128((__m128i*)dst, _mm_unpacklo_epi8(data, _mm_setzero_si128()));
+            _mm_storeu_si128(1+(__m128i*)dst, _mm_unpackhi_epi8(data, _mm_setzero_si128()));
+            continue;
+        }
+#endif
+
+        // copy the front part that is still ASCII
+        while (!(n & 1)) {
+            *dst++ = *src++;
+            n >>= BitSpacing; // keep bit 0 of n aligned with the character at src
+        }
+
+        // find the next probable ASCII character
+        // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
+        // characters still coming
+        n = qBitScanReverse(n); // bit index of the last non-ASCII character, relative to src
+        nextAscii = src + (n / BitSpacing) + 1;
+        return false; // src now points at the first non-ASCII byte
+
+    }
+
+    if (end - src >= 8) {
+        __m128i data = _mm_loadl_epi64(reinterpret_cast<const __m128i *>(src));
+        uint n = _mm_movemask_epi8(data) & 0xff; // only the low eight bytes were loaded
+        if (!n) {
+            // unpack and store
+            _mm_storeu_si128(reinterpret_cast<__m128i *>(dst), _mm_unpacklo_epi8(data, _mm_setzero_si128()));
+        } else {
+            while (!(n & 1)) {
+                *dst++ = *src++;
+                n >>= 1;
+            }
+
+            n = qBitScanReverse(n);
+            nextAscii = src + n + 1;
+            return false;
+        }
+    }
+
+    return src == end; // note: the 8-wide step above stores but does not advance src/dst
+}
+
+static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
+{
+#ifdef __AVX2__
+    // do 32 characters at a time
+    // (this is similar to simdTestMask in qstring.cpp)
+    const __m256i mask = _mm256_set1_epi8(0x80); // high bit of every byte
+    for ( ; end - src >= 32; src += 32) {
+        __m256i data = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(src));
+        if (_mm256_testz_si256(mask, data)) // no byte has its high bit set
+            continue;
+
+        uint n = _mm256_movemask_epi8(data);
+        Q_ASSUME(n);
+
+        // find the next probable ASCII character
+        // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
+        // characters still coming
+        nextAscii = src + qBitScanReverse(n) + 1;
+
+        // return the non-ASCII character
+        return src + qCountTrailingZeroBits(n);
+    }
+#endif
+
+    // do sixteen characters at a time
+    for ( ; end - src >= 16; src += 16) {
+        __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(src));
+
+        // check if everything is ASCII
+        // movemask extracts the high bit of every byte, so n is non-zero if something isn't ASCII
+        uint n = _mm_movemask_epi8(data);
+        if (!n)
+            continue;
+
+        // find the next probable ASCII character
+        // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
+        // characters still coming
+        nextAscii = src + qBitScanReverse(n) + 1; // one past the last non-ASCII byte of this chunk
+
+        // return the non-ASCII character
+        return src + qCountTrailingZeroBits(n); // first non-ASCII byte of this chunk
+    }
+
+    // do four characters at a time
+    for ( ; end - src >= 4; src += 4) {
+        quint32 data = qFromUnaligned<quint32>(src);
+        data &= 0x80808080U; // keep only the high bit of each of the four bytes
+        if (!data)
+            continue;
+
+        // We don't try to guess which of the three bytes is ASCII and which
+        // one isn't. The chance that at least two of them are non-ASCII is
+        // better than 75%.
+        nextAscii = src;
+        return src; // start of the four-byte group, not necessarily the non-ASCII byte itself
+    }
+    nextAscii = end;
+    return src; // fewer than four bytes remain; they have not been examined
+}
+#elif defined(__ARM_NEON__) && defined(Q_PROCESSOR_ARM_64) // vaddv is only available on Aarch64
+static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end)
+{
+    uint16x8_t maxAscii = vdupq_n_u16(0x7f);
+    uint16x8_t mask1 = { 1,      1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 }; // even bits, used with in.val[0]
+    uint16x8_t mask2 = vshlq_n_u16(mask1, 1); // odd bits, used with in.val[1]
+
+    // do sixteen characters at a time
+    for ( ; end - src >= 16; src += 16, dst += 16) {
+        // load 2 lanes (or: "load interleaved")
+        uint16x8x2_t in = vld2q_u16(src);
+
+        // check if any of the elements > 0x7f, select 1 bit per element (element 0 -> bit 0, element 1 -> bit 1, etc),
+        // add those together into a scalar, and merge the scalars.
+        uint16_t nonAscii = vaddvq_u16(vandq_u16(vcgtq_u16(in.val[0], maxAscii), mask1))
+                          | vaddvq_u16(vandq_u16(vcgtq_u16(in.val[1], maxAscii), mask2));
+
+        // merge the two lanes by shifting the values of the second by 8 and inserting them
+        uint16x8_t out = vsliq_n_u16(in.val[0], in.val[1], 8);
+
+        // store, even if there are non-ASCII characters here
+        vst1q_u8(dst, vreinterpretq_u8_u16(out));
+
+        if (nonAscii) {
+            // find the next probable ASCII character
+            // we don't want to load 32 bytes again in this loop if we know there are non-ASCII
+            // characters still coming
+            nextAscii = src + qBitScanReverse(nonAscii) + 1; // one past the last non-ASCII character
+
+            nonAscii = qCountTrailingZeroBits(nonAscii); // reused as the length of the leading ASCII run
+            dst += nonAscii;
+            src += nonAscii;
+            return false; // src now points at the first non-ASCII character
+        }
+    }
+    return src == end; // true only when the loop above consumed all of the input
+}
+
+static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
+{
+    // do eight characters at a time
+    uint8x8_t msb_mask = vdup_n_u8(0x80);
+    uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; // one distinct bit per lane
+    for ( ; end - src >= 8; src += 8, dst += 8) {
+        uint8x8_t c = vld1_u8(src);
+        uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask)); // bit i set when byte i is >= 0x80
+        if (!n) {
+            // store
+            vst1q_u16(dst, vmovl_u8(c));
+            continue;
+        }
+
+        // copy the front part that is still ASCII
+        while (!(n & 1)) {
+            *dst++ = *src++;
+            n >>= 1; // keep bit 0 of n aligned with the byte at src
+        }
+
+        // find the next probable ASCII character
+        // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
+        // characters still coming
+        n = qBitScanReverse(n); // index of the last non-ASCII byte, relative to src
+        nextAscii = src + n + 1;
+        return false; // src now points at the first non-ASCII byte
+
+    }
+    return src == end; // true only when the loop above consumed all of the input
+}
+
+static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
+{
+    // The SIMD code below is untested, so just force an early return until
+    // we've had the time to verify it works.
+    nextAscii = end;
+    return src; // src is returned unchanged; everything below is deliberately unreachable for now
+
+    // do eight characters at a time
+    uint8x8_t msb_mask = vdup_n_u8(0x80);
+    uint8x8_t add_mask = { 1, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7 }; // one distinct bit per lane
+    for ( ; end - src >= 8; src += 8) {
+        uint8x8_t c = vld1_u8(src);
+        uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask)); // bit i set when byte i is >= 0x80
+        if (!n)
+            continue;
+
+        // find the next probable ASCII character
+        // we don't want to load 16 bytes again in this loop if we know there are non-ASCII
+        // characters still coming
+        nextAscii = src + qBitScanReverse(n) + 1;
+
+        // return the non-ASCII character
+        return src + qCountTrailingZeroBits(n);
+    }
+    nextAscii = end;
+    return src; // fewer than eight bytes remain; they have not been examined
+}
+#else
+static inline bool simdEncodeAscii(uchar *, const ushort *, const ushort *, const ushort *)
+{
+    return false; // #else fallback: consumes nothing, so callers go on with their scalar loop
+}
+
+static inline bool simdDecodeAscii(ushort *, const uchar *, const uchar *, const uchar *)
+{
+    return false; // #else fallback: consumes nothing, so callers go on with their scalar loop
+}
+
+static inline const uchar *simdFindNonAscii(const uchar *src, const uchar *end, const uchar *&nextAscii)
+{
+    nextAscii = end; // #else fallback: never ask for another search within this input
+    return src; // nothing skipped; the caller inspects every byte itself
+}
+#endif
+
+QByteArray QUtf8::convertFromUnicode(const QChar *uc, qsizetype len)
+{
+    // reserve three bytes per UTF-16 code unit, then trim to what was actually written
+    QByteArray utf8(len * 3, Qt::Uninitialized);
+    uchar *const first = reinterpret_cast<uchar *>(const_cast<char *>(utf8.constData()));
+    uchar *out = first;
+    const ushort *in = reinterpret_cast<const ushort *>(uc);
+    const ushort *const last = in + len;
+
+    while (in != last) {
+        // a true result ends the conversion; otherwise continue with the scalar loop
+        const ushort *resumeSimd = last;
+        if (simdEncodeAscii(out, resumeSimd, in, last))
+            break;
+
+        // scalar encoding up to the position where the SIMD pass may be retried
+        do {
+            const ushort unit = *in++;
+            if (QUtf8Functions::toUtf8<QUtf8BaseTraits>(unit, out, in, last) < 0)
+                *out++ = '?'; // encoding error - append '?'
+        } while (in < resumeSimd);
+    }
+
+    utf8.truncate(out - first);
+    return utf8;
+}
+
+QByteArray QUtf8::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state)
+{ // UTF-16 -> UTF-8 with optional state: BOM flag, invalid-character count, pending high surrogate
+    uchar replacement = '?';
+    // 3 bytes per input unit, +1 because a high surrogate pending in state can make the first unit cost 4
+    qsizetype rlen = 3*len + 1;
+    int surrogate_high = -1;
+    if (state) {
+        if (state->flags & QStringConverter::ConvertInvalidToNull)
+            replacement = 0;
+        if (!(state->flags & QStringConverter::IgnoreHeader))
+            rlen += 3; // room for the UTF-8 BOM
+        if (state->remainingChars)
+            surrogate_high = state->state_data[0]; // high surrogate left over from the previous call
+    }
+
+    QByteArray rstr(rlen, Qt::Uninitialized);
+    uchar *cursor = reinterpret_cast<uchar *>(const_cast<char *>(rstr.constData()));
+    const ushort *src = reinterpret_cast<const ushort *>(uc);
+    const ushort *const end = src + len;
+
+    int invalid = 0;
+    if (state && !(state->flags & QStringConverter::IgnoreHeader)) {
+        // append UTF-8 BOM
+        *cursor++ = utf8bom[0];
+        *cursor++ = utf8bom[1];
+        *cursor++ = utf8bom[2];
+    }
+
+    const ushort *nextAscii = src;
+    while (src != end) {
+        int res;
+        ushort uc;
+        if (surrogate_high != -1) {
+            // finish the pair started in the previous call: src is expected to point at the low surrogate
+            uc = surrogate_high;
+            surrogate_high = -1;
+            res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end);
+        } else {
+            if (src >= nextAscii && simdEncodeAscii(cursor, nextAscii, src, end))
+                break;
+
+            uc = *src++;
+            res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end);
+        }
+        if (Q_LIKELY(res >= 0))
+            continue;
+
+        if (res == QUtf8BaseTraits::Error) {
+            // encoding error
+            ++invalid;
+            *cursor++ = replacement;
+        } else if (res == QUtf8BaseTraits::EndOfString) {
+            surrogate_high = uc; // input ended on a high surrogate: keep it for the next call
+            break;
+        }
+    }
+
+    rstr.resize(cursor - (const uchar*)rstr.constData());
+    if (state) {
+        state->invalidChars += invalid;
+        state->flags |= QStringConverter::IgnoreHeader; // the BOM is written at most once per state
+        state->remainingChars = 0;
+        if (surrogate_high >= 0) {
+            state->remainingChars = 1;
+            state->state_data[0] = surrogate_high;
+        }
+    }
+    return rstr;
+}
+
+QString QUtf8::convertToUnicode(const char *chars, qsizetype len)
+{
+    // Decoding UTF-8 never yields more UTF-16 code units than there are input bytes:
+    //    UTF-8 sequence     UTF-16 output
+    //      1 byte             1 code unit
+    //      2 bytes            1 code unit
+    //      3 bytes            1 code unit
+    //      4 bytes            2 code units (one surrogate pair)
+    // so a buffer of len code units always suffices. Pure US-ASCII input fills it
+    // completely, U+0080-U+07FF or non-BMP text fills half of it, and
+    // U+0800-U+FFFF text fills a third.
+    //
+    // Malformed input respects the same bound, because every invalid byte is
+    // replaced by a single replacement character.
+    QString utf16(len, Qt::Uninitialized);
+    QChar *const first = const_cast<QChar*>(utf16.constData()); // freshly created, so not shared
+    const QChar *const last = convertToUnicode(first, chars, len);
+    utf16.truncate(last - first);
+    return utf16;
+}
+
+/*!
+    \since 5.7
+    \overload
+
+    Converts the UTF-8 sequence of \a len octets beginning at \a chars to
+    a sequence of QChar starting at \a buffer. The buffer is expected to be
+    large enough to hold the result. An upper bound for the size of the
+    buffer is \a len QChars.
+
+    If, during decoding, an error occurs, a QChar::ReplacementCharacter is
+    written.
+
+    Returns a pointer to one past the last QChar written.
+
+    This function never throws.
+*/
+
+QChar *QUtf8::convertToUnicode(QChar *buffer, const char *chars, qsizetype len) noexcept
+{
+    ushort *dst = reinterpret_cast<ushort *>(buffer);
+    const uchar *src = reinterpret_cast<const uchar *>(chars);
+    const uchar *end = src + len;
+
+    // attempt to do a full decoding in SIMD
+    const uchar *nextAscii = end;
+    if (!simdDecodeAscii(dst, nextAscii, src, end)) {
+        // the SIMD pass did not reach the end (non-ASCII entry or a tail it does not handle)
+        // check if we failed to decode the UTF-8 BOM; if so, skip it
+        if (Q_UNLIKELY(src == reinterpret_cast<const uchar *>(chars)) // nothing consumed yet
+                && end - src >= 3
+                && Q_UNLIKELY(src[0] == utf8bom[0] && src[1] == utf8bom[1] && src[2] == utf8bom[2])) {
+            src += 3;
+        }
+
+        while (src < end) {
+            nextAscii = end;
+            if (simdDecodeAscii(dst, nextAscii, src, end))
+                break;
+
+            do { // scalar decoding until the position reported in nextAscii
+                uchar b = *src++;
+                int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, dst, src, end);
+                if (res < 0) {
+                    // decoding error (invalid or truncated): one replacement character for this byte
+                    *dst++ = QChar::ReplacementCharacter;
+                }
+            } while (src < nextAscii);
+        }
+    }
+
+    return reinterpret_cast<QChar *>(dst); // one past the last QChar written
+}
+
+QString QUtf8::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state)
+{ // UTF-8 -> UTF-16 with optional state: BOM flag, invalid-character count, bytes of an unfinished sequence
+    bool headerdone = false;
+    ushort replacement = QChar::ReplacementCharacter;
+    int invalid = 0;
+    int res;
+    uchar ch = 0;
+
+    // See above for buffer requirements for stateless decoding. However, that
+    // fails if the state is not empty. The following situations can add to the
+    // requirements:
+    //  state contains      chars starts with           requirement
+    //   1 of 2 bytes       valid continuation          0
+    //   2 of 3 bytes       same                        0
+    //   3 bytes of 4       same                        +1 (need to insert surrogate pair)
+    //   1 of 2 bytes       invalid continuation        +1 (need to insert replacement and restart)
+    //   2 of 3 bytes       same                        +1 (same)
+    //   3 of 4 bytes       same                        +1 (same)
+    QString result(len + 1, Qt::Uninitialized);
+
+    ushort *dst = reinterpret_cast<ushort *>(const_cast<QChar *>(result.constData()));
+    const uchar *src = reinterpret_cast<const uchar *>(chars);
+    const uchar *end = src + len;
+
+    if (state) {
+        if (state->flags & QStringConverter::IgnoreHeader)
+            headerdone = true;
+        if (state->flags & QStringConverter::ConvertInvalidToNull)
+            replacement = QChar::Null;
+        if (state->remainingChars) {
+            // handle incoming state first
+            uchar remainingCharsData[4]; // longest UTF-8 sequence possible
+            qsizetype remainingCharsCount = state->remainingChars;
+            // compare as qsizetype: an int comparison would truncate end - src for inputs above 2 GiB
+            qsizetype newCharsToCopy = qMin<qsizetype>(sizeof(remainingCharsData) - remainingCharsCount, end - src);
+            memset(remainingCharsData, 0, sizeof(remainingCharsData));
+            memcpy(remainingCharsData, &state->state_data[0], remainingCharsCount);
+            memcpy(remainingCharsData + remainingCharsCount, src, newCharsToCopy);
+
+            const uchar *begin = &remainingCharsData[1];
+            res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(remainingCharsData[0], dst, begin,
+                    static_cast<const uchar *>(remainingCharsData) + remainingCharsCount + newCharsToCopy);
+            if (res == QUtf8BaseTraits::Error || (res == QUtf8BaseTraits::EndOfString && len == 0)) {
+                // special case for len == 0:
+                // if we were supplied an empty string, terminate the previous, unfinished sequence with error
+                ++invalid;
+                *dst++ = replacement;
+            } else if (res == QUtf8BaseTraits::EndOfString) {
+                // if we got EndOfString again, then there were too few bytes in src;
+                // copy to our state and return
+                state->remainingChars = remainingCharsCount + newCharsToCopy;
+                memcpy(&state->state_data[0], remainingCharsData, state->remainingChars);
+                return QString();
+            } else if (!headerdone && res >= 0) {
+                // eat the UTF-8 BOM
+                headerdone = true;
+                if (dst[-1] == 0xfeff)
+                    --dst;
+            }
+
+            // adjust src now that we have maybe consumed a few chars
+            if (res >= 0) {
+                Q_ASSERT(res > remainingCharsCount);
+                src += res - remainingCharsCount; // only the bytes that came from this call's input
+            }
+        }
+    }
+
+    // main body, stateless decoding
+    res = 0;
+    const uchar *nextAscii = src;
+    const uchar *start = src;
+    while (res >= 0 && src < end) {
+        if (src >= nextAscii && simdDecodeAscii(dst, nextAscii, src, end))
+            break;
+
+        ch = *src++;
+        res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(ch, dst, src, end);
+        if (!headerdone && res >= 0) {
+            headerdone = true;
+            if (src == start + 3) { // 3 == sizeof(utf8-bom)
+                // eat the UTF-8 BOM (it can only appear at the beginning of the string).
+                if (dst[-1] == 0xfeff)
+                    --dst;
+            }
+        }
+        if (res == QUtf8BaseTraits::Error) {
+            res = 0; // keep looping; only EndOfString leaves the loop early
+            ++invalid;
+            *dst++ = replacement;
+        }
+    }
+
+    if (!state && res == QUtf8BaseTraits::EndOfString) {
+        // unterminated UTF sequence
+        *dst++ = QChar::ReplacementCharacter;
+        while (src++ < end)
+            *dst++ = QChar::ReplacementCharacter;
+    }
+
+    result.truncate(dst - (const ushort *)result.unicode());
+    if (state) {
+        state->invalidChars += invalid;
+        if (headerdone)
+            state->flags |= QStringConverter::IgnoreHeader;
+        if (res == QUtf8BaseTraits::EndOfString) {
+            --src; // unread the byte in ch
+            state->remainingChars = end - src;
+            memcpy(&state->state_data[0], src, end - src);
+        } else {
+            state->remainingChars = 0;
+        }
+    }
+    return result;
+}
+
+struct QUtf8NoOutputTraits : public QUtf8BaseTraitsNoAscii
+{ // traits for decoding without storing anything: both append hooks are empty
+    struct NoOutput {};
+    static void appendUtf16(const NoOutput &, ushort) {}
+    static void appendUcs4(const NoOutput &, uint) {}
+};
+
+QUtf8::ValidUtf8Result QUtf8::isValidUtf8(const char *chars, qsizetype len)
+{ // returns { input decodes as UTF-8, input contains no byte with the high bit set }
+    const uchar *src = reinterpret_cast<const uchar *>(chars);
+    const uchar *end = src + len;
+    const uchar *nextAscii = src;
+    bool isValidAscii = true;
+
+    while (src < end) {
+        if (src >= nextAscii)
+            src = simdFindNonAscii(src, end, nextAscii);
+        if (src == end)
+            break;
+
+        do {
+            uchar b = *src++;
+            if ((b & 0x80) == 0)
+                continue; // ASCII byte; inside do-while this jumps to the loop condition
+
+            isValidAscii = false;
+            QUtf8NoOutputTraits::NoOutput output;
+            int res = QUtf8Functions::fromUtf8<QUtf8NoOutputTraits>(b, output, src, end);
+            if (res < 0) {
+                // decoding error (invalid or truncated sequence)
+                return { false, false };
+            }
+        } while (src < nextAscii);
+    }
+
+    return { true, isValidAscii };
+}
+
+int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, const QChar *utf16, qsizetype u16len)
+{
+    // Walk both strings one code point at a time; the first mismatch decides the order.
+    const uchar *lhs = reinterpret_cast<const uchar *>(utf8);
+    const uchar *const lhsEnd = lhs + u8len;
+    QStringIterator rhs(utf16, utf16 + u16len);
+
+    while (lhs < lhsEnd && rhs.hasNext()) {
+        uint left;
+        uint *sink = &left; // fromUtf8 stores the decoded code point through this pointer
+        const uchar lead = *lhs++;
+        if (QUtf8Functions::fromUtf8<QUtf8BaseTraits>(lead, sink, lhs, lhsEnd) < 0)
+            left = QChar::ReplacementCharacter; // malformed UTF-8 compares as the replacement character
+
+        const uint right = rhs.next();
+        if (left != right)
+            return int(left) - int(right);
+    }
+
+    // Everything compared equal so far, so the string with data left over sorts last.
+    const int lhsLeft = lhs < lhsEnd;
+    const int rhsLeft = rhs.hasNext();
+    return lhsLeft - rhsLeft;
+}
+
+int QUtf8::compareUtf8(const char *utf8, qsizetype u8len, QLatin1String s)
+{
+    // Compare code point by code point; every Latin-1 byte is used as its own code point.
+    const uchar *lhs = reinterpret_cast<const uchar *>(utf8);
+    const uchar *const lhsEnd = lhs + u8len;
+    const uchar *rhs = reinterpret_cast<const uchar *>(s.latin1());
+    const uchar *const rhsEnd = rhs + s.size();
+
+    while (lhs < lhsEnd && rhs < rhsEnd) {
+        uint left;
+        uint *sink = &left; // fromUtf8 stores the decoded code point through this pointer
+        const uchar lead = *lhs++;
+        if (QUtf8Functions::fromUtf8<QUtf8BaseTraits>(lead, sink, lhs, lhsEnd) < 0)
+            left = QChar::ReplacementCharacter; // malformed UTF-8 compares as the replacement character
+
+        const uint right = *rhs++;
+        if (left != right)
+            return int(left) - int(right);
+    }
+
+    // Everything compared equal so far, so the string with data left over sorts last.
+    const int lhsLeft = lhs < lhsEnd;
+    const int rhsLeft = rhs < rhsEnd;
+    return lhsLeft - rhsLeft;
+}
+
+QByteArray QUtf16::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness e)
+{
+    // A BOM is emitted unless the caller's state says the header is to be ignored.
+    const bool writeBom = !state || !(state->flags & QStringConverter::IgnoreHeader);
+
+    // With no explicit request, produce the host's byte order.
+    const bool hostIsBig = QSysInfo::ByteOrder == QSysInfo::BigEndian;
+    const DataEndianness order = (e != DetectEndianness) ? e
+                               : hostIsBig ? BigEndianness : LittleEndianness;
+    const bool bigEndian = (order == BigEndianness);
+
+    QByteArray bytes;
+    bytes.resize(2*len + (writeBom ? 2 : 0));
+    char *out = bytes.data();
+    if (writeBom) {
+        QChar bom(QChar::ByteOrderMark);
+        if (bigEndian)
+            qToBigEndian(bom.unicode(), out);
+        else
+            qToLittleEndian(bom.unicode(), out);
+        out += 2;
+    }
+    if (bigEndian)
+        qToBigEndian<ushort>(uc, len, out);
+    else
+        qToLittleEndian<ushort>(uc, len, out);
+
+    if (state) {
+        state->flags |= QStringConverter::IgnoreHeader;
+        state->remainingChars = 0;
+    }
+    return bytes;
+}
+
+QString QUtf16::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness e)
+{ // byte-wise UTF-16 decoder; state keeps the detected endianness and a dangling first byte between calls
+    DataEndianness endian = e;
+    bool half = false; // true while buf holds the first byte of a two-byte unit
+    uchar buf = 0;
+    bool headerdone = false;
+    if (state) {
+        headerdone = state->flags & QStringConverter::IgnoreHeader;
+        if (endian == DetectEndianness)
+            endian = (DataEndianness)state->state_data[Endian];
+        if (state->remainingChars) {
+            half = true;
+            buf = state->state_data[Data];
+        }
+    }
+    if (headerdone && endian == DetectEndianness) // no BOM will be inspected: fall back to host order
+        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
+
+    QString result(len, Qt::Uninitialized); // worst case
+    QChar *qch = (QChar *)result.data();
+    while (len--) {
+        if (half) {
+            QChar ch;
+            if (endian == LittleEndianness) {
+                ch.setRow(*chars++);
+                ch.setCell(buf);
+            } else { // BigEndianness, and also DetectEndianness before the first unit is classified
+                ch.setRow(buf);
+                ch.setCell(*chars++);
+            }
+            if (!headerdone) {
+                headerdone = true;
+                if (endian == DetectEndianness) {
+                    if (ch == QChar::ByteOrderSwapped) {
+                        endian = LittleEndianness;
+                    } else if (ch == QChar::ByteOrderMark) {
+                        endian = BigEndianness;
+                    } else {
+                        if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
+                            endian = BigEndianness;
+                        } else {
+                            endian = LittleEndianness;
+                            // ch was assembled first-byte-high above, so swap its bytes
+                            ch = QChar::fromUcs2((ch.unicode() >> 8) | ((ch.unicode() & 0xff) << 8));
+                        }
+                        *qch++ = ch; // no BOM: the first unit is real data
+                    }
+                } else if (ch != QChar::ByteOrderMark) {
+                    *qch++ = ch;
+                }
+            } else {
+                *qch++ = ch;
+            }
+            half = false;
+        } else {
+            buf = *chars++;
+            half = true;
+        }
+    }
+    result.truncate(qch - result.unicode());
+
+    if (state) {
+        if (headerdone)
+            state->flags |= QStringConverter::IgnoreHeader;
+        state->state_data[Endian] = endian;
+        if (half) {
+            state->remainingChars = 1;
+            state->state_data[Data] = buf;
+        } else {
+            state->remainingChars = 0;
+            state->state_data[Data] = 0;
+        }
+    }
+    return result;
+}
+
+QByteArray QUtf32::convertFromUnicode(const QChar *uc, qsizetype len, QStringConverter::State *state, DataEndianness e)
+{ // UTF-16 -> UTF-32: one 32-bit unit per code point, preceded by a BOM unless the state suppresses it
+    DataEndianness endian = e;
+    qsizetype length =  4*len; // upper bound: a surrogate pair (two inputs) yields a single unit
+    if (!state || (!(state->flags & QStringConverter::IgnoreHeader))) {
+        length += 4;
+    }
+    if (e == DetectEndianness) // no explicit request: use the host byte order
+        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
+
+    QByteArray d(length, Qt::Uninitialized);
+    char *data = d.data();
+    if (!state || !(state->flags & QStringConverter::IgnoreHeader)) {
+        if (endian == BigEndianness) {
+            data[0] = 0;
+            data[1] = 0;
+            data[2] = (char)0xfe;
+            data[3] = (char)0xff;
+        } else {
+            data[0] = (char)0xff;
+            data[1] = (char)0xfe;
+            data[2] = 0;
+            data[3] = 0;
+        }
+        data += 4;
+    }
+
+    QStringIterator i(uc, uc + len);
+    if (endian == BigEndianness) {
+        while (i.hasNext()) {
+            uint cp = i.next();
+            qToBigEndian(cp, data);
+            data += 4;
+        }
+    } else {
+        while (i.hasNext()) {
+            uint cp = i.next();
+            qToLittleEndian(cp, data);
+            data += 4;
+        }
+    }
+
+    d.truncate(data - d.constData()); // drop the uninitialized tail left when surrogate pairs were merged
+    if (state) {
+        state->remainingChars = 0;
+        state->flags |= QStringConverter::IgnoreHeader;
+    }
+    return d;
+}
+
+QString QUtf32::convertToUnicode(const char *chars, qsizetype len, QStringConverter::State *state, DataEndianness e)
+{ // byte-wise UTF-32 decoder; state keeps the detected endianness and up to three pending bytes between calls
+    DataEndianness endian = e;
+    uchar tuple[4]; // bytes of the 32-bit unit currently being collected
+    int num = 0; // how many bytes of tuple are filled
+    bool headerdone = false;
+    if (state) {
+        headerdone = state->flags & QStringConverter::IgnoreHeader;
+        if (endian == DetectEndianness) {
+            endian = (DataEndianness)state->state_data[Endian];
+        }
+        num = state->remainingChars;
+        memcpy(tuple, &state->state_data[Data], 4);
+    }
+    if (headerdone && endian == DetectEndianness) // no BOM will be inspected: fall back to host order
+        endian = (QSysInfo::ByteOrder == QSysInfo::BigEndian) ? BigEndianness : LittleEndianness;
+
+    QString result;
+    result.resize((num + len) >> 2 << 1); // worst case
+    QChar *qch = (QChar *)result.data();
+
+    const char *end = chars + len;
+    while (chars < end) {
+        tuple[num++] = *chars++;
+        if (num == 4) {
+            if (!headerdone) {
+                headerdone = true;
+                if (endian == DetectEndianness) {
+                    if (tuple[0] == 0xff && tuple[1] == 0xfe && tuple[2] == 0 && tuple[3] == 0 && endian != BigEndianness) {
+                        endian = LittleEndianness;
+                        num = 0; // the BOM itself produces no output
+                        continue;
+                    } else if (tuple[0] == 0 && tuple[1] == 0 && tuple[2] == 0xfe && tuple[3] == 0xff && endian != LittleEndianness) {
+                        endian = BigEndianness;
+                        num = 0; // the BOM itself produces no output
+                        continue;
+                    } else if (QSysInfo::ByteOrder == QSysInfo::BigEndian) {
+                        endian = BigEndianness;
+                    } else {
+                        endian = LittleEndianness;
+                    }
+                } else if (((endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple)) == QChar::ByteOrderMark) {
+                    num = 0; // explicit endianness: a leading BOM in that byte order is dropped
+                    continue;
+                }
+            }
+            uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
+            for (char16_t c : QChar::fromUcs4(code))
+                *qch++ = c;
+            num = 0;
+        }
+    }
+    result.truncate(qch - result.unicode());
+
+    if (state) {
+        if (headerdone)
+            state->flags |= QStringConverter::IgnoreHeader;
+        state->state_data[Endian] = endian;
+        state->remainingChars = num;
+        memcpy(&state->state_data[Data], tuple, 4);
+    }
+    return result;
+}
+
+QString qFromUtfEncoded(const QByteArray &ba)
+{ // chooses the UTF-32, UTF-16 or UTF-8 decoder depending on a leading byte order mark
+    const qsizetype arraySize = ba.size();
+    const uchar *buf = reinterpret_cast<const uchar *>(ba.constData());
+    const uint bom = 0xfeff;
+
+    if (arraySize > 3) {
+        uint uc = qFromUnaligned<uint>(buf); // first four bytes, compared with the BOM in either byte order
+        if (uc == qToBigEndian(bom) || uc == qToLittleEndian(bom))
+            return QUtf32::convertToUnicode(ba.constData(), ba.length(), nullptr); // utf-32
+    }
+
+    if (arraySize > 1) {
+        ushort uc = qFromUnaligned<ushort>(buf); // first two bytes, compared with the BOM in either byte order
+        if (uc == qToBigEndian(ushort(bom)) || uc == qToLittleEndian(ushort(bom)))
+            return QUtf16::convertToUnicode(ba.constData(), ba.length(), nullptr); // utf-16
+    }
+    return QUtf8::convertToUnicode(ba.constData(), ba.length()); // no UTF-32/UTF-16 BOM found
+}
+
 /*!
     \enum QStringConverter::Flag
 
@@ -60,7 +1004,8 @@ void QStringConverter::State::clear()
 {
     if (clearFn)
         clearFn(this);
-    state_data[0] = state_data[1] = state_data[2] = state_data[3] = 0;
+    else
+        state_data[0] = state_data[1] = state_data[2] = state_data[3] = 0;
     remainingChars = 0;
     invalidChars = 0;
 }
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h
new file mode 100644
index 0000000000..5764979542
--- /dev/null
+++ b/src/corelib/text/qstringconverter_p.h
@@ -0,0 +1,323 @@
+/****************************************************************************
+**
+** Copyright (C) 2018 The Qt Company Ltd.
+** Copyright (C) 2018 Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef QSTRINGCONVERTER_P_H
+#define QSTRINGCONVERTER_P_H
+
+//
+//  W A R N I N G
+//  -------------
+//
+// This file is not part of the Qt API.  It exists purely as an
+// implementation detail.  This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+//
+
+#include <QtCore/qstring.h>
+#include <QtCore/qendian.h>
+#include <QtCore/qstringconverter.h>
+
+QT_BEGIN_NAMESPACE
+
+struct QUtf8BaseTraits
+{ // default policy flags and pointer primitives consumed by QUtf8Functions::toUtf8 / fromUtf8
+    static const bool isTrusted = false; // false: fromUtf8 validates lead bytes and decoded values
+    static const bool allowNonCharacters = true; // true: non-characters are not rejected
+    static const bool skipAsciiHandling = false; // false: values below 0x80 are handled by toUtf8/fromUtf8
+    static const int Error = -1;
+    static const int EndOfString = -2;
+
+    static bool isValidCharacter(uint u)
+    { return int(u) >= 0; }
+
+    static void appendByte(uchar *&ptr, uchar b)
+    { *ptr++ = b; }
+
+    static uchar peekByte(const uchar *ptr, int n = 0)
+    { return ptr[n]; }
+
+    static qptrdiff availableBytes(const uchar *ptr, const uchar *end)
+    { return end - ptr; }
+
+    static void advanceByte(const uchar *&ptr, int n = 1)
+    { ptr += n; }
+
+    static void appendUtf16(ushort *&ptr, ushort uc)
+    { *ptr++ = uc; }
+
+    static void appendUcs4(ushort *&ptr, uint uc)
+    { // UTF-16 output: written as a high/low surrogate pair
+        appendUtf16(ptr, QChar::highSurrogate(uc));
+        appendUtf16(ptr, QChar::lowSurrogate(uc));
+    }
+
+    static ushort peekUtf16(const ushort *ptr, int n = 0)
+    { return ptr[n]; }
+
+    static qptrdiff availableUtf16(const ushort *ptr, const ushort *end)
+    { return end - ptr; }
+
+    static void advanceUtf16(const ushort *&ptr, int n = 1)
+    { ptr += n; }
+
+    // it's possible to output to UCS-4 too
+    static void appendUtf16(uint *&ptr, ushort uc)
+    { *ptr++ = uc; }
+
+    static void appendUcs4(uint *&ptr, uint uc)
+    { *ptr++ = uc; }
+};
+
+struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits
+{ // same as the base traits, but toUtf8/fromUtf8 skip their "below 0x80" fast path
+    static const bool skipAsciiHandling = true;
+};
+
+namespace QUtf8Functions
+{
+    /// returns 0 on success; errors can only happen if \a u is a surrogate:
+    /// Error if \a u is a low surrogate;
+    /// if \a u is a high surrogate, Error if the next isn't a low one,
+    /// EndOfString if we run into the end of the string.
+    template <typename Traits, typename OutputPtr, typename InputPtr> inline
+    int toUtf8(ushort u, OutputPtr &dst, InputPtr &src, InputPtr end)
+    {
+        if (!Traits::skipAsciiHandling && u < 0x80) {
+            // U+0000 to U+007F (US-ASCII) - one byte
+            Traits::appendByte(dst, uchar(u));
+            return 0;
+        } else if (u < 0x0800) {
+            // U+0080 to U+07FF - two bytes
+            // first of two bytes
+            Traits::appendByte(dst, 0xc0 | uchar(u >> 6));
+        } else {
+            if (!QChar::isSurrogate(u)) {
+                // U+0800 to U+FFFF (except U+D800-U+DFFF) - three bytes
+                if (!Traits::allowNonCharacters && QChar::isNonCharacter(u))
+                    return Traits::Error;
+
+                // first of three bytes
+                Traits::appendByte(dst, 0xe0 | uchar(u >> 12));
+            } else {
+                // U+10000 to U+10FFFF - four bytes
+                // need to get one extra codepoint
+                if (Traits::availableUtf16(src, end) == 0)
+                    return Traits::EndOfString; // note: reported before checking that u is a high surrogate
+
+                ushort low = Traits::peekUtf16(src);
+                if (!QChar::isHighSurrogate(u))
+                    return Traits::Error;
+                if (!QChar::isLowSurrogate(low))
+                    return Traits::Error;
+
+                Traits::advanceUtf16(src); // the low surrogate is consumed only once the pair is accepted
+                uint ucs4 = QChar::surrogateToUcs4(u, low);
+
+                if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4))
+                    return Traits::Error;
+
+                // first byte
+                Traits::appendByte(dst, 0xf0 | (uchar(ucs4 >> 18) & 0xf));
+
+                // second of four bytes
+                Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f));
+
+                // for the rest of the bytes
+                u = ushort(ucs4);
+            }
+
+            // second to last byte
+            Traits::appendByte(dst, 0x80 | (uchar(u >> 6) & 0x3f));
+        }
+
+        // last byte
+        Traits::appendByte(dst, 0x80 | (u & 0x3f));
+        return 0;
+    }
+
+    inline bool isContinuationByte(uchar b)
+    {
+        return (b & 0xc0) == 0x80; // top two bits are 10
+    }
+
+    /// returns the number of characters consumed (including \a b) in case of success;
+    /// returns negative in case of error: Traits::Error or Traits::EndOfString
+    template <typename Traits, typename OutputPtr, typename InputPtr> inline
+    int fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end)
+    {
+        int charsNeeded;
+        uint min_uc; // smallest code point allowed for this sequence length (rejects overlong forms)
+        uint uc;
+
+        if (!Traits::skipAsciiHandling && b < 0x80) {
+            // US-ASCII
+            Traits::appendUtf16(dst, b);
+            return 1;
+        }
+
+        if (!Traits::isTrusted && Q_UNLIKELY(b <= 0xC1)) {
+            // an UTF-8 first character must be at least 0xC0
+            // however, all 0xC0 and 0xC1 first bytes can only produce overlong sequences
+            return Traits::Error;
+        } else if (b < 0xe0) {
+            charsNeeded = 2;
+            min_uc = 0x80;
+            uc = b & 0x1f;
+        } else if (b < 0xf0) {
+            charsNeeded = 3;
+            min_uc = 0x800;
+            uc = b & 0x0f;
+        } else if (b < 0xf5) {
+            charsNeeded = 4;
+            min_uc = 0x10000;
+            uc = b & 0x07;
+        } else {
+            // the last Unicode character is U+10FFFF
+            // it's encoded in UTF-8 as "\xF4\x8F\xBF\xBF"
+            // therefore, a byte higher than 0xF4 is not the UTF-8 first byte
+            return Traits::Error;
+        }
+
+        qptrdiff bytesAvailable = Traits::availableBytes(src, end); // keep full width: int would truncate above 2 GiB
+        if (Q_UNLIKELY(bytesAvailable < charsNeeded - 1)) {
+            // it's possible that we have an error instead of just unfinished bytes
+            if (bytesAvailable > 0 && !isContinuationByte(Traits::peekByte(src, 0)))
+                return Traits::Error;
+            if (bytesAvailable > 1 && !isContinuationByte(Traits::peekByte(src, 1)))
+                return Traits::Error;
+            return Traits::EndOfString;
+        }
+
+        // first continuation character
+        b = Traits::peekByte(src, 0);
+        if (!isContinuationByte(b))
+            return Traits::Error;
+        uc <<= 6;
+        uc |= b & 0x3f;
+
+        if (charsNeeded > 2) {
+            // second continuation character
+            b = Traits::peekByte(src, 1);
+            if (!isContinuationByte(b))
+                return Traits::Error;
+            uc <<= 6;
+            uc |= b & 0x3f;
+
+            if (charsNeeded > 3) {
+                // third continuation character
+                b = Traits::peekByte(src, 2);
+                if (!isContinuationByte(b))
+                    return Traits::Error;
+                uc <<= 6;
+                uc |= b & 0x3f;
+            }
+        }
+
+        // we've decoded something; safety-check it
+        if (!Traits::isTrusted) {
+            if (uc < min_uc)
+                return Traits::Error;
+            if (QChar::isSurrogate(uc) || uc > QChar::LastValidCodePoint)
+                return Traits::Error;
+            if (!Traits::allowNonCharacters && QChar::isNonCharacter(uc))
+                return Traits::Error;
+        }
+
+        // write the UTF-16 sequence
+        if (!QChar::requiresSurrogates(uc)) {
+            // UTF-8 decoded and no surrogates are required
+            // detach if necessary
+            Traits::appendUtf16(dst, ushort(uc));
+        } else {
+            // UTF-8 decoded to something that requires a surrogate pair
+            Traits::appendUcs4(dst, uc);
+        }
+
+        Traits::advanceByte(src, charsNeeded - 1); // src moves only on success; every error path leaves it untouched
+        return charsNeeded;
+    }
+}
+
+enum DataEndianness
+{
+    DetectEndianness, // default argument of the QUtf16/QUtf32 converters declared below
+    BigEndianness,
+    LittleEndianness
+};
+
+struct QUtf8
+{ // static UTF-8 <-> UTF-16 helpers; the State overloads read and update the passed converter state
+    static QChar *convertToUnicode(QChar *, const char *, qsizetype) noexcept; // decodes into a caller-provided buffer
+    static QString convertToUnicode(const char *, qsizetype);
+    static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *);
+    static QByteArray convertFromUnicode(const QChar *, qsizetype);
+    static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *);
+    struct ValidUtf8Result {
+        bool isValidUtf8;
+        bool isValidAscii;
+    };
+    static ValidUtf8Result isValidUtf8(const char *, qsizetype);
+    static int compareUtf8(const char *, qsizetype, const QChar *, qsizetype);
+    static int compareUtf8(const char *, qsizetype, QLatin1String s);
+};
+
+struct QUtf16
+{ // static UTF-16 byte-stream converters; endianness defaults to DetectEndianness
+    static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
+    static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
+};
+
+struct QUtf32
+{ // static UTF-32 byte-stream converters; endianness defaults to DetectEndianness
+    static QString convertToUnicode(const char *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
+    static QByteArray convertFromUnicode(const QChar *, qsizetype, QStringConverter::State *, DataEndianness = DetectEndianness);
+};
+
+/*
+ Converts from different utf encodings looking at a possible byte order mark at the
+ beginning of the string. If no BOM exists, utf-8 is assumed.
+ */
+Q_CORE_EXPORT QString qFromUtfEncoded(const QByteArray &ba);
+
+QT_END_NAMESPACE
+
+#endif // QSTRINGCONVERTER_P_H
diff --git a/src/corelib/text/text.pri b/src/corelib/text/text.pri
index 4c584cf958..1275c014a8 100644
--- a/src/corelib/text/text.pri
+++ b/src/corelib/text/text.pri
@@ -20,6 +20,7 @@ HEADERS +=  \
         text/qstringalgorithms_p.h \
         text/qstringbuilder.h \
         text/qstringconverter.h \
+        text/qstringconverter_p.h \
         text/qstringiterator_p.h \
         text/qstringlist.h \
         text/qstringliteral.h \
diff --git a/src/gui/kernel/qclipboard.cpp b/src/gui/kernel/qclipboard.cpp
index 72f27d3e49..3b42e78624 100644
--- a/src/gui/kernel/qclipboard.cpp
+++ b/src/gui/kernel/qclipboard.cpp
@@ -46,9 +46,7 @@
 #include "qvariant.h"
 #include "qbuffer.h"
 #include "qimage.h"
-#if QT_CONFIG(textcodec)
-#include "private/qutfcodec_p.h"
-#endif
+#include "private/qstringconverter_p.h"
 
 #include "private/qguiapplication_p.h"
 #include <qpa/qplatformintegration.h>
diff --git a/src/tools/bootstrap/.prev_CMakeLists.txt b/src/tools/bootstrap/.prev_CMakeLists.txt
index 8f430c494e..f81e03adff 100644
--- a/src/tools/bootstrap/.prev_CMakeLists.txt
+++ b/src/tools/bootstrap/.prev_CMakeLists.txt
@@ -41,9 +41,6 @@ qt_add_module(Bootstrap
         ../../3rdparty/pcre2/src/pcre2_ucp.h
         ../../3rdparty/pcre2/src/pcre2_valid_utf.c
         ../../3rdparty/pcre2/src/pcre2_xclass.c
-        ../../corelib/codecs/qlatincodec.cpp
-        ../../corelib/codecs/qtextcodec.cpp
-        ../../corelib/codecs/qutfcodec.cpp
         ../../corelib/global/qendian.cpp
         ../../corelib/global/qglobal.cpp
         ../../corelib/global/qlogging.cpp
@@ -109,7 +106,6 @@ qt_add_module(Bootstrap
         ../../corelib/text/qstringbuilder.cpp
         ../../corelib/text/qstringconverter.cpp
         ../../corelib/text/qstringlist.cpp
-        ../../corelib/text/qstringview.cpp
         ../../corelib/text/qvsnprintf.cpp
         ../../corelib/time/qcalendar.cpp
         ../../corelib/time/qdatetime.cpp
diff --git a/src/tools/bootstrap/CMakeLists.txt b/src/tools/bootstrap/CMakeLists.txt
index 5a17888003..a5184fbb80 100644
--- a/src/tools/bootstrap/CMakeLists.txt
+++ b/src/tools/bootstrap/CMakeLists.txt
@@ -42,9 +42,6 @@ qt_extend_target(Bootstrap
         ../../3rdparty/pcre2/src/pcre2_ucp.h
         ../../3rdparty/pcre2/src/pcre2_valid_utf.c
         ../../3rdparty/pcre2/src/pcre2_xclass.c
-        ../../corelib/codecs/qlatincodec.cpp
-        ../../corelib/codecs/qtextcodec.cpp
-        ../../corelib/codecs/qutfcodec.cpp
         ../../corelib/global/qendian.cpp
         ../../corelib/global/qglobal.cpp
         ../../corelib/global/qlogging.cpp
@@ -110,7 +107,6 @@ qt_extend_target(Bootstrap
         ../../corelib/text/qstringbuilder.cpp
         ../../corelib/text/qstringconverter.cpp
         ../../corelib/text/qstringlist.cpp
-        ../../corelib/text/qstringview.cpp
         ../../corelib/text/qvsnprintf.cpp
         ../../corelib/time/qcalendar.cpp
         ../../corelib/time/qdatetime.cpp
diff --git a/src/tools/bootstrap/bootstrap.pro b/src/tools/bootstrap/bootstrap.pro
index 169c5fe1c2..5b7da8e687 100644
--- a/src/tools/bootstrap/bootstrap.pro
+++ b/src/tools/bootstrap/bootstrap.pro
@@ -28,9 +28,6 @@ INCLUDEPATH += \
     $$PWD/../../3rdparty/pcre2/src
 
 SOURCES += \
-           ../../corelib/codecs/qlatincodec.cpp \
-           ../../corelib/codecs/qtextcodec.cpp \
-           ../../corelib/codecs/qutfcodec.cpp \
            ../../corelib/global/qendian.cpp \
            ../../corelib/global/qglobal.cpp \
            ../../corelib/global/qlogging.cpp \
@@ -96,7 +93,6 @@ SOURCES += \
            ../../corelib/text/qstringconverter.cpp \
            ../../corelib/text/qstring_compat.cpp \
            ../../corelib/text/qstringlist.cpp \
-           ../../corelib/text/qstringview.cpp \
            ../../corelib/text/qvsnprintf.cpp \
            ../../corelib/time/qcalendar.cpp \
            ../../corelib/time/qdatetime.cpp \