corelib: Use char16_t and char32_t types for characters

Use standard char16_t and char32_t types instead of ushort and uint.
Remove members of QUtf8BaseTraits that use those integer types.

Change-Id: I77b1a9106244835c813336a50417f6bbdfada288
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
author: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> 2021-09-01 12:46:57 +0200
committer: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> 2021-09-06 17:41:53 +0200
commit: 65a2cb6964f3d1178718203f05e9143bd400c80e (patch)
tree: f6d99eaeca8c6787fa116eabcbf71c6515f336fa /src/corelib/text
parent: ee2ac8b4ef3b8691ef8b9d17a6d4c95b95db4349 (diff)
3 files changed, 52 insertions, 82 deletions
diff --git a/src/corelib/text/qstring.cpp b/src/corelib/text/qstring.cpp
index 23da7e8b18..ec813ee750 100644
--- a/src/corelib/text/qstring.cpp
+++ b/src/corelib/text/qstring.cpp
@@ -885,8 +885,8 @@ static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16,
     QStringIterator src2(utf16, utf16end);
 
     while (src1 < end1 && src2.hasNext()) {
-        uint uc1 = 0;
-        uint *output = &uc1;
+        char32_t uc1 = 0;
+        char32_t *output = &uc1;
         uchar b = *src1++;
         int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
         if (res < 0) {
@@ -896,7 +896,7 @@ static int ucstricmp8(const char *utf8, const char *utf8end, const QChar *utf16,
             uc1 = QChar::toCaseFolded(uc1);
         }
 
-        uint uc2 = QChar::toCaseFolded(src2.next());
+        char32_t uc2 = QChar::toCaseFolded(src2.next());
         int diff = uc1 - uc2;   // can't underflow
         if (diff)
             return diff;
diff --git a/src/corelib/text/qstringconverter.cpp b/src/corelib/text/qstringconverter.cpp
index df9efe7f67..09ac6512be 100644
--- a/src/corelib/text/qstringconverter.cpp
+++ b/src/corelib/text/qstringconverter.cpp
@@ -78,7 +78,7 @@ static Q_ALWAYS_INLINE uint qBitScanReverse(unsigned v) noexcept
 #endif
 
 #if defined(__SSE2__) && defined(QT_COMPILER_SUPPORTS_SSE2)
-static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end)
+static inline bool simdEncodeAscii(uchar *&dst, const char16_t *&nextAscii, const char16_t *&src, const char16_t *end)
 {
     // do sixteen characters at a time
     for ( ; end - src >= 16; src += 16, dst += 16) {
@@ -142,7 +142,7 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const
     return src == end;
 }
 
-static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
+static inline bool simdDecodeAscii(char16_t *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
 {
     // do sixteen characters at a time
     for ( ; end - src >= 16; src += 16, dst += 16) {
@@ -361,7 +361,7 @@ static void simdCompareAscii(const char8_t *&src8, const char8_t *end8, const ch
     src16 += offset;
 }
 #elif defined(__ARM_NEON__)
-static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const ushort *&src, const ushort *end)
+static inline bool simdEncodeAscii(uchar *&dst, const char16_t *&nextAscii, const char16_t *&src, const char16_t *end)
 {
     uint16x8_t maxAscii = vdupq_n_u16(0x7f);
     uint16x8_t mask1 = { 1,      1 << 2, 1 << 4, 1 << 6, 1 << 8, 1 << 10, 1 << 12, 1 << 14 };
@@ -370,7 +370,7 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const
     // do sixteen characters at a time
     for ( ; end - src >= 16; src += 16, dst += 16) {
         // load 2 lanes (or: "load interleaved")
-        uint16x8x2_t in = vld2q_u16(src);
+        uint16x8x2_t in = vld2q_u16(reinterpret_cast<const uint16_t *>(src));
 
         // check if any of the elements > 0x7f, select 1 bit per element (element 0 -> bit 0, element 1 -> bit 1, etc),
         // add those together into a scalar, and merge the scalars.
@@ -398,7 +398,7 @@ static inline bool simdEncodeAscii(uchar *&dst, const ushort *&nextAscii, const
     return src == end;
 }
 
-static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
+static inline bool simdDecodeAscii(char16_t *&dst, const uchar *&nextAscii, const uchar *&src, const uchar *end)
 {
     // do eight characters at a time
     uint8x8_t msb_mask = vdup_n_u8(0x80);
@@ -408,7 +408,7 @@ static inline bool simdDecodeAscii(ushort *&dst, const uchar *&nextAscii, const
         uint8_t n = vaddv_u8(vand_u8(vcge_u8(c, msb_mask), add_mask));
         if (!n) {
             // store
-            vst1q_u16(dst, vmovl_u8(c));
+            vst1q_u16(reinterpret_cast<uint16_t *>(dst), vmovl_u8(c));
             continue;
         }
 
@@ -461,12 +461,12 @@ static void simdCompareAscii(const char8_t *&, const char8_t *, const char16_t *
 {
 }
 #else
-static inline bool simdEncodeAscii(uchar *, const ushort *, const ushort *, const ushort *)
+static inline bool simdEncodeAscii(uchar *, const char16_t *, const char16_t *, const char16_t *)
 {
     return false;
 }
 
-static inline bool simdDecodeAscii(ushort *, const uchar *, const uchar *, const uchar *)
+static inline bool simdDecodeAscii(char16_t *, const uchar *, const uchar *, const uchar *)
 {
     return false;
 }
@@ -491,16 +491,16 @@ QByteArray QUtf8::convertFromUnicode(QStringView in)
     // create a QByteArray with the worst case scenario size
     QByteArray result(len * 3, Qt::Uninitialized);
     uchar *dst = reinterpret_cast<uchar *>(const_cast<char *>(result.constData()));
-    const ushort *src = reinterpret_cast<const ushort *>(in.data());
-    const ushort *const end = src + len;
+    const char16_t *src = reinterpret_cast<const char16_t *>(in.data());
+    const char16_t *const end = src + len;
 
     while (src != end) {
-        const ushort *nextAscii = end;
+        const char16_t *nextAscii = end;
         if (simdEncodeAscii(dst, nextAscii, src, end))
             break;
 
         do {
-            ushort u = *src++;
+            char16_t u = *src++;
             int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(u, dst, src, end);
             if (res < 0) {
                 // encoding error - append '?'
@@ -542,8 +542,8 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta
     };
 
     uchar *cursor = reinterpret_cast<uchar *>(out);
-    const ushort *src = reinterpret_cast<const ushort *>(uc);
-    const ushort *const end = src + len;
+    const char16_t *src = reinterpret_cast<const char16_t *>(uc);
+    const char16_t *const end = src + len;
 
     if (!(state->flags & QStringDecoder::Flag::Stateless)) {
         if (state->remainingChars) {
@@ -562,12 +562,12 @@ char *QUtf8::convertFromUnicode(char *out, QStringView in, QStringConverter::Sta
     }
 
     while (src != end) {
-        const ushort *nextAscii = end;
+        const char16_t *nextAscii = end;
         if (simdEncodeAscii(cursor, nextAscii, src, end))
             break;
 
         do {
-            ushort uc = *src++;
+            char16_t uc = *src++;
             int res = QUtf8Functions::toUtf8<QUtf8BaseTraits>(uc, cursor, src, end);
             if (Q_LIKELY(res >= 0))
                 continue;
@@ -632,7 +632,7 @@ QString QUtf8::convertToUnicode(QByteArrayView in)
 
 QChar *QUtf8::convertToUnicode(QChar *buffer, QByteArrayView in) noexcept
 {
-    ushort *dst = reinterpret_cast<ushort *>(buffer);
+    char16_t *dst = reinterpret_cast<char16_t *>(buffer);
     const uchar *const start = reinterpret_cast<const uchar *>(in.data());
     const uchar *src = start;
     const uchar *end = src + in.size();
@@ -694,14 +694,14 @@ QChar *QUtf8::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::
         return out;
 
 
-    ushort replacement = QChar::ReplacementCharacter;
+    char16_t replacement = QChar::ReplacementCharacter;
     if (state->flags & QStringConverter::Flag::ConvertInvalidToNull)
         replacement = QChar::Null;
 
     int res;
     uchar ch = 0;
 
-    ushort *dst = reinterpret_cast<ushort *>(out);
+    char16_t *dst = reinterpret_cast<char16_t *>(out);
     const uchar *src = reinterpret_cast<const uchar *>(in.data());
     const uchar *end = src + len;
 
@@ -791,8 +791,8 @@ QChar *QUtf8::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter::
 struct QUtf8NoOutputTraits : public QUtf8BaseTraitsNoAscii
 {
     struct NoOutput {};
-    static void appendUtf16(const NoOutput &, ushort) {}
-    static void appendUcs4(const NoOutput &, uint) {}
+    static void appendUtf16(const NoOutput &, char16_t) {}
+    static void appendUcs4(const NoOutput &, char32_t) {}
 };
 
 QUtf8::ValidUtf8Result QUtf8::isValidUtf8(QByteArrayView in)
@@ -865,7 +865,7 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QStringView utf16) noexcept
 
 int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1String s)
 {
-    uint uc1 = QChar::Null;
+    char32_t uc1 = QChar::Null;
     auto src1 = reinterpret_cast<const uchar *>(utf8.data());
     auto end1 = src1 + utf8.size();
     auto src2 = reinterpret_cast<const uchar *>(s.latin1());
@@ -873,14 +873,14 @@ int QUtf8::compareUtf8(QByteArrayView utf8, QLatin1String s)
 
     while (src1 < end1 && src2 < end2) {
         uchar b = *src1++;
-        uint *output = &uc1;
+        char32_t *output = &uc1;
         int res = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(b, output, src1, end1);
         if (res < 0) {
             // decoding error
             uc1 = QChar::ReplacementCharacter;
         }
 
-        uint uc2 = *src2++;
+        char32_t uc2 = *src2++;
         if (uc1 != uc2)
             return int(uc1) - int(uc2);
     }
@@ -921,9 +921,9 @@ char *QUtf16::convertFromUnicode(char *out, QStringView in, QStringConverter::St
         out += 2;
     }
     if (endian == BigEndianness)
-        qToBigEndian<ushort>(in.data(), in.length(), out);
+        qToBigEndian<char16_t>(in.data(), in.length(), out);
     else
-        qToLittleEndian<ushort>(in.data(), in.length(), out);
+        qToLittleEndian<char16_t>(in.data(), in.length(), out);
 
     state->remainingChars = 0;
     state->internalState |= HeaderDone;
@@ -998,9 +998,9 @@ QChar *QUtf16::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter:
 
     int nPairs = (end - chars) >> 1;
     if (endian == BigEndianness)
-        qFromBigEndian<ushort>(chars, nPairs, out);
+        qFromBigEndian<char16_t>(chars, nPairs, out);
     else
-        qFromLittleEndian<ushort>(chars, nPairs, out);
+        qFromLittleEndian<char16_t>(chars, nPairs, out);
     out += nPairs;
 
     state->state_data[Endian] = endian;
@@ -1064,7 +1064,7 @@ char *QUtf32::convertFromUnicode(char *out, QStringView in, QStringConverter::St
     const QChar *uc = in.data();
     const QChar *end = in.data() + in.length();
     QChar ch;
-    uint ucs4;
+    char32_t ucs4;
     if (state->remainingChars == 1) {
         auto character = state->state_data[Data];
         Q_ASSERT(character <= 0xFFFF);
@@ -1165,7 +1165,7 @@ QChar *QUtf32::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter:
                 endian = LittleEndianness;
             }
         }
-        uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
+        char32_t code = (endian == BigEndianness) ? qFromBigEndian<char32_t>(tuple) : qFromLittleEndian<char32_t>(tuple);
         if (headerdone || code != QChar::ByteOrderMark) {
             if (QChar::requiresSurrogates(code)) {
                 *out++ = QChar(QChar::highSurrogate(code));
@@ -1184,7 +1184,7 @@ QChar *QUtf32::convertToUnicode(QChar *out, QByteArrayView in, QStringConverter:
     while (chars < end) {
         tuple[num++] = *chars++;
         if (num == 4) {
-            uint code = (endian == BigEndianness) ? qFromBigEndian<quint32>(tuple) : qFromLittleEndian<quint32>(tuple);
+            char32_t code = (endian == BigEndianness) ? qFromBigEndian<char32_t>(tuple) : qFromLittleEndian<char32_t>(tuple);
             for (char16_t c : QChar::fromUcs4(code))
                 *out++ = c;
             num = 0;
@@ -1764,10 +1764,10 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForData(QByt
     // someone set us up the BOM?
     qsizetype arraySize = data.size();
     if (arraySize > 3) {
-        uint uc = qFromUnaligned<uint>(data.data());
-        if (uc == qToBigEndian(uint(QChar::ByteOrderMark)))
+        char32_t uc = qFromUnaligned<char32_t>(data.data());
+        if (uc == qToBigEndian(char32_t(QChar::ByteOrderMark)))
             return QStringConverter::Utf32BE;
-        if (uc == qToLittleEndian(uint(QChar::ByteOrderMark)))
+        if (uc == qToLittleEndian(char32_t(QChar::ByteOrderMark)))
             return QStringConverter::Utf32LE;
         if (expectedFirstCharacter) {
             // catch also anything starting with the expected character
@@ -1784,10 +1784,10 @@ std::optional<QStringConverter::Encoding> QStringConverter::encodingForData(QByt
     }
 
     if (arraySize > 1) {
-        ushort uc = qFromUnaligned<ushort>(data.data());
-        if (uc == qToBigEndian(ushort(QChar::ByteOrderMark)))
+        char16_t uc = qFromUnaligned<char16_t>(data.data());
+        if (uc == qToBigEndian(char16_t(QChar::ByteOrderMark)))
             return QStringConverter::Utf16BE;
-        if (uc == qToLittleEndian(ushort(QChar::ByteOrderMark)))
+        if (uc == qToLittleEndian(char16_t(QChar::ByteOrderMark)))
             return QStringConverter::Utf16LE;
         if (expectedFirstCharacter) {
             // catch also anything starting with the expected character
diff --git a/src/corelib/text/qstringconverter_p.h b/src/corelib/text/qstringconverter_p.h
index 242f3f0303..2ad59af23c 100644
--- a/src/corelib/text/qstringconverter_p.h
+++ b/src/corelib/text/qstringconverter_p.h
@@ -70,9 +70,6 @@ struct QUtf8BaseTraits
     static const int Error = -1;
     static const int EndOfString = -2;
 
-    static bool isValidCharacter(uint u)
-    { return int(u) >= 0; }
-
     static void appendByte(uchar *&ptr, uchar b)
     { *ptr++ = b; }
 
@@ -97,54 +94,27 @@ struct QUtf8BaseTraits
     static void advanceByte(const char8_t *&ptr, int n = 1)
     { ptr += n; }
 
-    static void appendUtf16(ushort *&ptr, ushort uc)
-    { *ptr++ = uc; }
-
-    static void appendUtf16(char16_t *&ptr, ushort uc)
+    static void appendUtf16(char16_t *&ptr, char16_t uc)
     { *ptr++ = char16_t(uc); }
 
-    static void appendUcs4(ushort *&ptr, uint uc)
-    {
-        appendUtf16(ptr, QChar::highSurrogate(uc));
-        appendUtf16(ptr, QChar::lowSurrogate(uc));
-    }
-
     static void appendUcs4(char16_t *&ptr, char32_t uc)
     {
         appendUtf16(ptr, QChar::highSurrogate(uc));
         appendUtf16(ptr, QChar::lowSurrogate(uc));
     }
 
-    static ushort peekUtf16(const ushort *ptr, qsizetype n = 0)
-    { return ptr[n]; }
-
-    static ushort peekUtf16(const char16_t *ptr, int n = 0)
-    { return ptr[n]; }
-
-    static qptrdiff availableUtf16(const ushort *ptr, const ushort *end)
-    { return end - ptr; }
+    static char16_t peekUtf16(const char16_t *ptr, qsizetype n = 0) { return ptr[n]; }
 
     static qptrdiff availableUtf16(const char16_t *ptr, const char16_t *end)
     { return end - ptr; }
 
-    static void advanceUtf16(const ushort *&ptr, qsizetype n = 1)
-    { ptr += n; }
-
-    static void advanceUtf16(const char16_t *&ptr, int n = 1)
-    { ptr += n; }
-
-    // it's possible to output to UCS-4 too
-    static void appendUtf16(uint *&ptr, ushort uc)
-    { *ptr++ = uc; }
+    static void advanceUtf16(const char16_t *&ptr, qsizetype n = 1) { ptr += n; }
 
-    static void appendUtf16(char32_t *&ptr, ushort uc)
+    static void appendUtf16(char32_t *&ptr, char16_t uc)
     { *ptr++ = char32_t(uc); }
 
-    static void appendUcs4(uint *&ptr, uint uc)
+    static void appendUcs4(char32_t *&ptr, char32_t uc)
     { *ptr++ = uc; }
-
-    static void appendUcs4(char32_t *&ptr, uint uc)
-    { *ptr++ = char32_t(uc); }
 };
 
 struct QUtf8BaseTraitsNoAscii : public QUtf8BaseTraits
@@ -159,7 +129,7 @@ namespace QUtf8Functions
     /// if \a u is a high surrogate, Error if the next isn't a low one,
     /// EndOfString if we run into the end of the string.
     template <typename Traits, typename OutputPtr, typename InputPtr> inline
-    int toUtf8(ushort u, OutputPtr &dst, InputPtr &src, InputPtr end)
+    int toUtf8(char16_t u, OutputPtr &dst, InputPtr &src, InputPtr end)
     {
         if (!Traits::skipAsciiHandling && u < 0x80) {
             // U+0000 to U+007F (US-ASCII) - one byte
@@ -183,14 +153,14 @@ namespace QUtf8Functions
                 if (Traits::availableUtf16(src, end) == 0)
                     return Traits::EndOfString;
 
-                ushort low = Traits::peekUtf16(src);
+                char16_t low = Traits::peekUtf16(src);
                 if (!QChar::isHighSurrogate(u))
                     return Traits::Error;
                 if (!QChar::isLowSurrogate(low))
                     return Traits::Error;
 
                 Traits::advanceUtf16(src);
-                uint ucs4 = QChar::surrogateToUcs4(u, low);
+                char32_t ucs4 = QChar::surrogateToUcs4(u, low);
 
                 if (!Traits::allowNonCharacters && QChar::isNonCharacter(ucs4))
                     return Traits::Error;
@@ -202,7 +172,7 @@ namespace QUtf8Functions
                 Traits::appendByte(dst, 0x80 | (uchar(ucs4 >> 12) & 0x3f));
 
                 // for the rest of the bytes
-                u = ushort(ucs4);
+                u = char16_t(ucs4);
             }
 
             // second to last byte
@@ -225,8 +195,8 @@ namespace QUtf8Functions
     qsizetype fromUtf8(uchar b, OutputPtr &dst, InputPtr &src, InputPtr end)
     {
         qsizetype charsNeeded;
-        uint min_uc;
-        uint uc;
+        char32_t min_uc;
+        char32_t uc;
 
         if (!Traits::skipAsciiHandling && b < 0x80) {
             // US-ASCII
@@ -306,7 +276,7 @@ namespace QUtf8Functions
         if (!QChar::requiresSurrogates(uc)) {
             // UTF-8 decoded and no surrogates are required
             // detach if necessary
-            Traits::appendUtf16(dst, ushort(uc));
+            Traits::appendUtf16(dst, char16_t(uc));
         } else {
             // UTF-8 decoded to something that requires a surrogate pair
             Traits::appendUcs4(dst, uc);
author	Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>	2021-09-01 12:46:57 +0200
committer	Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>	2021-09-06 17:41:53 +0200
commit	65a2cb6964f3d1178718203f05e9143bd400c80e (patch)
tree	f6d99eaeca8c6787fa116eabcbf71c6515f336fa /src/corelib/text
parent	ee2ac8b4ef3b8691ef8b9d17a6d4c95b95db4349 (diff)