diff options
Diffstat (limited to 'src/corelib/text/qchar.cpp')
-rw-r--r-- | src/corelib/text/qchar.cpp | 354 |
1 file changed, 170 insertions, 184 deletions
diff --git a/src/corelib/text/qchar.cpp b/src/corelib/text/qchar.cpp index dcc36d18ce..63296a92de 100644 --- a/src/corelib/text/qchar.cpp +++ b/src/corelib/text/qchar.cpp @@ -1,50 +1,6 @@ -/**************************************************************************** -** -** Copyright (C) 2020 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. 
Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ - -// Don't define it while compiling this module, or USERS of Qt will -// not be able to link. -#ifdef QT_NO_CAST_FROM_ASCII -# undef QT_NO_CAST_FROM_ASCII -#endif -#ifdef QT_NO_CAST_TO_ASCII -# undef QT_NO_CAST_TO_ASCII -#endif +// Copyright (C) 2022 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only + #include "qchar.h" #include "qdatastream.h" @@ -68,7 +24,7 @@ QT_BEGIN_NAMESPACE This class is only useful to construct a QChar with 8-bit character. - \sa QChar, QLatin1String, QString + \sa QChar, QLatin1StringView, QString */ /*! @@ -99,6 +55,13 @@ QT_BEGIN_NAMESPACE \ingroup string-processing \reentrant + \compares strong + \compareswith strong char16_t QString QStringView QLatin1StringView QUtf8StringView + \endcompareswith + \compareswith strong {const char *} QByteArray QByteArrayView + The contents of the byte array is interpreted as utf-8. + \endcompareswith + In Qt, Unicode characters are 16-bit entities without any markup or structure. This class represents such an entity. It is lightweight, so it can be used everywhere. Most compilers treat @@ -165,7 +128,7 @@ QT_BEGIN_NAMESPACE QT_IMPLICIT_QCHAR_CONSTRUCTION. For more information see - \l{http://www.unicode.org/ucd/}{"About the Unicode Character Database"}. + \l{https://www.unicode.org/ucd/}{"About the Unicode Character Database"}. \sa Unicode, QString, QLatin1Char */ @@ -173,8 +136,8 @@ QT_BEGIN_NAMESPACE /*! \enum QChar::UnicodeVersion - Specifies which version of the \l{http://www.unicode.org/}{Unicode standard} - introduced a certain character. 
+ Specifies which version of the \l{Unicode standard} introduced a certain + character. \value Unicode_1_1 Version 1.1 \value Unicode_2_0 Version 2.0 @@ -190,15 +153,18 @@ QT_BEGIN_NAMESPACE \value Unicode_6_0 Version 6.0 \value Unicode_6_1 Version 6.1 \value Unicode_6_2 Version 6.2 - \value Unicode_6_3 Version 6.3 Since Qt 5.3 - \value Unicode_7_0 Version 7.0 Since Qt 5.5 - \value Unicode_8_0 Version 8.0 Since Qt 5.6 - \value Unicode_9_0 Version 9.0 Since Qt 5.11 - \value Unicode_10_0 Version 10.0 Since Qt 5.11 - \value Unicode_11_0 Version 11.0 Since Qt 5.15 - \value Unicode_12_0 Version 12.0 Since Qt 5.15 - \value Unicode_12_1 Version 12.1 Since Qt 5.15 - \value Unicode_13_0 Version 13.0 Since Qt 5.15 + \value [since 5.3] Unicode_6_3 Version 6.3 + \value [since 5.5] Unicode_7_0 Version 7.0 + \value [since 5.6] Unicode_8_0 Version 8.0 + \value [since 5.11] Unicode_9_0 Version 9.0 + \value [since 5.11] Unicode_10_0 Version 10.0 + \value [since 5.15] Unicode_11_0 Version 11.0 + \value [since 5.15] Unicode_12_0 Version 12.0 + \value [since 5.15] Unicode_12_1 Version 12.1 + \value [since 5.15] Unicode_13_0 Version 13.0 + \value [since 6.3] Unicode_14_0 Version 14.0 + \value [since 6.5] Unicode_15_0 Version 15.0 + \value [since 6.8] Unicode_15_1 Version 15.1 \value Unicode_Unassigned The value is not assigned to any character in version 8.0 of Unicode. @@ -285,7 +251,7 @@ QT_BEGIN_NAMESPACE This enum type defines the Unicode script property values. For details about the Unicode script property values see - \l{http://www.unicode.org/reports/tr24/}{Unicode Standard Annex #24}. + \l{https://www.unicode.org/reports/tr24/}{Unicode Standard Annex #24}. In order to conform to C/C++ naming conventions "Script_" is prepended to the codes used in the Unicode Standard. @@ -298,18 +264,18 @@ QT_BEGIN_NAMESPACE \value Script_Common For characters that may be used with multiple scripts and that do not inherit their script from the preceding characters. 
- \value Script_Adlam Since Qt 5.11 - \value Script_Ahom Since Qt 5.6 - \value Script_AnatolianHieroglyphs Since Qt 5.6 + \value [since 5.11] Script_Adlam + \value [since 5.6] Script_Ahom + \value [since 5.6] Script_AnatolianHieroglyphs \value Script_Arabic \value Script_Armenian \value Script_Avestan \value Script_Balinese \value Script_Bamum - \value Script_BassaVah Since Qt 5.5 + \value [since 5.5] Script_BassaVah \value Script_Batak \value Script_Bengali - \value Script_Bhaiksuki Since Qt 5.11 + \value [since 5.11] Script_Bhaiksuki \value Script_Bopomofo \value Script_Brahmi \value Script_Braille @@ -317,37 +283,38 @@ QT_BEGIN_NAMESPACE \value Script_Buhid \value Script_CanadianAboriginal \value Script_Carian - \value Script_CaucasianAlbanian Since Qt 5.5 + \value [since 5.5] Script_CaucasianAlbanian \value Script_Chakma \value Script_Cham \value Script_Cherokee - \value Script_Chorasmian Since Qt 5.15 + \value [since 5.15] Script_Chorasmian \value Script_Coptic \value Script_Cuneiform \value Script_Cypriot + \value [since 6.3] Script_CyproMinoan \value Script_Cyrillic \value Script_Deseret \value Script_Devanagari - \value Script_DivesAkuru Since Qt 5.15 - \value Script_Dogra Since Qt 5.15 - \value Script_Duployan Since Qt 5.5 + \value [since 5.15] Script_DivesAkuru + \value [since 5.15] Script_Dogra + \value [since 5.5] Script_Duployan \value Script_EgyptianHieroglyphs - \value Script_Elbasan Since Qt 5.5 - \value Script_Elymaic Since Qt 5.15 + \value [since 5.5] Script_Elbasan + \value [since 5.15] Script_Elymaic \value Script_Ethiopic \value Script_Georgian \value Script_Glagolitic \value Script_Gothic - \value Script_Grantha Since Qt 5.5 + \value [since 5.5] Script_Grantha \value Script_Greek \value Script_Gujarati - \value Script_GunjalaGondi Since Qt 5.15 + \value [since 5.15] Script_GunjalaGondi \value Script_Gurmukhi \value Script_Han \value Script_Hangul - \value Script_HanifiRohingya Since Qt 5.15 + \value [since 5.15] Script_HanifiRohingya \value 
Script_Hanunoo - \value Script_Hatran Since Qt 5.6 + \value [since 5.6] Script_Hatran \value Script_Hebrew \value Script_Hiragana \value Script_ImperialAramaic @@ -357,77 +324,80 @@ QT_BEGIN_NAMESPACE \value Script_Kaithi \value Script_Kannada \value Script_Katakana + \value [since 6.5] Script_Kawi \value Script_KayahLi \value Script_Kharoshthi - \value Script_KhitanSmallScript Since Qt 5.15 + \value [since 5.15] Script_KhitanSmallScript \value Script_Khmer - \value Script_Khojki Since Qt 5.5 - \value Script_Khudawadi Since Qt 5.5 + \value [since 5.5] Script_Khojki + \value [since 5.5] Script_Khudawadi \value Script_Lao \value Script_Latin \value Script_Lepcha \value Script_Limbu - \value Script_LinearA Since Qt 5.5 + \value [since 5.5] Script_LinearA \value Script_LinearB \value Script_Lisu \value Script_Lycian \value Script_Lydian - \value Script_Mahajani Since Qt 5.5 - \value Script_Makasar Since Qt 5.15 + \value [since 5.5] Script_Mahajani + \value [since 5.15] Script_Makasar \value Script_Malayalam \value Script_Mandaic - \value Script_Manichaean Since Qt 5.5 - \value Script_Marchen Since Qt 5.11 - \value Script_MasaramGondi Since Qt 5.11 - \value Script_Medefaidrin Since Qt 5.15 + \value [since 5.5] Script_Manichaean + \value [since 5.11] Script_Marchen + \value [since 5.11] Script_MasaramGondi + \value [since 5.15] Script_Medefaidrin \value Script_MeeteiMayek - \value Script_MendeKikakui Since Qt 5.5 + \value [since 5.5] Script_MendeKikakui \value Script_MeroiticCursive \value Script_MeroiticHieroglyphs \value Script_Miao - \value Script_Modi Since Qt 5.5 + \value [since 5.5] Script_Modi \value Script_Mongolian - \value Script_Mro Since Qt 5.5 - \value Script_Multani Since Qt 5.6 + \value [since 5.5] Script_Mro + \value [since 5.6] Script_Multani \value Script_Myanmar - \value Script_Nabataean Since Qt 5.5 - \value Script_Nandinagari Since Qt 5.15 - \value Script_Newa Since Qt 5.11 + \value [since 5.5] Script_Nabataean + \value [since 6.3] Script_NagMundari 
+ \value [since 5.15] Script_Nandinagari + \value [since 5.11] Script_Newa \value Script_NewTaiLue \value Script_Nko - \value Script_Nushu Since Qt 5.11 - \value Script_NyiakengPuachueHmong Since Qt 5.15 + \value [since 5.11] Script_Nushu + \value [since 5.15] Script_NyiakengPuachueHmong \value Script_Ogham \value Script_OlChiki - \value Script_OldHungarian Since Qt 5.6 + \value [since 5.6] Script_OldHungarian \value Script_OldItalic - \value Script_OldNorthArabian Since Qt 5.5 - \value Script_OldPermic Since Qt 5.5 + \value [since 5.5] Script_OldNorthArabian + \value [since 5.5] Script_OldPermic \value Script_OldPersian - \value Script_OldSogdian Since Qt 5.15 + \value [since 5.15] Script_OldSogdian \value Script_OldSouthArabian \value Script_OldTurkic + \value [since 6.3] Script_OldUyghur \value Script_Oriya - \value Script_Osage Since Qt 5.11 + \value [since 5.11] Script_Osage \value Script_Osmanya - \value Script_PahawhHmong Since Qt 5.5 - \value Script_Palmyrene Since Qt 5.5 - \value Script_PauCinHau Since Qt 5.5 + \value [since 5.5] Script_PahawhHmong + \value [since 5.5] Script_Palmyrene + \value [since 5.5] Script_PauCinHau \value Script_PhagsPa \value Script_Phoenician - \value Script_PsalterPahlavi Since Qt 5.5 + \value [since 5.5] Script_PsalterPahlavi \value Script_Rejang \value Script_Runic \value Script_Samaritan \value Script_Saurashtra \value Script_Sharada \value Script_Shavian - \value Script_Siddham Since Qt 5.5 - \value Script_SignWriting Since Qt 5.6 + \value [since 5.5] Script_Siddham + \value [since 5.6] Script_SignWriting \value Script_Sinhala - \value Script_Sogdian Since Qt 5.15 + \value [since 5.15] Script_Sogdian \value Script_SoraSompeng - \value Script_Soyombo Since Qt 5.11 + \value [since 5.11] Script_Soyombo \value Script_Sundanese \value Script_SylotiNagri \value Script_Syriac @@ -438,20 +408,23 @@ QT_BEGIN_NAMESPACE \value Script_TaiViet \value Script_Takri \value Script_Tamil - \value Script_Tangut Since Qt 5.11 + \value [since 
5.11] Script_Tangut + \value [since 6.3] Script_Tangsa \value Script_Telugu \value Script_Thaana \value Script_Thai \value Script_Tibetan \value Script_Tifinagh - \value Script_Tirhuta Since Qt 5.5 + \value [since 5.5] Script_Tirhuta + \value [since 6.3] Script_Toto \value Script_Ugaritic \value Script_Vai - \value Script_Wancho Since Qt 5.15 - \value Script_WarangCiti Since Qt 5.5 - \value Script_Yezidi Since Qt 5.15 + \value [since 6.3] Script_Vithkuqi + \value [since 5.15] Script_Wancho + \value [since 5.5] Script_WarangCiti + \value [since 5.15] Script_Yezidi \value Script_Yi - \value Script_ZanabazarSquare Since Qt 5.11 + \value [since 5.11] Script_ZanabazarSquare \omitvalue ScriptCount @@ -462,8 +435,8 @@ QT_BEGIN_NAMESPACE \enum QChar::Direction This enum type defines the Unicode direction attributes. See the - \l{http://www.unicode.org/reports/tr9/tr9-35.html#Table_Bidirectional_Character_Types}{Unicode Standard} for a description - of the values. + \l{https://www.unicode.org/reports/tr9/tr9-35.html#Table_Bidirectional_Character_Types}{Unicode + Standard} for a description of the values. In order to conform to C/C++ naming conventions "Dir" is prepended to the codes used in the Unicode Standard. @@ -476,18 +449,18 @@ QT_BEGIN_NAMESPACE \value DirEN \value DirES \value DirET - \value DirFSI Since Qt 5.3 + \value [since 5.3] DirFSI \value DirL \value DirLRE - \value DirLRI Since Qt 5.3 + \value [since 5.3] DirLRI \value DirLRO \value DirNSM \value DirON \value DirPDF - \value DirPDI Since Qt 5.3 + \value [since 5.3] DirPDI \value DirR \value DirRLE - \value DirRLI Since Qt 5.3 + \value [since 5.3] DirRLI \value DirRLO \value DirS \value DirWS @@ -499,8 +472,7 @@ QT_BEGIN_NAMESPACE \enum QChar::Decomposition This enum type defines the Unicode decomposition attributes. See - the \l{http://www.unicode.org/}{Unicode Standard} for a - description of the values. + the \l{Unicode standard} for a description of the values. 
\value NoDecomposition \value Canonical @@ -529,7 +501,7 @@ QT_BEGIN_NAMESPACE since 5.3 This enum type defines the Unicode joining type attributes. See the - \l{http://www.unicode.org/}{Unicode Standard} for a description of the values. + \l{Unicode standard} for a description of the values. In order to conform to C/C++ naming conventions "Joining_" is prepended to the codes used in the Unicode Standard. @@ -550,8 +522,7 @@ QT_BEGIN_NAMESPACE \internal This enum type defines names for some of the Unicode combining - classes. See the \l{http://www.unicode.org/}{Unicode Standard} - for a description of the values. + classes. See the \l{Unicode Standard} for a description of the values. \value Combining_Above \value Combining_AboveAttached @@ -595,6 +566,7 @@ QT_BEGIN_NAMESPACE \value ByteOrderSwapped \value ParagraphSeparator \value LineSeparator + \value [since 6.2] VisualTabCharacter Used to represent a tabulation as a horizontal arrow. \value LastValidCodePoint */ @@ -1379,31 +1351,28 @@ char32_t QChar::mirroredChar(char32_t ucs4) noexcept return ucs4 + qGetProp(ucs4)->mirrorDiff; } - -// constants for Hangul (de)composition, see UAX #15 -enum { - Hangul_SBase = 0xac00, - Hangul_LBase = 0x1100, - Hangul_VBase = 0x1161, - Hangul_TBase = 0x11a7, - Hangul_LCount = 19, - Hangul_VCount = 21, - Hangul_TCount = 28, - Hangul_NCount = Hangul_VCount * Hangul_TCount, - Hangul_SCount = Hangul_LCount * Hangul_NCount -}; +// Constants for Hangul (de)composition, see UAX #15: +static constexpr char32_t Hangul_SBase = 0xac00; +static constexpr char32_t Hangul_LBase = 0x1100; +static constexpr char32_t Hangul_VBase = 0x1161; +static constexpr char32_t Hangul_TBase = 0x11a7; +static constexpr quint32 Hangul_LCount = 19; +static constexpr quint32 Hangul_VCount = 21; +static constexpr quint32 Hangul_TCount = 28; +static constexpr quint32 Hangul_NCount = Hangul_VCount * Hangul_TCount; +static constexpr quint32 Hangul_SCount = Hangul_LCount * Hangul_NCount; // buffer has to have a 
length of 3. It's needed for Hangul decomposition -static const unsigned short * QT_FASTCALL decompositionHelper - (uint ucs4, qsizetype *length, int *tag, unsigned short *buffer) +static const QChar * QT_FASTCALL decompositionHelper( + char32_t ucs4, qsizetype *length, QChar::Decomposition *tag, QChar *buffer) { if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) { // compute Hangul syllable decomposition as per UAX #15 - const uint SIndex = ucs4 - Hangul_SBase; - buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L - buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V - buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T - *length = buffer[2] == Hangul_TBase ? 2 : 3; + const char32_t SIndex = ucs4 - Hangul_SBase; + buffer[0] = QChar(Hangul_LBase + SIndex / Hangul_NCount); // L + buffer[1] = QChar(Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount); // V + buffer[2] = QChar(Hangul_TBase + SIndex % Hangul_TCount); // T + *length = buffer[2].unicode() == Hangul_TBase ? 2 : 3; *tag = QChar::Canonical; return buffer; } @@ -1416,9 +1385,9 @@ static const unsigned short * QT_FASTCALL decompositionHelper } const unsigned short *decomposition = uc_decomposition_map+index; - *tag = (*decomposition) & 0xff; + *tag = QChar::Decomposition((*decomposition) & 0xff); *length = (*decomposition) >> 8; - return decomposition+1; + return reinterpret_cast<const QChar *>(decomposition + 1); } /*! @@ -1439,11 +1408,11 @@ QString QChar::decomposition() const */ QString QChar::decomposition(char32_t ucs4) { - unsigned short buffer[3]; + QChar buffer[3]; qsizetype length; - int tag; - const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); - return QString(reinterpret_cast<const QChar *>(d), length); + QChar::Decomposition tag; + const QChar *d = decompositionHelper(ucs4, &length, &tag, buffer); + return QString(d, length); } /*! 
@@ -1572,7 +1541,7 @@ static auto fullConvertCase(char32_t uc, QUnicodeTables::Case which) noexcept while (length--) *pp++ = *specialCase++; } else { - // so far, case convertion never changes planes (guaranteed by the qunicodetables generator) + // so far, case conversion never changes planes (guaranteed by the qunicodetables generator) for (char16_t c : QChar::fromUcs4(uc + caseDiff)) *pp++ = c; } @@ -1778,47 +1747,64 @@ QDataStream &operator>>(QDataStream &in, QChar &chr) *****************************************************************************/ /*! - \fn bool QChar::operator==(QChar c1, QChar c2) + \fn bool QChar::operator==(const QChar &c1, const QChar &c2) Returns \c true if \a c1 and \a c2 are the same Unicode character; otherwise returns \c false. */ /*! - \fn int QChar::operator!=(QChar c1, QChar c2) + \fn bool QChar::operator!=(const QChar &c1, const QChar &c2) Returns \c true if \a c1 and \a c2 are not the same Unicode character; otherwise returns \c false. */ /*! - \fn int QChar::operator<=(QChar c1, QChar c2) + \fn bool QChar::operator<=(const QChar &c1, const QChar &c2) Returns \c true if the numeric Unicode value of \a c1 is less than or equal to that of \a c2; otherwise returns \c false. */ /*! - \fn int QChar::operator>=(QChar c1, QChar c2) + \fn bool QChar::operator>=(const QChar &c1, const QChar &c2) Returns \c true if the numeric Unicode value of \a c1 is greater than or equal to that of \a c2; otherwise returns \c false. */ /*! - \fn int QChar::operator<(QChar c1, QChar c2) + \fn bool QChar::operator<(const QChar &c1, const QChar &c2) Returns \c true if the numeric Unicode value of \a c1 is less than that of \a c2; otherwise returns \c false. */ /*! - \fn int QChar::operator>(QChar c1, QChar c2) + \fn bool QChar::operator>(const QChar &c1, const QChar &c2) Returns \c true if the numeric Unicode value of \a c1 is greater than that of \a c2; otherwise returns \c false. */ +/*! 
+ \fn Qt::Literals::StringLiterals::operator""_L1(char ch) + + \relates QLatin1Char + \since 6.4 + + Literal operator that creates a QLatin1Char out of \a ch. + + The following code creates a QLatin1Char: + \code + using namespace Qt::Literals::StringLiterals; + + auto ch = 'a'_L1; + \endcode + + \sa Qt::Literals::StringLiterals +*/ // --------------------------------------------------------------------------- @@ -1826,15 +1812,15 @@ QDataStream &operator>>(QDataStream &in, QChar &chr) static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, qsizetype from) { qsizetype length; - int tag; - unsigned short buffer[3]; + QChar::Decomposition tag; + QChar buffer[3]; QString &s = *str; const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data()); - const unsigned short *uc = utf16 + s.length(); + const unsigned short *uc = utf16 + s.size(); while (uc != utf16 + from) { - uint ucs4 = *(--uc); + char32_t ucs4 = *(--uc); if (QChar(ucs4).isLowSurrogate() && uc != utf16) { ushort high = *(uc - 1); if (QChar(high).isHighSurrogate()) { @@ -1846,12 +1832,12 @@ static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion if (QChar::unicodeVersion(ucs4) > version) continue; - const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer); + const QChar *d = decompositionHelper(ucs4, &length, &tag, buffer); if (!d || (canonical && tag != QChar::Canonical)) continue; qsizetype pos = uc - utf16; - s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length); + s.replace(pos, QChar::requiresSurrogates(ucs4) ? 
2 : 1, d, length); // since the replace invalidates the pointers and we do decomposition recursive utf16 = reinterpret_cast<unsigned short *>(s.data()); uc = utf16 + pos + length; @@ -1878,27 +1864,27 @@ struct UCS2SurrogatePair { inline bool operator<(const UCS2SurrogatePair &ligature1, const UCS2SurrogatePair &ligature2) { return QChar::surrogateToUcs4(ligature1.p1.u1, ligature1.p1.u2) < QChar::surrogateToUcs4(ligature2.p1.u1, ligature2.p1.u2); } -inline bool operator<(uint u1, const UCS2SurrogatePair &ligature) +inline bool operator<(char32_t u1, const UCS2SurrogatePair &ligature) { return u1 < QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2); } -inline bool operator<(const UCS2SurrogatePair &ligature, uint u1) +inline bool operator<(const UCS2SurrogatePair &ligature, char32_t u1) { return QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2) < u1; } -static uint inline ligatureHelper(uint u1, uint u2) +static char32_t inline ligatureHelper(char32_t u1, char32_t u2) { - if (u1 >= Hangul_LBase && u1 <= Hangul_SBase + Hangul_SCount) { + if (u1 >= Hangul_LBase && u1 < Hangul_SBase + Hangul_SCount) { // compute Hangul syllable composition as per UAX #15 // hangul L-V pair - const uint LIndex = u1 - Hangul_LBase; + const char32_t LIndex = u1 - Hangul_LBase; if (LIndex < Hangul_LCount) { - const uint VIndex = u2 - Hangul_VBase; + const char32_t VIndex = u2 - Hangul_VBase; if (VIndex < Hangul_VCount) return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount; } // hangul LV-T pair - const uint SIndex = u1 - Hangul_SBase; + const char32_t SIndex = u1 - Hangul_SBase; if (SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) { - const uint TIndex = u2 - Hangul_TBase; - if (TIndex <= Hangul_TCount) + const char32_t TIndex = u2 - Hangul_TBase; + if (TIndex < Hangul_TCount && TIndex) return u1 + TIndex; } } @@ -1927,19 +1913,19 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, qsizetype { QString &s = *str; - if (from < 0 || 
s.length() - from < 2) + if (from < 0 || s.size() - from < 2) return; - uint stcode = 0; // starter code point + char32_t stcode = 0; // starter code point qsizetype starter = -1; // starter position qsizetype next = -1; // to prevent i == next int lastCombining = 255; // to prevent combining > lastCombining qsizetype pos = from; - while (pos < s.length()) { + while (pos < s.size()) { qsizetype i = pos; char32_t uc = s.at(pos).unicode(); - if (QChar(uc).isHighSurrogate() && pos < s.length()-1) { + if (QChar(uc).isHighSurrogate() && pos < s.size()-1) { ushort low = s.at(pos+1).unicode(); if (QChar(low).isLowSurrogate()) { uc = QChar::surrogateToUcs4(uc, low); @@ -1959,7 +1945,7 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, qsizetype int combining = p->combiningClass; if ((i == next || combining > lastCombining) && starter >= from) { // allowed to form ligature with S - uint ligature = ligatureHelper(stcode, uc); + char32_t ligature = ligatureHelper(stcode, uc); if (ligature) { stcode = ligature; QChar *d = s.data(); @@ -1986,7 +1972,7 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, qsizetype static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, qsizetype from) { QString &s = *str; - const qsizetype l = s.length()-1; + const qsizetype l = s.size()-1; char32_t u1, u2; char16_t c1, c2; @@ -2074,8 +2060,8 @@ static bool normalizationQuickCheckHelper(QString *str, QString::NormalizationFo enum { NFQC_YES = 0, NFQC_NO = 1, NFQC_MAYBE = 3 }; - const ushort *string = reinterpret_cast<const ushort *>(str->constData()); - qsizetype length = str->length(); + const auto *string = reinterpret_cast<const char16_t *>(str->constData()); + qsizetype length = str->size(); // this avoids one out of bounds check in the loop while (length > from && QChar::isHighSurrogate(string[length - 1])) @@ -2118,8 +2104,8 @@ static bool normalizationQuickCheckHelper(QString *str, QString::NormalizationFo *lastStable = 
pos; } - if (length != str->length()) // low surrogate parts at the end of text - *lastStable = str->length() - 1; + if (length != str->size()) // low surrogate parts at the end of text + *lastStable = str->size() - 1; return true; } |