summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qchar.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qchar.cpp')
-rw-r--r--src/corelib/text/qchar.cpp120
1 files changed, 37 insertions, 83 deletions
diff --git a/src/corelib/text/qchar.cpp b/src/corelib/text/qchar.cpp
index 32e293ee9f..7b114e9723 100644
--- a/src/corelib/text/qchar.cpp
+++ b/src/corelib/text/qchar.cpp
@@ -1,14 +1,6 @@
// Copyright (C) 2022 The Qt Company Ltd.
// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
-// Don't define it while compiling this module, or USERS of Qt will
-// not be able to link.
-#ifdef QT_NO_CAST_FROM_ASCII
-# undef QT_NO_CAST_FROM_ASCII
-#endif
-#ifdef QT_NO_CAST_TO_ASCII
-# undef QT_NO_CAST_TO_ASCII
-#endif
#include "qchar.h"
#include "qdatastream.h"
@@ -63,6 +55,13 @@ QT_BEGIN_NAMESPACE
\ingroup string-processing
\reentrant
+ \compares strong
+ \compareswith strong char16_t QString QStringView QLatin1StringView QUtf8StringView
+ \endcompareswith
+ \compareswith strong {const char *} QByteArray QByteArrayView
+ The contents of the byte array are interpreted as UTF-8.
+ \endcompareswith
+
In Qt, Unicode characters are 16-bit entities without any markup
or structure. This class represents such an entity. It is
lightweight, so it can be used everywhere. Most compilers treat
@@ -124,9 +123,7 @@ QT_BEGIN_NAMESPACE
Starting with Qt 6.0, most QChar constructors are \c explicit. This
is done to avoid dangerous mistakes when accidentally mixing
- integral types and strings. You can opt-out (and make these
- constructors implicit) by defining the macro \c
- QT_IMPLICIT_QCHAR_CONSTRUCTION.
+ integral types and strings.
For more information see
\l{https://www.unicode.org/ucd/}{"About the Unicode Character Database"}.
@@ -164,6 +161,8 @@ QT_BEGIN_NAMESPACE
\value [since 5.15] Unicode_12_1 Version 12.1
\value [since 5.15] Unicode_13_0 Version 13.0
\value [since 6.3] Unicode_14_0 Version 14.0
+ \value [since 6.5] Unicode_15_0 Version 15.0
+ \value [since 6.8] Unicode_15_1 Version 15.1
\value Unicode_Unassigned The value is not assigned to any character
in version 8.0 of Unicode.
@@ -323,6 +322,7 @@ QT_BEGIN_NAMESPACE
\value Script_Kaithi
\value Script_Kannada
\value Script_Katakana
+ \value [since 6.5] Script_Kawi
\value Script_KayahLi
\value Script_Kharoshthi
\value [since 5.15] Script_KhitanSmallScript
@@ -357,6 +357,7 @@ QT_BEGIN_NAMESPACE
\value [since 5.6] Script_Multani
\value Script_Myanmar
\value [since 5.5] Script_Nabataean
+ \value [since 6.5] Script_NagMundari
\value [since 5.15] Script_Nandinagari
\value [since 5.11] Script_Newa
\value Script_NewTaiLue
@@ -1365,7 +1366,7 @@ static const QChar * QT_FASTCALL decompositionHelper(
{
if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
// compute Hangul syllable decomposition as per UAX #15
- const uint SIndex = ucs4 - Hangul_SBase;
+ const char32_t SIndex = ucs4 - Hangul_SBase;
buffer[0] = QChar(Hangul_LBase + SIndex / Hangul_NCount); // L
buffer[1] = QChar(Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount); // V
buffer[2] = QChar(Hangul_TBase + SIndex % Hangul_TCount); // T
@@ -1744,42 +1745,42 @@ QDataStream &operator>>(QDataStream &in, QChar &chr)
*****************************************************************************/
/*!
- \fn bool QChar::operator==(QChar c1, QChar c2)
+ \fn bool QChar::operator==(const QChar &c1, const QChar &c2)
Returns \c true if \a c1 and \a c2 are the same Unicode character;
otherwise returns \c false.
*/
/*!
- \fn int QChar::operator!=(QChar c1, QChar c2)
+ \fn bool QChar::operator!=(const QChar &c1, const QChar &c2)
Returns \c true if \a c1 and \a c2 are not the same Unicode
character; otherwise returns \c false.
*/
/*!
- \fn int QChar::operator<=(QChar c1, QChar c2)
+ \fn bool QChar::operator<=(const QChar &c1, const QChar &c2)
Returns \c true if the numeric Unicode value of \a c1 is less than
or equal to that of \a c2; otherwise returns \c false.
*/
/*!
- \fn int QChar::operator>=(QChar c1, QChar c2)
+ \fn bool QChar::operator>=(const QChar &c1, const QChar &c2)
Returns \c true if the numeric Unicode value of \a c1 is greater than
or equal to that of \a c2; otherwise returns \c false.
*/
/*!
- \fn int QChar::operator<(QChar c1, QChar c2)
+ \fn bool QChar::operator<(const QChar &c1, const QChar &c2)
Returns \c true if the numeric Unicode value of \a c1 is less than
that of \a c2; otherwise returns \c false.
*/
/*!
- \fn int QChar::operator>(QChar c1, QChar c2)
+ \fn bool QChar::operator>(const QChar &c1, const QChar &c2)
Returns \c true if the numeric Unicode value of \a c1 is greater than
that of \a c2; otherwise returns \c false.
@@ -1815,7 +1816,7 @@ static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion
QString &s = *str;
const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
- const unsigned short *uc = utf16 + s.length();
+ const unsigned short *uc = utf16 + s.size();
while (uc != utf16 + from) {
char32_t ucs4 = *(--uc);
if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
@@ -1861,26 +1862,26 @@ struct UCS2SurrogatePair {
inline bool operator<(const UCS2SurrogatePair &ligature1, const UCS2SurrogatePair &ligature2)
{ return QChar::surrogateToUcs4(ligature1.p1.u1, ligature1.p1.u2) < QChar::surrogateToUcs4(ligature2.p1.u1, ligature2.p1.u2); }
-inline bool operator<(uint u1, const UCS2SurrogatePair &ligature)
+inline bool operator<(char32_t u1, const UCS2SurrogatePair &ligature)
{ return u1 < QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2); }
-inline bool operator<(const UCS2SurrogatePair &ligature, uint u1)
+inline bool operator<(const UCS2SurrogatePair &ligature, char32_t u1)
{ return QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2) < u1; }
-static uint inline ligatureHelper(uint u1, uint u2)
+static char32_t inline ligatureHelper(char32_t u1, char32_t u2)
{
if (u1 >= Hangul_LBase && u1 < Hangul_SBase + Hangul_SCount) {
// compute Hangul syllable composition as per UAX #15
// hangul L-V pair
- const uint LIndex = u1 - Hangul_LBase;
+ const char32_t LIndex = u1 - Hangul_LBase;
if (LIndex < Hangul_LCount) {
- const uint VIndex = u2 - Hangul_VBase;
+ const char32_t VIndex = u2 - Hangul_VBase;
if (VIndex < Hangul_VCount)
return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
}
// hangul LV-T pair
- const uint SIndex = u1 - Hangul_SBase;
+ const char32_t SIndex = u1 - Hangul_SBase;
if (SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
- const uint TIndex = u2 - Hangul_TBase;
+ const char32_t TIndex = u2 - Hangul_TBase;
if (TIndex < Hangul_TCount && TIndex)
return u1 + TIndex;
}
@@ -1910,19 +1911,19 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, qsizetype
{
QString &s = *str;
- if (from < 0 || s.length() - from < 2)
+ if (from < 0 || s.size() - from < 2)
return;
- uint stcode = 0; // starter code point
+ char32_t stcode = 0; // starter code point
qsizetype starter = -1; // starter position
qsizetype next = -1; // to prevent i == next
int lastCombining = 255; // to prevent combining > lastCombining
qsizetype pos = from;
- while (pos < s.length()) {
+ while (pos < s.size()) {
qsizetype i = pos;
char32_t uc = s.at(pos).unicode();
- if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
+ if (QChar(uc).isHighSurrogate() && pos < s.size()-1) {
ushort low = s.at(pos+1).unicode();
if (QChar(low).isLowSurrogate()) {
uc = QChar::surrogateToUcs4(uc, low);
@@ -1942,7 +1943,7 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, qsizetype
int combining = p->combiningClass;
if ((i == next || combining > lastCombining) && starter >= from) {
// allowed to form ligature with S
- uint ligature = ligatureHelper(stcode, uc);
+ char32_t ligature = ligatureHelper(stcode, uc);
if (ligature) {
stcode = ligature;
QChar *d = s.data();
@@ -1969,7 +1970,7 @@ static void composeHelper(QString *str, QChar::UnicodeVersion version, qsizetype
static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, qsizetype from)
{
QString &s = *str;
- const qsizetype l = s.length()-1;
+ const qsizetype l = s.size()-1;
char32_t u1, u2;
char16_t c1, c2;
@@ -2057,8 +2058,8 @@ static bool normalizationQuickCheckHelper(QString *str, QString::NormalizationFo
enum { NFQC_YES = 0, NFQC_NO = 1, NFQC_MAYBE = 3 };
- const ushort *string = reinterpret_cast<const ushort *>(str->constData());
- qsizetype length = str->length();
+ const auto *string = reinterpret_cast<const char16_t *>(str->constData());
+ qsizetype length = str->size();
// this avoids one out of bounds check in the loop
while (length > from && QChar::isHighSurrogate(string[length - 1]))
@@ -2101,57 +2102,10 @@ static bool normalizationQuickCheckHelper(QString *str, QString::NormalizationFo
*lastStable = pos;
}
- if (length != str->length()) // low surrogate parts at the end of text
- *lastStable = str->length() - 1;
+ if (length != str->size()) // low surrogate parts at the end of text
+ *lastStable = str->size() - 1;
return true;
}
-/*!
- \macro QT_IMPLICIT_QCHAR_CONSTRUCTION
- \since 6.0
- \relates QChar
-
- Defining this macro makes certain QChar constructors implicit
- rather than explicit. This is done to enforce safe conversions:
-
- \badcode
-
- QString str = getString();
- if (str == 123) {
- // Oops, meant str == "123". By default does not compile,
- // *unless* this macro is defined, in which case, it's interpreted
- // as `if (str == QChar(123))`, that is, `if (str == '{')`.
- // Likely, not what we meant.
- }
-
- \endcode
-
- This macro is provided to keep existing code working; it is
- recommended to instead use explicit conversions and/or QLatin1Char.
- For instance:
-
- \code
-
- QChar c1 = 'x'; // OK, unless QT_NO_CAST_FROM_ASCII is defined
- QChar c2 = u'x'; // always OK, recommended
- QChar c3 = QLatin1Char('x'); // always OK, recommended
-
- // from int to 1 UTF-16 code unit: must guarantee that the input is <= 0xFFFF
- QChar c4 = 120; // compile error, unless QT_IMPLICIT_QCHAR_CONSTRUCTION is defined
- QChar c5(120); // OK (direct initialization)
- auto c6 = QChar(120); // ditto
-
- // from int/char32_t to 1/2 UTF-16 code units:
- // 𝄞 'MUSICAL SYMBOL G CLEF' (U+1D11E)
- auto c7 = QChar(0x1D11E); // compiles, but undefined behavior at runtime
- auto c8 = QChar::fromUcs4(0x1D11E); // always OK
- auto c9 = QChar::fromUcs4(U'\U0001D11E'); // always OK
- // => use c8/c9 as QStringView objects
-
- \endcode
-
- \sa QLatin1Char, QChar::fromUcs4, QT_NO_CAST_FROM_ASCII
-*/
-
QT_END_NAMESPACE