summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qchar.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qchar.cpp')
-rw-r--r--src/corelib/text/qchar.cpp2059
1 files changed, 2059 insertions, 0 deletions
diff --git a/src/corelib/text/qchar.cpp b/src/corelib/text/qchar.cpp
new file mode 100644
index 0000000000..0c190c6a3d
--- /dev/null
+++ b/src/corelib/text/qchar.cpp
@@ -0,0 +1,2059 @@
+/****************************************************************************
+**
+** Copyright (C) 2016 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 3 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL3 included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 3 requirements
+** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 2.0 or (at your option) the GNU General
+** Public license version 3 or any later version approved by the KDE Free
+** Qt Foundation. The licenses are as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-2.0.html and
+** https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+// Don't define it while compiling this module, or USERS of Qt will
+// not be able to link.
+#ifdef QT_NO_CAST_FROM_ASCII
+# undef QT_NO_CAST_FROM_ASCII
+#endif
+#ifdef QT_NO_CAST_TO_ASCII
+# undef QT_NO_CAST_TO_ASCII
+#endif
+#include "qchar.h"
+
+#include "qdatastream.h"
+
+#include "qunicodetables_p.h"
+#include "qunicodetables.cpp"
+
+#include <algorithm>
+
+QT_BEGIN_NAMESPACE
+
+#define FLAG(x) (1 << (x))
+
+/*!
+ \class QLatin1Char
+ \inmodule QtCore
+ \brief The QLatin1Char class provides an 8-bit ASCII/Latin-1 character.
+
+ \ingroup string-processing
+
+ This class is only useful to construct a QChar with 8-bit character.
+
+ \sa QChar, QLatin1String, QString
+*/
+
+/*!
+ \fn const char QLatin1Char::toLatin1() const
+
+ Converts a Latin-1 character to an 8-bit ASCII representation of the character.
+*/
+
+/*!
+ \fn const ushort QLatin1Char::unicode() const
+
+ Converts a Latin-1 character to an 16-bit-encoded Unicode representation
+ of the character.
+*/
+
+/*!
+ \fn QLatin1Char::QLatin1Char(char c)
+
+ Constructs a Latin-1 character for \a c. This constructor should be
+ used when the encoding of the input character is known to be Latin-1.
+*/
+
+/*!
+ \class QChar
+ \inmodule QtCore
+ \brief The QChar class provides a 16-bit Unicode character.
+
+ \ingroup string-processing
+ \reentrant
+
+ In Qt, Unicode characters are 16-bit entities without any markup
+ or structure. This class represents such an entity. It is
+ lightweight, so it can be used everywhere. Most compilers treat
+ it like an \c{unsigned short}.
+
+ QChar provides a full complement of testing/classification
+ functions, converting to and from other formats, converting from
+ composed to decomposed Unicode, and trying to compare and
+ case-convert if you ask it to.
+
+ The classification functions include functions like those in the
+ standard C++ header \<cctype\> (formerly \<ctype.h\>), but
+ operating on the full range of Unicode characters, not just for the ASCII
+ range. They all return true if the character is a certain type of character;
+ otherwise they return false. These classification functions are
+ isNull() (returns \c true if the character is '\\0'), isPrint()
+ (true if the character is any sort of printable character,
+ including whitespace), isPunct() (any sort of punctation),
+ isMark() (Unicode Mark), isLetter() (a letter), isNumber() (any
+ sort of numeric character, not just 0-9), isLetterOrNumber(), and
+ isDigit() (decimal digits). All of these are wrappers around
+ category() which return the Unicode-defined category of each
+ character. Some of these also calculate the derived properties
+ (for example isSpace() returns \c true if the character is of category
+ Separator_* or an exceptional code point from Other_Control category).
+
+ QChar also provides direction(), which indicates the "natural"
+ writing direction of this character. The joiningType() function
+ indicates how the character joins with it's neighbors (needed
+ mostly for Arabic or Syriac) and finally hasMirrored(), which indicates
+ whether the character needs to be mirrored when it is printed in
+ it's "unnatural" writing direction.
+
+ Composed Unicode characters (like \a ring) can be converted to
+ decomposed Unicode ("a" followed by "ring above") by using decomposition().
+
+ In Unicode, comparison is not necessarily possible and case
+ conversion is very difficult at best. Unicode, covering the
+ "entire" world, also includes most of the world's case and
+ sorting problems. operator==() and friends will do comparison
+ based purely on the numeric Unicode value (code point) of the
+ characters, and toUpper() and toLower() will do case changes when
+ the character has a well-defined uppercase/lowercase equivalent.
+ For locale-dependent comparisons, use QString::localeAwareCompare().
+
+ The conversion functions include unicode() (to a scalar),
+ toLatin1() (to scalar, but converts all non-Latin-1 characters to
+ 0), row() (gives the Unicode row), cell() (gives the Unicode
+ cell), digitValue() (gives the integer value of any of the
+ numerous digit characters), and a host of constructors.
+
+ QChar provides constructors and cast operators that make it easy
+ to convert to and from traditional 8-bit \c{char}s. If you
+ defined \c QT_NO_CAST_FROM_ASCII and \c QT_NO_CAST_TO_ASCII, as
+ explained in the QString documentation, you will need to
+ explicitly call fromLatin1(), or use QLatin1Char,
+ to construct a QChar from an 8-bit \c char, and you will need to
+ call toLatin1() to get the 8-bit value back.
+
+ For more information see
+ \l{http://www.unicode.org/ucd/}{"About the Unicode Character Database"}.
+
+ \sa Unicode, QString, QLatin1Char
+*/
+
+/*!
+ \enum QChar::UnicodeVersion
+
+ Specifies which version of the \l{http://www.unicode.org/}{Unicode standard}
+ introduced a certain character.
+
+ \value Unicode_1_1 Version 1.1
+ \value Unicode_2_0 Version 2.0
+ \value Unicode_2_1_2 Version 2.1.2
+ \value Unicode_3_0 Version 3.0
+ \value Unicode_3_1 Version 3.1
+ \value Unicode_3_2 Version 3.2
+ \value Unicode_4_0 Version 4.0
+ \value Unicode_4_1 Version 4.1
+ \value Unicode_5_0 Version 5.0
+ \value Unicode_5_1 Version 5.1
+ \value Unicode_5_2 Version 5.2
+ \value Unicode_6_0 Version 6.0
+ \value Unicode_6_1 Version 6.1
+ \value Unicode_6_2 Version 6.2
+ \value Unicode_6_3 Version 6.3 Since Qt 5.3
+ \value Unicode_7_0 Version 7.0 Since Qt 5.5
+ \value Unicode_8_0 Version 8.0 Since Qt 5.6
+ \value Unicode_9_0 Version 9.0 Since Qt 5.11
+ \value Unicode_10_0 Version 10.0 Since Qt 5.11
+ \value Unicode_Unassigned The value is not assigned to any character
+ in version 8.0 of Unicode.
+
+ \sa unicodeVersion(), currentUnicodeVersion()
+*/
+
+/*!
+ \enum QChar::Category
+
+ This enum maps the Unicode character categories.
+
+ The following characters are normative in Unicode:
+
+ \value Mark_NonSpacing Unicode class name Mn
+
+ \value Mark_SpacingCombining Unicode class name Mc
+
+ \value Mark_Enclosing Unicode class name Me
+
+ \value Number_DecimalDigit Unicode class name Nd
+
+ \value Number_Letter Unicode class name Nl
+
+ \value Number_Other Unicode class name No
+
+ \value Separator_Space Unicode class name Zs
+
+ \value Separator_Line Unicode class name Zl
+
+ \value Separator_Paragraph Unicode class name Zp
+
+ \value Other_Control Unicode class name Cc
+
+ \value Other_Format Unicode class name Cf
+
+ \value Other_Surrogate Unicode class name Cs
+
+ \value Other_PrivateUse Unicode class name Co
+
+ \value Other_NotAssigned Unicode class name Cn
+
+
+ The following categories are informative in Unicode:
+
+ \value Letter_Uppercase Unicode class name Lu
+
+ \value Letter_Lowercase Unicode class name Ll
+
+ \value Letter_Titlecase Unicode class name Lt
+
+ \value Letter_Modifier Unicode class name Lm
+
+ \value Letter_Other Unicode class name Lo
+
+ \value Punctuation_Connector Unicode class name Pc
+
+ \value Punctuation_Dash Unicode class name Pd
+
+ \value Punctuation_Open Unicode class name Ps
+
+ \value Punctuation_Close Unicode class name Pe
+
+ \value Punctuation_InitialQuote Unicode class name Pi
+
+ \value Punctuation_FinalQuote Unicode class name Pf
+
+ \value Punctuation_Other Unicode class name Po
+
+ \value Symbol_Math Unicode class name Sm
+
+ \value Symbol_Currency Unicode class name Sc
+
+ \value Symbol_Modifier Unicode class name Sk
+
+ \value Symbol_Other Unicode class name So
+
+ \sa category()
+*/
+
+/*!
+ \enum QChar::Script
+ \since 5.1
+
+ This enum type defines the Unicode script property values.
+
+ For details about the Unicode script property values see
+ \l{http://www.unicode.org/reports/tr24/}{Unicode Standard Annex #24}.
+
+ In order to conform to C/C++ naming conventions "Script_" is prepended
+ to the codes used in the Unicode Standard.
+
+ \value Script_Unknown For unassigned, private-use, noncharacter, and surrogate code points.
+ \value Script_Inherited For characters that may be used with multiple scripts
+ and that inherit their script from the preceding characters.
+ These include nonspacing marks, enclosing marks,
+ and zero width joiner/non-joiner characters.
+ \value Script_Common For characters that may be used with multiple scripts
+ and that do not inherit their script from the preceding characters.
+
+ \value Script_Latin
+ \value Script_Greek
+ \value Script_Cyrillic
+ \value Script_Armenian
+ \value Script_Hebrew
+ \value Script_Arabic
+ \value Script_Syriac
+ \value Script_Thaana
+ \value Script_Devanagari
+ \value Script_Bengali
+ \value Script_Gurmukhi
+ \value Script_Gujarati
+ \value Script_Oriya
+ \value Script_Tamil
+ \value Script_Telugu
+ \value Script_Kannada
+ \value Script_Malayalam
+ \value Script_Sinhala
+ \value Script_Thai
+ \value Script_Lao
+ \value Script_Tibetan
+ \value Script_Myanmar
+ \value Script_Georgian
+ \value Script_Hangul
+ \value Script_Ethiopic
+ \value Script_Cherokee
+ \value Script_CanadianAboriginal
+ \value Script_Ogham
+ \value Script_Runic
+ \value Script_Khmer
+ \value Script_Mongolian
+ \value Script_Hiragana
+ \value Script_Katakana
+ \value Script_Bopomofo
+ \value Script_Han
+ \value Script_Yi
+ \value Script_OldItalic
+ \value Script_Gothic
+ \value Script_Deseret
+ \value Script_Tagalog
+ \value Script_Hanunoo
+ \value Script_Buhid
+ \value Script_Tagbanwa
+ \value Script_Coptic
+ \value Script_Limbu
+ \value Script_TaiLe
+ \value Script_LinearB
+ \value Script_Ugaritic
+ \value Script_Shavian
+ \value Script_Osmanya
+ \value Script_Cypriot
+ \value Script_Braille
+ \value Script_Buginese
+ \value Script_NewTaiLue
+ \value Script_Glagolitic
+ \value Script_Tifinagh
+ \value Script_SylotiNagri
+ \value Script_OldPersian
+ \value Script_Kharoshthi
+ \value Script_Balinese
+ \value Script_Cuneiform
+ \value Script_Phoenician
+ \value Script_PhagsPa
+ \value Script_Nko
+ \value Script_Sundanese
+ \value Script_Lepcha
+ \value Script_OlChiki
+ \value Script_Vai
+ \value Script_Saurashtra
+ \value Script_KayahLi
+ \value Script_Rejang
+ \value Script_Lycian
+ \value Script_Carian
+ \value Script_Lydian
+ \value Script_Cham
+ \value Script_TaiTham
+ \value Script_TaiViet
+ \value Script_Avestan
+ \value Script_EgyptianHieroglyphs
+ \value Script_Samaritan
+ \value Script_Lisu
+ \value Script_Bamum
+ \value Script_Javanese
+ \value Script_MeeteiMayek
+ \value Script_ImperialAramaic
+ \value Script_OldSouthArabian
+ \value Script_InscriptionalParthian
+ \value Script_InscriptionalPahlavi
+ \value Script_OldTurkic
+ \value Script_Kaithi
+ \value Script_Batak
+ \value Script_Brahmi
+ \value Script_Mandaic
+ \value Script_Chakma
+ \value Script_MeroiticCursive
+ \value Script_MeroiticHieroglyphs
+ \value Script_Miao
+ \value Script_Sharada
+ \value Script_SoraSompeng
+ \value Script_Takri
+ \value Script_CaucasianAlbanian
+ \value Script_BassaVah
+ \value Script_Duployan
+ \value Script_Elbasan
+ \value Script_Grantha
+ \value Script_PahawhHmong
+ \value Script_Khojki
+ \value Script_LinearA
+ \value Script_Mahajani
+ \value Script_Manichaean
+ \value Script_MendeKikakui
+ \value Script_Modi
+ \value Script_Mro
+ \value Script_OldNorthArabian
+ \value Script_Nabataean
+ \value Script_Palmyrene
+ \value Script_PauCinHau
+ \value Script_OldPermic
+ \value Script_PsalterPahlavi
+ \value Script_Siddham
+ \value Script_Khudawadi
+ \value Script_Tirhuta
+ \value Script_WarangCiti
+ \value Script_Ahom
+ \value Script_AnatolianHieroglyphs
+ \value Script_Hatran
+ \value Script_Multani
+ \value Script_OldHungarian
+ \value Script_SignWriting
+ \value Script_Adlam
+ \value Script_Bhaiksuki
+ \value Script_Marchen
+ \value Script_Newa
+ \value Script_Osage
+ \value Script_Tangut
+ \value Script_MasaramGondi
+ \value Script_Nushu
+ \value Script_Soyombo
+ \value Script_ZanabazarSquare
+
+ \omitvalue ScriptCount
+
+ \sa script()
+*/
+
+/*!
+ \enum QChar::Direction
+
+ This enum type defines the Unicode direction attributes. See the
+ \l{http://www.unicode.org/reports/tr9/tr9-35.html#Table_Bidirectional_Character_Types}{Unicode Standard} for a description
+ of the values.
+
+ In order to conform to C/C++ naming conventions "Dir" is prepended
+ to the codes used in the Unicode Standard.
+
+ \value DirAL
+ \value DirAN
+ \value DirB
+ \value DirBN
+ \value DirCS
+ \value DirEN
+ \value DirES
+ \value DirET
+ \value DirFSI Since Qt 5.3
+ \value DirL
+ \value DirLRE
+ \value DirLRI Since Qt 5.3
+ \value DirLRO
+ \value DirNSM
+ \value DirON
+ \value DirPDF
+ \value DirPDI Since Qt 5.3
+ \value DirR
+ \value DirRLE
+ \value DirRLI Since Qt 5.3
+ \value DirRLO
+ \value DirS
+ \value DirWS
+
+ \sa direction()
+*/
+
+/*!
+ \enum QChar::Decomposition
+
+ This enum type defines the Unicode decomposition attributes. See
+ the \l{http://www.unicode.org/}{Unicode Standard} for a
+ description of the values.
+
+ \value NoDecomposition
+ \value Canonical
+ \value Circle
+ \value Compat
+ \value Final
+ \value Font
+ \value Fraction
+ \value Initial
+ \value Isolated
+ \value Medial
+ \value Narrow
+ \value NoBreak
+ \value Small
+ \value Square
+ \value Sub
+ \value Super
+ \value Vertical
+ \value Wide
+
+ \sa decomposition()
+*/
+
+/*!
+ \enum QChar::JoiningType
+ since 5.3
+
+ This enum type defines the Unicode joining type attributes. See the
+ \l{http://www.unicode.org/}{Unicode Standard} for a description of the values.
+
+ In order to conform to C/C++ naming conventions "Joining_" is prepended
+ to the codes used in the Unicode Standard.
+
+ \value Joining_None
+ \value Joining_Causing
+ \value Joining_Dual
+ \value Joining_Right
+ \value Joining_Left
+ \value Joining_Transparent
+
+ \sa joiningType()
+*/
+
+#if QT_DEPRECATED_SINCE(5, 3)
+/*!
+ \enum QChar::Joining
+ \deprecated in 5.3, use JoiningType instead.
+
+ This enum type defines the Unicode joining attributes. See the
+ \l{http://www.unicode.org/}{Unicode Standard} for a description
+ of the values.
+
+ \value Center
+ \value Dual
+ \value OtherJoining
+ \value Right
+
+ \sa joining()
+*/
+#endif
+
+/*!
+ \enum QChar::CombiningClass
+
+ \internal
+
+ This enum type defines names for some of the Unicode combining
+ classes. See the \l{http://www.unicode.org/}{Unicode Standard}
+ for a description of the values.
+
+ \value Combining_Above
+ \value Combining_AboveAttached
+ \value Combining_AboveLeft
+ \value Combining_AboveLeftAttached
+ \value Combining_AboveRight
+ \value Combining_AboveRightAttached
+ \value Combining_Below
+ \value Combining_BelowAttached
+ \value Combining_BelowLeft
+ \value Combining_BelowLeftAttached
+ \value Combining_BelowRight
+ \value Combining_BelowRightAttached
+ \value Combining_DoubleAbove
+ \value Combining_DoubleBelow
+ \value Combining_IotaSubscript
+ \value Combining_Left
+ \value Combining_LeftAttached
+ \value Combining_Right
+ \value Combining_RightAttached
+*/
+
+/*!
+ \enum QChar::SpecialCharacter
+
+ \value Null A QChar with this value isNull().
+ \value Tabulation Character tabulation.
+ \value LineFeed
+ \value CarriageReturn
+ \value Space
+ \value Nbsp Non-breaking space.
+ \value SoftHyphen
+ \value ReplacementCharacter The character shown when a font has no glyph
+ for a certain codepoint. A special question mark character is often
+ used. Codecs use this codepoint when input data cannot be
+ represented in Unicode.
+ \value ObjectReplacementCharacter Used to represent an object such as an
+ image when such objects cannot be presented.
+ \value ByteOrderMark
+ \value ByteOrderSwapped
+ \value ParagraphSeparator
+ \value LineSeparator
+ \value LastValidCodePoint
+*/
+
+/*!
+ \fn void QChar::setCell(uchar cell)
+ \internal
+*/
+
+/*!
+ \fn void QChar::setRow(uchar row)
+ \internal
+*/
+
+/*!
+ \fn QChar::QChar()
+
+ Constructs a null QChar ('\\0').
+
+ \sa isNull()
+*/
+
+/*!
+ \fn QChar::QChar(QLatin1Char ch)
+
+ Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
+*/
+
+/*!
+ \fn QChar::QChar(SpecialCharacter ch)
+
+ Constructs a QChar for the predefined character value \a ch.
+*/
+
+/*!
+ \fn QChar::QChar(char16_t ch)
+ \since 5.10
+
+ Constructs a QChar corresponding to the UTF-16 character \a ch.
+*/
+
+/*!
+ \fn QChar::QChar(wchar_t ch)
+ \since 5.10
+
+ Constructs a QChar corresponding to the wide character \a ch.
+
+ \note This constructor is only available on Windows.
+*/
+
+/*!
+ \fn QChar::QChar(char ch)
+
+ Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
+
+ \note This constructor is not available when \c QT_NO_CAST_FROM_ASCII
+ is defined.
+
+ \sa QT_NO_CAST_FROM_ASCII
+*/
+
+/*!
+ \fn QChar::QChar(uchar ch)
+
+ Constructs a QChar corresponding to ASCII/Latin-1 character \a ch.
+
+ \note This constructor is not available when \c QT_NO_CAST_FROM_ASCII
+ or \c QT_RESTRICTED_CAST_FROM_ASCII is defined.
+
+ \sa QT_NO_CAST_FROM_ASCII, QT_RESTRICTED_CAST_FROM_ASCII
+*/
+
+/*!
+ \fn QChar::QChar(uchar cell, uchar row)
+
+ Constructs a QChar for Unicode cell \a cell in row \a row.
+
+ \sa cell(), row()
+*/
+
+/*!
+ \fn QChar::QChar(ushort code)
+
+ Constructs a QChar for the character with Unicode code point \a code.
+*/
+
+/*!
+ \fn QChar::QChar(short code)
+
+ Constructs a QChar for the character with Unicode code point \a code.
+*/
+
+/*!
+ \fn QChar::QChar(uint code)
+
+ Constructs a QChar for the character with Unicode code point \a code.
+*/
+
+/*!
+ \fn QChar::QChar(int code)
+
+ Constructs a QChar for the character with Unicode code point \a code.
+*/
+
+/*!
+ \fn bool QChar::isNull() const
+
+ Returns \c true if the character is the Unicode character 0x0000
+ ('\\0'); otherwise returns \c false.
+*/
+
+/*!
+ \fn uchar QChar::cell() const
+
+ Returns the cell (least significant byte) of the Unicode character.
+
+ \sa row()
+*/
+
+/*!
+ \fn uchar QChar::row() const
+
+ Returns the row (most significant byte) of the Unicode character.
+
+ \sa cell()
+*/
+
+/*!
+ \fn bool QChar::isPrint() const
+
+ Returns \c true if the character is a printable character; otherwise
+ returns \c false. This is any character not of category Other_*.
+
+ Note that this gives no indication of whether the character is
+ available in a particular font.
+*/
+
+/*!
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a printable character; otherwise returns \c false.
+ This is any character not of category Other_*.
+
+ Note that this gives no indication of whether the character is
+ available in a particular font.
+*/
+bool QChar::isPrint(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ const int test = FLAG(Other_Control) |
+ FLAG(Other_Format) |
+ FLAG(Other_Surrogate) |
+ FLAG(Other_PrivateUse) |
+ FLAG(Other_NotAssigned);
+ return !(FLAG(qGetProp(ucs4)->category) & test);
+}
+
+/*!
+ \fn bool QChar::isSpace() const
+
+ Returns \c true if the character is a separator character
+ (Separator_* categories or certain code points from Other_Control category);
+ otherwise returns \c false.
+*/
+
+/*!
+ \fn bool QChar::isSpace(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a separator character (Separator_* categories or certain code points
+ from Other_Control category); otherwise returns \c false.
+*/
+
+/*!
+ \internal
+*/
+bool QT_FASTCALL QChar::isSpace_helper(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ const int test = FLAG(Separator_Space) |
+ FLAG(Separator_Line) |
+ FLAG(Separator_Paragraph);
+ return FLAG(qGetProp(ucs4)->category) & test;
+}
+
+/*!
+ \fn bool QChar::isMark() const
+
+ Returns \c true if the character is a mark (Mark_* categories);
+ otherwise returns \c false.
+
+ See QChar::Category for more information regarding marks.
+*/
+
+/*!
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a mark (Mark_* categories); otherwise returns \c false.
+*/
+bool QChar::isMark(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ const int test = FLAG(Mark_NonSpacing) |
+ FLAG(Mark_SpacingCombining) |
+ FLAG(Mark_Enclosing);
+ return FLAG(qGetProp(ucs4)->category) & test;
+}
+
+/*!
+ \fn bool QChar::isPunct() const
+
+ Returns \c true if the character is a punctuation mark (Punctuation_*
+ categories); otherwise returns \c false.
+*/
+
+/*!
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a punctuation mark (Punctuation_* categories); otherwise returns \c false.
+*/
+bool QChar::isPunct(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ const int test = FLAG(Punctuation_Connector) |
+ FLAG(Punctuation_Dash) |
+ FLAG(Punctuation_Open) |
+ FLAG(Punctuation_Close) |
+ FLAG(Punctuation_InitialQuote) |
+ FLAG(Punctuation_FinalQuote) |
+ FLAG(Punctuation_Other);
+ return FLAG(qGetProp(ucs4)->category) & test;
+}
+
+/*!
+ \fn bool QChar::isSymbol() const
+
+ Returns \c true if the character is a symbol (Symbol_* categories);
+ otherwise returns \c false.
+*/
+
+/*!
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a symbol (Symbol_* categories); otherwise returns \c false.
+*/
+bool QChar::isSymbol(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ const int test = FLAG(Symbol_Math) |
+ FLAG(Symbol_Currency) |
+ FLAG(Symbol_Modifier) |
+ FLAG(Symbol_Other);
+ return FLAG(qGetProp(ucs4)->category) & test;
+}
+
+/*!
+ \fn bool QChar::isLetter() const
+
+ Returns \c true if the character is a letter (Letter_* categories);
+ otherwise returns \c false.
+*/
+
+/*!
+ \fn bool QChar::isLetter(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a letter (Letter_* categories); otherwise returns \c false.
+*/
+
+/*!
+ \internal
+*/
+bool QT_FASTCALL QChar::isLetter_helper(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ const int test = FLAG(Letter_Uppercase) |
+ FLAG(Letter_Lowercase) |
+ FLAG(Letter_Titlecase) |
+ FLAG(Letter_Modifier) |
+ FLAG(Letter_Other);
+ return FLAG(qGetProp(ucs4)->category) & test;
+}
+
+/*!
+ \fn bool QChar::isNumber() const
+
+ Returns \c true if the character is a number (Number_* categories,
+ not just 0-9); otherwise returns \c false.
+
+ \sa isDigit()
+*/
+
+/*!
+ \fn bool QChar::isNumber(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a number (Number_* categories, not just 0-9); otherwise returns \c false.
+
+ \sa isDigit()
+*/
+
+/*!
+ \internal
+*/
+bool QT_FASTCALL QChar::isNumber_helper(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ const int test = FLAG(Number_DecimalDigit) |
+ FLAG(Number_Letter) |
+ FLAG(Number_Other);
+ return FLAG(qGetProp(ucs4)->category) & test;
+}
+
+/*!
+ \fn bool QChar::isLetterOrNumber() const
+
+ Returns \c true if the character is a letter or number (Letter_* or
+ Number_* categories); otherwise returns \c false.
+*/
+
+/*!
+ \fn bool QChar::isLetterOrNumber(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a letter or number (Letter_* or Number_* categories); otherwise returns \c false.
+*/
+
+/*!
+ \internal
+*/
+bool QT_FASTCALL QChar::isLetterOrNumber_helper(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ const int test = FLAG(Letter_Uppercase) |
+ FLAG(Letter_Lowercase) |
+ FLAG(Letter_Titlecase) |
+ FLAG(Letter_Modifier) |
+ FLAG(Letter_Other) |
+ FLAG(Number_DecimalDigit) |
+ FLAG(Number_Letter) |
+ FLAG(Number_Other);
+ return FLAG(qGetProp(ucs4)->category) & test;
+}
+
+/*!
+ \fn bool QChar::isDigit() const
+
+ Returns \c true if the character is a decimal digit
+ (Number_DecimalDigit); otherwise returns \c false.
+
+ \sa isNumber()
+*/
+
+/*!
+ \fn bool QChar::isDigit(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4 is
+ a decimal digit (Number_DecimalDigit); otherwise returns \c false.
+
+ \sa isNumber()
+*/
+
+/*!
+ \fn bool QChar::isNonCharacter() const
+ \since 5.0
+
+ Returns \c true if the QChar is a non-character; false otherwise.
+
+ Unicode has a certain number of code points that are classified
+ as "non-characters:" that is, they can be used for internal purposes
+ in applications but cannot be used for text interchange.
+ Those are the last two entries each Unicode Plane ([0xfffe..0xffff],
+ [0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef].
+*/
+
+/*!
+ \fn bool QChar::isHighSurrogate() const
+
+ Returns \c true if the QChar is the high part of a UTF16 surrogate
+ (for example if its code point is in range [0xd800..0xdbff]); false otherwise.
+*/
+
+/*!
+ \fn bool QChar::isLowSurrogate() const
+
+ Returns \c true if the QChar is the low part of a UTF16 surrogate
+ (for example if its code point is in range [0xdc00..0xdfff]); false otherwise.
+*/
+
+/*!
+ \fn bool QChar::isSurrogate() const
+ \since 5.0
+
+ Returns \c true if the QChar contains a code point that is in either
+ the high or the low part of the UTF-16 surrogate range
+ (for example if its code point is in range [0xd800..0xdfff]); false otherwise.
+*/
+
+/*!
+ \fn static bool QChar::isNonCharacter(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ is a non-character; false otherwise.
+
+ Unicode has a certain number of code points that are classified
+ as "non-characters:" that is, they can be used for internal purposes
+ in applications but cannot be used for text interchange.
+ Those are the last two entries each Unicode Plane ([0xfffe..0xffff],
+ [0x1fffe..0x1ffff], etc.) as well as the entries in range [0xfdd0..0xfdef].
+*/
+
+/*!
+ \fn static bool QChar::isHighSurrogate(uint ucs4)
+ \overload
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ is the high part of a UTF16 surrogate
+ (for example if its code point is in range [0xd800..0xdbff]); false otherwise.
+*/
+
+/*!
+ \fn static bool QChar::isLowSurrogate(uint ucs4)
+ \overload
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ is the low part of a UTF16 surrogate
+ (for example if its code point is in range [0xdc00..0xdfff]); false otherwise.
+*/
+
+/*!
+ \fn static bool QChar::isSurrogate(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ contains a code point that is in either the high or the low part of the
+ UTF-16 surrogate range (for example if its code point is in range [0xd800..0xdfff]);
+ false otherwise.
+*/
+
+/*!
+ \fn static bool QChar::requiresSurrogates(uint ucs4)
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ can be split into the high and low parts of a UTF16 surrogate
+ (for example if its code point is greater than or equals to 0x10000);
+ false otherwise.
+*/
+
+/*!
+ \fn static uint QChar::surrogateToUcs4(ushort high, ushort low)
+
+ Converts a UTF16 surrogate pair with the given \a high and \a low values
+ to it's UCS-4-encoded code point.
+*/
+
+/*!
+ \fn static uint QChar::surrogateToUcs4(QChar high, QChar low)
+ \overload
+
+ Converts a UTF16 surrogate pair (\a high, \a low) to it's UCS-4-encoded code point.
+*/
+
+/*!
+ \fn static ushort QChar::highSurrogate(uint ucs4)
+
+ Returns the high surrogate part of a UCS-4-encoded code point.
+ The returned result is undefined if \a ucs4 is smaller than 0x10000.
+*/
+
+/*!
+ \fn static ushort QChar::lowSurrogate(uint ucs4)
+
+ Returns the low surrogate part of a UCS-4-encoded code point.
+ The returned result is undefined if \a ucs4 is smaller than 0x10000.
+*/
+
+/*!
+ \fn int QChar::digitValue() const
+
+ Returns the numeric value of the digit, or -1 if the character is not a digit.
+*/
+
+/*!
+ \overload
+ Returns the numeric value of the digit specified by the UCS-4-encoded
+ character, \a ucs4, or -1 if the character is not a digit.
+*/
+int QChar::digitValue(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return -1;
+ return qGetProp(ucs4)->digitValue;
+}
+
+/*!
+ \fn QChar::Category QChar::category() const
+
+ Returns the character's category.
+*/
+
+/*!
+ \overload
+ Returns the category of the UCS-4-encoded character specified by \a ucs4.
+*/
+QChar::Category QChar::category(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return QChar::Other_NotAssigned;
+ return (QChar::Category) qGetProp(ucs4)->category;
+}
+
+/*!
+ \fn QChar::Direction QChar::direction() const
+
+ Returns the character's direction.
+*/
+
+/*!
+ \overload
+ Returns the direction of the UCS-4-encoded character specified by \a ucs4.
+*/
+QChar::Direction QChar::direction(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return QChar::DirL;
+ return (QChar::Direction) qGetProp(ucs4)->direction;
+}
+
+/*!
+ \fn QChar::JoiningType QChar::joiningType() const
+ \since 5.3
+
+ Returns information about the joining type attributes of the character
+ (needed for certain languages such as Arabic or Syriac).
+*/
+
+/*!
+ \overload
+ \since 5.3
+
+ Returns information about the joining type attributes of the UCS-4-encoded
+ character specified by \a ucs4
+ (needed for certain languages such as Arabic or Syriac).
+*/
+QChar::JoiningType QChar::joiningType(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return QChar::Joining_None;
+ return QChar::JoiningType(qGetProp(ucs4)->joining);
+}
+
+#if QT_DEPRECATED_SINCE(5, 3)
+/*!
+ \fn QChar::Joining QChar::joining() const
+ \deprecated in 5.3, use joiningType() instead.
+
+ Returns information about the joining properties of the character
+ (needed for certain languages such as Arabic).
+*/
+
+/*!
+ \overload
+ \deprecated in 5.3, use joiningType() instead.
+
+ Returns information about the joining properties of the UCS-4-encoded
+ character specified by \a ucs4 (needed for certain languages such as Arabic).
+*/
+QChar::Joining QChar::joining(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return QChar::OtherJoining;
+ switch (qGetProp(ucs4)->joining) {
+ case QChar::Joining_Causing: return QChar::Center;
+ case QChar::Joining_Dual: return QChar::Dual;
+ case QChar::Joining_Right: return QChar::Right;
+ default: break;
+ }
+ return QChar::OtherJoining;
+}
+#endif
+
+/*!
+ \fn bool QChar::hasMirrored() const
+
+ Returns \c true if the character should be reversed if the text
+ direction is reversed; otherwise returns \c false.
+
+ A bit faster equivalent of (ch.mirroredChar() != ch).
+
+ \sa mirroredChar()
+*/
+
+/*!
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ should be reversed if the text direction is reversed; otherwise returns \c false.
+
+ A bit faster equivalent of (QChar::mirroredChar(ucs4) != ucs4).
+
+ \sa mirroredChar()
+*/
+bool QChar::hasMirrored(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return false;
+ return qGetProp(ucs4)->mirrorDiff != 0;
+}
+
+/*!
+ \fn bool QChar::isLower() const
+
+ Returns \c true if the character is a lowercase letter, for example
+ category() is Letter_Lowercase.
+
+ \sa isUpper(), toLower(), toUpper()
+*/
+
+/*!
+ \fn static bool QChar::isLower(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ is a lowercase letter, for example category() is Letter_Lowercase.
+
+ \sa isUpper(), toLower(), toUpper()
+*/
+
+/*!
+ \fn bool QChar::isUpper() const
+
+ Returns \c true if the character is an uppercase letter, for example
+ category() is Letter_Uppercase.
+
+ \sa isLower(), toUpper(), toLower()
+*/
+
+/*!
+ \fn static bool QChar::isUpper(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ is an uppercase letter, for example category() is Letter_Uppercase.
+
+ \sa isLower(), toUpper(), toLower()
+*/
+
+/*!
+ \fn bool QChar::isTitleCase() const
+
+ Returns \c true if the character is a titlecase letter, for example
+ category() is Letter_Titlecase.
+
+ \sa isLower(), toUpper(), toLower(), toTitleCase()
+*/
+
+/*!
+ \fn static bool QChar::isTitleCase(uint ucs4)
+ \overload
+ \since 5.0
+
+ Returns \c true if the UCS-4-encoded character specified by \a ucs4
+ is a titlecase letter, for example category() is Letter_Titlecase.
+
+ \sa isLower(), toUpper(), toLower(), toTitleCase()
+*/
+/*!
+ \fn QChar QChar::mirroredChar() const
+
+ Returns the mirrored character if this character is a mirrored
+ character; otherwise returns the character itself.
+
+ \sa hasMirrored()
+*/
+
+/*!
+ \overload
+ Returns the mirrored character if the UCS-4-encoded character specified
+ by \a ucs4 is a mirrored character; otherwise returns the character itself.
+
+ \sa hasMirrored()
+*/
+uint QChar::mirroredChar(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return ucs4;
+ return ucs4 + qGetProp(ucs4)->mirrorDiff;
+}
+
+
+// constants for Hangul (de)composition, see UAX #15
+enum {
+ Hangul_SBase = 0xac00,
+ Hangul_LBase = 0x1100,
+ Hangul_VBase = 0x1161,
+ Hangul_TBase = 0x11a7,
+ Hangul_LCount = 19,
+ Hangul_VCount = 21,
+ Hangul_TCount = 28,
+ Hangul_NCount = Hangul_VCount * Hangul_TCount,
+ Hangul_SCount = Hangul_LCount * Hangul_NCount
+};
+
+// buffer has to have a length of 3. It's needed for Hangul decomposition
+static const unsigned short * QT_FASTCALL decompositionHelper
+ (uint ucs4, int *length, int *tag, unsigned short *buffer)
+{
+ if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount) {
+ // compute Hangul syllable decomposition as per UAX #15
+ const uint SIndex = ucs4 - Hangul_SBase;
+ buffer[0] = Hangul_LBase + SIndex / Hangul_NCount; // L
+ buffer[1] = Hangul_VBase + (SIndex % Hangul_NCount) / Hangul_TCount; // V
+ buffer[2] = Hangul_TBase + SIndex % Hangul_TCount; // T
+ *length = buffer[2] == Hangul_TBase ? 2 : 3;
+ *tag = QChar::Canonical;
+ return buffer;
+ }
+
+ const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
+ if (index == 0xffff) {
+ *length = 0;
+ *tag = QChar::NoDecomposition;
+ return nullptr;
+ }
+
+ const unsigned short *decomposition = uc_decomposition_map+index;
+ *tag = (*decomposition) & 0xff;
+ *length = (*decomposition) >> 8;
+ return decomposition+1;
+}
+
+/*!
+ Decomposes a character into it's constituent parts. Returns an empty string
+ if no decomposition exists.
+*/
+QString QChar::decomposition() const
+{
+ return QChar::decomposition(ucs);
+}
+
+/*!
+ \overload
+ Decomposes the UCS-4-encoded character specified by \a ucs4 into it's
+ constituent parts. Returns an empty string if no decomposition exists.
+*/
+QString QChar::decomposition(uint ucs4)
+{
+ unsigned short buffer[3];
+ int length;
+ int tag;
+ const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
+ return QString(reinterpret_cast<const QChar *>(d), length);
+}
+
+/*!
+ \fn QChar::Decomposition QChar::decompositionTag() const
+
+ Returns the tag defining the composition of the character. Returns
+ QChar::NoDecomposition if no decomposition exists.
+*/
+
+/*!
+ \overload
+ Returns the tag defining the composition of the UCS-4-encoded character
+ specified by \a ucs4. Returns QChar::NoDecomposition if no decomposition exists.
+*/
+QChar::Decomposition QChar::decompositionTag(uint ucs4) noexcept
+{
+ if (ucs4 >= Hangul_SBase && ucs4 < Hangul_SBase + Hangul_SCount)
+ return QChar::Canonical;
+ const unsigned short index = GET_DECOMPOSITION_INDEX(ucs4);
+ if (index == 0xffff)
+ return QChar::NoDecomposition;
+ return (QChar::Decomposition)(uc_decomposition_map[index] & 0xff);
+}
+
+/*!
+ \fn unsigned char QChar::combiningClass() const
+
+ Returns the combining class for the character as defined in the
+ Unicode standard. This is mainly useful as a positioning hint for
+ marks attached to a base character.
+
+ The Qt text rendering engine uses this information to correctly
+ position non-spacing marks around a base character.
+*/
+
+/*!
+ \overload
+ Returns the combining class for the UCS-4-encoded character specified by
+ \a ucs4, as defined in the Unicode standard.
+*/
+unsigned char QChar::combiningClass(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return 0;
+ return (unsigned char) qGetProp(ucs4)->combiningClass;
+}
+
+/*!
+ \fn QChar::Script QChar::script() const
+ \since 5.1
+
+ Returns the Unicode script property value for this character.
+*/
+
+/*!
+ \overload
+ \since 5.1
+
+ Returns the Unicode script property value for the character specified in
+ its UCS-4-encoded form as \a ucs4.
+*/
+QChar::Script QChar::script(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return QChar::Script_Unknown;
+ return (QChar::Script) qGetProp(ucs4)->script;
+}
+
+/*!
+ \fn QChar::UnicodeVersion QChar::unicodeVersion() const
+
+ Returns the Unicode version that introduced this character.
+*/
+
+/*!
+ \overload
+ Returns the Unicode version that introduced the character specified in
+ its UCS-4-encoded form as \a ucs4.
+*/
+QChar::UnicodeVersion QChar::unicodeVersion(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return QChar::Unicode_Unassigned;
+ return (QChar::UnicodeVersion) qGetProp(ucs4)->unicodeVersion;
+}
+
+/*!
+ Returns the most recent supported Unicode version.
+*/
+QChar::UnicodeVersion QChar::currentUnicodeVersion() noexcept
+{
+ return UNICODE_DATA_VERSION;
+}
+
+
+template <typename Traits, typename T>
+Q_DECL_CONST_FUNCTION static inline T convertCase_helper(T uc) noexcept
+{
+ const QUnicodeTables::Properties *prop = qGetProp(uc);
+
+ if (Q_UNLIKELY(Traits::caseSpecial(prop))) {
+ const ushort *specialCase = specialCaseMap + Traits::caseDiff(prop);
+ // so far, there are no special cases beyond BMP (guaranteed by the qunicodetables generator)
+ return *specialCase == 1 ? specialCase[1] : uc;
+ }
+
+ return uc + Traits::caseDiff(prop);
+}
+
+/*!
+ \fn QChar QChar::toLower() const
+
+ Returns the lowercase equivalent if the character is uppercase or titlecase;
+ otherwise returns the character itself.
+*/
+
+/*!
+ \overload
+ Returns the lowercase equivalent of the UCS-4-encoded character specified
+ by \a ucs4 if the character is uppercase or titlecase; otherwise returns
+ the character itself.
+*/
+uint QChar::toLower(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return ucs4;
+ return convertCase_helper<QUnicodeTables::LowercaseTraits>(ucs4);
+}
+
+/*!
+ \fn QChar QChar::toUpper() const
+
+ Returns the uppercase equivalent if the character is lowercase or titlecase;
+ otherwise returns the character itself.
+*/
+
+/*!
+ \overload
+ Returns the uppercase equivalent of the UCS-4-encoded character specified
+ by \a ucs4 if the character is lowercase or titlecase; otherwise returns
+ the character itself.
+*/
+uint QChar::toUpper(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return ucs4;
+ return convertCase_helper<QUnicodeTables::UppercaseTraits>(ucs4);
+}
+
+/*!
+ \fn QChar QChar::toTitleCase() const
+
+ Returns the title case equivalent if the character is lowercase or uppercase;
+ otherwise returns the character itself.
+*/
+
+/*!
+ \overload
+ Returns the title case equivalent of the UCS-4-encoded character specified
+ by \a ucs4 if the character is lowercase or uppercase; otherwise returns
+ the character itself.
+*/
+uint QChar::toTitleCase(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return ucs4;
+ return convertCase_helper<QUnicodeTables::TitlecaseTraits>(ucs4);
+}
+
+static inline uint foldCase(const ushort *ch, const ushort *start)
+{
+ uint ucs4 = *ch;
+ if (QChar::isLowSurrogate(ucs4) && ch > start && QChar::isHighSurrogate(*(ch - 1)))
+ ucs4 = QChar::surrogateToUcs4(*(ch - 1), ucs4);
+ return convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4);
+}
+
+static inline uint foldCase(uint ch, uint &last) noexcept
+{
+ uint ucs4 = ch;
+ if (QChar::isLowSurrogate(ucs4) && QChar::isHighSurrogate(last))
+ ucs4 = QChar::surrogateToUcs4(last, ucs4);
+ last = ch;
+ return convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4);
+}
+
+static inline ushort foldCase(ushort ch) noexcept
+{
+ return convertCase_helper<QUnicodeTables::CasefoldTraits>(ch);
+}
+
+static inline QChar foldCase(QChar ch) noexcept
+{
+ return QChar(foldCase(ch.unicode()));
+}
+
+/*!
+ \fn QChar QChar::toCaseFolded() const
+
+ Returns the case folded equivalent of the character.
+ For most Unicode characters this is the same as toLower().
+*/
+
+/*!
+ \overload
+ Returns the case folded equivalent of the UCS-4-encoded character specified
+ by \a ucs4. For most Unicode characters this is the same as toLower().
+*/
+uint QChar::toCaseFolded(uint ucs4) noexcept
+{
+ if (ucs4 > LastValidCodePoint)
+ return ucs4;
+ return convertCase_helper<QUnicodeTables::CasefoldTraits>(ucs4);
+}
+
+/*!
+ \fn char QChar::toLatin1() const
+
+ Returns the Latin-1 character equivalent to the QChar, or 0. This
+ is mainly useful for non-internationalized software.
+
+ \note It is not possible to distinguish a non-Latin-1 character from a Latin-1 0
+ (NUL) character. Prefer to use unicode(), which does not have this ambiguity.
+
+ \sa unicode()
+*/
+
+/*!
+ \fn QChar QChar::fromLatin1(char)
+
+ Converts the Latin-1 character \a c to its equivalent QChar. This
+ is mainly useful for non-internationalized software.
+
+ An alternative is to use QLatin1Char.
+
+ \sa toLatin1(), unicode()
+*/
+
+/*!
+ \fn char QChar::toAscii() const
+ \deprecated
+
+ Returns the Latin-1 character value of the QChar, or 0 if the character is not
+ representable.
+
+ The main purpose of this function is to preserve ASCII characters used
+ in C strings. This is mainly useful for developers of non-internationalized
+ software.
+
+ \note It is not possible to distinguish a non-Latin 1 character from an ASCII 0
+ (NUL) character. Prefer to use unicode(), which does not have this ambiguity.
+
+ \note This function does not check whether the character value is inside
+ the valid range of US-ASCII.
+
+ \sa toLatin1(), unicode()
+*/
+
+/*!
+ \fn QChar QChar::fromAscii(char)
+ \deprecated
+
+ Converts the ASCII character \a c to it's equivalent QChar. This
+ is mainly useful for non-internationalized software.
+
+ An alternative is to use QLatin1Char.
+
+ \sa fromLatin1(), unicode()
+*/
+
+#ifndef QT_NO_DATASTREAM
+/*!
+ \relates QChar
+
+ Writes the char \a chr to the stream \a out.
+
+ \sa {Serializing Qt Data Types}
+*/
+QDataStream &operator<<(QDataStream &out, QChar chr)
+{
+ out << quint16(chr.unicode());
+ return out;
+}
+
+/*!
+ \relates QChar
+
+ Reads a char from the stream \a in into char \a chr.
+
+ \sa {Serializing Qt Data Types}
+*/
+QDataStream &operator>>(QDataStream &in, QChar &chr)
+{
+ quint16 u;
+ in >> u;
+ chr.unicode() = ushort(u);
+ return in;
+}
+#endif // QT_NO_DATASTREAM
+
+/*!
+ \fn ushort & QChar::unicode()
+
+ Returns a reference to the numeric Unicode value of the QChar.
+*/
+
+/*!
+ \fn ushort QChar::unicode() const
+
+ Returns the numeric Unicode value of the QChar.
+*/
+
+/*****************************************************************************
+ Documentation of QChar related functions
+ *****************************************************************************/
+
+/*!
+ \fn bool operator==(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns \c true if \a c1 and \a c2 are the same Unicode character;
+ otherwise returns \c false.
+*/
+
+/*!
+ \fn int operator!=(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns \c true if \a c1 and \a c2 are not the same Unicode
+ character; otherwise returns \c false.
+*/
+
+/*!
+ \fn int operator<=(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns \c true if the numeric Unicode value of \a c1 is less than
+ or equal to that of \a c2; otherwise returns \c false.
+*/
+
+/*!
+ \fn int operator>=(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns \c true if the numeric Unicode value of \a c1 is greater than
+ or equal to that of \a c2; otherwise returns \c false.
+*/
+
+/*!
+ \fn int operator<(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns \c true if the numeric Unicode value of \a c1 is less than
+ that of \a c2; otherwise returns \c false.
+*/
+
+/*!
+ \fn int operator>(QChar c1, QChar c2)
+
+ \relates QChar
+
+ Returns \c true if the numeric Unicode value of \a c1 is greater than
+ that of \a c2; otherwise returns \c false.
+*/
+
+
+// ---------------------------------------------------------------------------
+
+
+static void decomposeHelper(QString *str, bool canonical, QChar::UnicodeVersion version, int from)
+{
+ int length;
+ int tag;
+ unsigned short buffer[3];
+
+ QString &s = *str;
+
+ const unsigned short *utf16 = reinterpret_cast<unsigned short *>(s.data());
+ const unsigned short *uc = utf16 + s.length();
+ while (uc != utf16 + from) {
+ uint ucs4 = *(--uc);
+ if (QChar(ucs4).isLowSurrogate() && uc != utf16) {
+ ushort high = *(uc - 1);
+ if (QChar(high).isHighSurrogate()) {
+ --uc;
+ ucs4 = QChar::surrogateToUcs4(high, ucs4);
+ }
+ }
+
+ if (QChar::unicodeVersion(ucs4) > version)
+ continue;
+
+ const unsigned short *d = decompositionHelper(ucs4, &length, &tag, buffer);
+ if (!d || (canonical && tag != QChar::Canonical))
+ continue;
+
+ int pos = uc - utf16;
+ s.replace(pos, QChar::requiresSurrogates(ucs4) ? 2 : 1, reinterpret_cast<const QChar *>(d), length);
+ // since the replace invalidates the pointers and we do decomposition recursive
+ utf16 = reinterpret_cast<unsigned short *>(s.data());
+ uc = utf16 + pos + length;
+ }
+}
+
+
+struct UCS2Pair {
+ ushort u1;
+ ushort u2;
+};
+
+inline bool operator<(const UCS2Pair &ligature1, const UCS2Pair &ligature2)
+{ return ligature1.u1 < ligature2.u1; }
+inline bool operator<(ushort u1, const UCS2Pair &ligature)
+{ return u1 < ligature.u1; }
+inline bool operator<(const UCS2Pair &ligature, ushort u1)
+{ return ligature.u1 < u1; }
+
+struct UCS2SurrogatePair {
+ UCS2Pair p1;
+ UCS2Pair p2;
+};
+
+inline bool operator<(const UCS2SurrogatePair &ligature1, const UCS2SurrogatePair &ligature2)
+{ return QChar::surrogateToUcs4(ligature1.p1.u1, ligature1.p1.u2) < QChar::surrogateToUcs4(ligature2.p1.u1, ligature2.p1.u2); }
+inline bool operator<(uint u1, const UCS2SurrogatePair &ligature)
+{ return u1 < QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2); }
+inline bool operator<(const UCS2SurrogatePair &ligature, uint u1)
+{ return QChar::surrogateToUcs4(ligature.p1.u1, ligature.p1.u2) < u1; }
+
+static uint inline ligatureHelper(uint u1, uint u2)
+{
+ if (u1 >= Hangul_LBase && u1 <= Hangul_SBase + Hangul_SCount) {
+ // compute Hangul syllable composition as per UAX #15
+ // hangul L-V pair
+ const uint LIndex = u1 - Hangul_LBase;
+ if (LIndex < Hangul_LCount) {
+ const uint VIndex = u2 - Hangul_VBase;
+ if (VIndex < Hangul_VCount)
+ return Hangul_SBase + (LIndex * Hangul_VCount + VIndex) * Hangul_TCount;
+ }
+ // hangul LV-T pair
+ const uint SIndex = u1 - Hangul_SBase;
+ if (SIndex < Hangul_SCount && (SIndex % Hangul_TCount) == 0) {
+ const uint TIndex = u2 - Hangul_TBase;
+ if (TIndex <= Hangul_TCount)
+ return u1 + TIndex;
+ }
+ }
+
+ const unsigned short index = GET_LIGATURE_INDEX(u2);
+ if (index == 0xffff)
+ return 0;
+ const unsigned short *ligatures = uc_ligature_map+index;
+ ushort length = *ligatures++;
+ if (QChar::requiresSurrogates(u1)) {
+ const UCS2SurrogatePair *data = reinterpret_cast<const UCS2SurrogatePair *>(ligatures);
+ const UCS2SurrogatePair *r = std::lower_bound(data, data + length, u1);
+ if (r != data + length && QChar::surrogateToUcs4(r->p1.u1, r->p1.u2) == u1)
+ return QChar::surrogateToUcs4(r->p2.u1, r->p2.u2);
+ } else {
+ const UCS2Pair *data = reinterpret_cast<const UCS2Pair *>(ligatures);
+ const UCS2Pair *r = std::lower_bound(data, data + length, ushort(u1));
+ if (r != data + length && r->u1 == ushort(u1))
+ return r->u2;
+ }
+
+ return 0;
+}
+
+static void composeHelper(QString *str, QChar::UnicodeVersion version, int from)
+{
+ QString &s = *str;
+
+ if (from < 0 || s.length() - from < 2)
+ return;
+
+ uint stcode = 0; // starter code point
+ int starter = -1; // starter position
+ int next = -1; // to prevent i == next
+ int lastCombining = 255; // to prevent combining > lastCombining
+
+ int pos = from;
+ while (pos < s.length()) {
+ int i = pos;
+ uint uc = s.at(pos).unicode();
+ if (QChar(uc).isHighSurrogate() && pos < s.length()-1) {
+ ushort low = s.at(pos+1).unicode();
+ if (QChar(low).isLowSurrogate()) {
+ uc = QChar::surrogateToUcs4(uc, low);
+ ++pos;
+ }
+ }
+
+ const QUnicodeTables::Properties *p = qGetProp(uc);
+ if (p->unicodeVersion > version) {
+ starter = -1;
+ next = -1; // to prevent i == next
+ lastCombining = 255; // to prevent combining > lastCombining
+ ++pos;
+ continue;
+ }
+
+ int combining = p->combiningClass;
+ if ((i == next || combining > lastCombining) && starter >= from) {
+ // allowed to form ligature with S
+ uint ligature = ligatureHelper(stcode, uc);
+ if (ligature) {
+ stcode = ligature;
+ QChar *d = s.data();
+ // ligatureHelper() never changes planes
+ if (QChar::requiresSurrogates(ligature)) {
+ d[starter] = QChar(QChar::highSurrogate(ligature));
+ d[starter + 1] = QChar(QChar::lowSurrogate(ligature));
+ s.remove(i, 2);
+ } else {
+ d[starter] = QChar(ligature);
+ s.remove(i, 1);
+ }
+ continue;
+ }
+ }
+ if (combining == 0) {
+ starter = i;
+ stcode = uc;
+ next = pos + 1;
+ }
+ lastCombining = combining;
+
+ ++pos;
+ }
+}
+
+
+static void canonicalOrderHelper(QString *str, QChar::UnicodeVersion version, int from)
+{
+ QString &s = *str;
+ const int l = s.length()-1;
+
+ uint u1, u2;
+ ushort c1, c2;
+
+ int pos = from;
+ while (pos < l) {
+ int p2 = pos+1;
+ u1 = s.at(pos).unicode();
+ if (QChar(u1).isHighSurrogate()) {
+ ushort low = s.at(p2).unicode();
+ if (QChar(low).isLowSurrogate()) {
+ u1 = QChar::surrogateToUcs4(u1, low);
+ if (p2 >= l)
+ break;
+ ++p2;
+ }
+ }
+ c1 = 0;
+
+ advance:
+ u2 = s.at(p2).unicode();
+ if (QChar(u2).isHighSurrogate() && p2 < l) {
+ ushort low = s.at(p2+1).unicode();
+ if (QChar(low).isLowSurrogate()) {
+ u2 = QChar::surrogateToUcs4(u2, low);
+ ++p2;
+ }
+ }
+
+ c2 = 0;
+ {
+ const QUnicodeTables::Properties *p = qGetProp(u2);
+ if (p->unicodeVersion <= version)
+ c2 = p->combiningClass;
+ }
+ if (c2 == 0) {
+ pos = p2+1;
+ continue;
+ }
+
+ if (c1 == 0) {
+ const QUnicodeTables::Properties *p = qGetProp(u1);
+ if (p->unicodeVersion <= version)
+ c1 = p->combiningClass;
+ }
+
+ if (c1 > c2) {
+ QChar *uc = s.data();
+ int p = pos;
+ // exchange characters
+ if (!QChar::requiresSurrogates(u2)) {
+ uc[p++] = QChar(u2);
+ } else {
+ uc[p++] = QChar(QChar::highSurrogate(u2));
+ uc[p++] = QChar(QChar::lowSurrogate(u2));
+ }
+ if (!QChar::requiresSurrogates(u1)) {
+ uc[p++] = QChar(u1);
+ } else {
+ uc[p++] = QChar(QChar::highSurrogate(u1));
+ uc[p++] = QChar(QChar::lowSurrogate(u1));
+ }
+ if (pos > 0)
+ --pos;
+ if (pos > 0 && s.at(pos).isLowSurrogate())
+ --pos;
+ } else {
+ ++pos;
+ if (QChar::requiresSurrogates(u1))
+ ++pos;
+
+ u1 = u2;
+ c1 = c2; // != 0
+ p2 = pos + 1;
+ if (QChar::requiresSurrogates(u1))
+ ++p2;
+ if (p2 > l)
+ break;
+
+ goto advance;
+ }
+ }
+}
+
+// returns true if the text is in a desired Normalization Form already; false otherwise.
+// sets lastStable to the position of the last stable code point
+static bool normalizationQuickCheckHelper(QString *str, QString::NormalizationForm mode, int from, int *lastStable)
+{
+ Q_STATIC_ASSERT(QString::NormalizationForm_D == 0);
+ Q_STATIC_ASSERT(QString::NormalizationForm_C == 1);
+ Q_STATIC_ASSERT(QString::NormalizationForm_KD == 2);
+ Q_STATIC_ASSERT(QString::NormalizationForm_KC == 3);
+
+ enum { NFQC_YES = 0, NFQC_NO = 1, NFQC_MAYBE = 3 };
+
+ const ushort *string = reinterpret_cast<const ushort *>(str->constData());
+ int length = str->length();
+
+ // this avoids one out of bounds check in the loop
+ while (length > from && QChar::isHighSurrogate(string[length - 1]))
+ --length;
+
+ uchar lastCombining = 0;
+ for (int i = from; i < length; ++i) {
+ int pos = i;
+ uint uc = string[i];
+ if (uc < 0x80) {
+ // ASCII characters are stable code points
+ lastCombining = 0;
+ *lastStable = pos;
+ continue;
+ }
+
+ if (QChar::isHighSurrogate(uc)) {
+ ushort low = string[i + 1];
+ if (!QChar::isLowSurrogate(low)) {
+ // treat surrogate like stable code point
+ lastCombining = 0;
+ *lastStable = pos;
+ continue;
+ }
+ ++i;
+ uc = QChar::surrogateToUcs4(uc, low);
+ }
+
+ const QUnicodeTables::Properties *p = qGetProp(uc);
+
+ if (p->combiningClass < lastCombining && p->combiningClass > 0)
+ return false;
+
+ const uchar check = (p->nfQuickCheck >> (mode << 1)) & 0x03;
+ if (check != NFQC_YES)
+ return false; // ### can we quick check NFQC_MAYBE ?
+
+ lastCombining = p->combiningClass;
+ if (lastCombining == 0)
+ *lastStable = pos;
+ }
+
+ if (length != str->length()) // low surrogate parts at the end of text
+ *lastStable = str->length() - 1;
+
+ return true;
+}
+
+QT_END_NAMESPACE