summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonstantin Ritt <ritt.ks@gmail.com>2014-01-12 21:14:25 +0200
committerThe Qt Project <gerrit-noreply@qt-project.org>2014-01-14 15:38:57 +0100
commitedfce46a6c0406af749ca7ef659df6315e36cd5d (patch)
treee26868f66a6facc125544039e48578bdbd585307
parenta6046be428b39602089e1085e2e93d057059f70a (diff)
Update the Unicode Data and Algorithms up to Unicode 6.3.0
* Mongolian and Phags-pa characters have been given a Joining_Type classification for contextual shaping. As a part of these additions, one Phags-pa character has the Joining_Type value of L (Left Joining), which no character had been assigned before. * The unassigned code points in the Currency Symbols block have been given the Bidi_Class property value ET and the Line_Break property value PR, to help implementations support new currency symbols, when they are encoded. * Hebrew letters and basic punctuation marks have been assigned the newly introduced Word_Break property values Hebrew_Letter, Single_Quote, and Double_Quote. * The Bidi_Class property has been extended with four new values for directional isolates. For more details, see http://www.unicode.org/versions/Unicode6.3.0/ Change-Id: Iad62d02edc58a8497898dcd6d6c70d5aece317ea Reviewed-by: Lars Knoll <lars.knoll@digia.com>
-rw-r--r--src/corelib/tools/qchar.cpp7
-rw-r--r--src/corelib/tools/qchar.h6
-rw-r--r--src/corelib/tools/qunicodetools.cpp80
-rw-r--r--src/gui/text/qtextengine.cpp16
-rw-r--r--tests/auto/corelib/tools/qchar/tst_qchar.cpp20
-rw-r--r--util/unicode/main.cpp136
6 files changed, 199 insertions, 66 deletions
diff --git a/src/corelib/tools/qchar.cpp b/src/corelib/tools/qchar.cpp
index 4ed0cd5eea..f7f425d594 100644
--- a/src/corelib/tools/qchar.cpp
+++ b/src/corelib/tools/qchar.cpp
@@ -185,8 +185,9 @@ QT_BEGIN_NAMESPACE
\value Unicode_6_0 Version 6.0
\value Unicode_6_1 Version 6.1
\value Unicode_6_2 Version 6.2
+ \value Unicode_6_3 Version 6.3 Since Qt 5.3
\value Unicode_Unassigned The value is not assigned to any character
- in version 6.2 of Unicode.
+ in version 6.3 of Unicode.
\sa unicodeVersion(), currentUnicodeVersion()
*/
@@ -408,14 +409,18 @@ QT_BEGIN_NAMESPACE
\value DirEN
\value DirES
\value DirET
+ \value DirFSI Since Qt 5.3
\value DirL
\value DirLRE
+ \value DirLRI Since Qt 5.3
\value DirLRO
\value DirNSM
\value DirON
\value DirPDF
+ \value DirPDI Since Qt 5.3
\value DirR
\value DirRLE
+ \value DirRLI Since Qt 5.3
\value DirRLO
\value DirS
\value DirWS
diff --git a/src/corelib/tools/qchar.h b/src/corelib/tools/qchar.h
index 8afa05bb00..82ff337341 100644
--- a/src/corelib/tools/qchar.h
+++ b/src/corelib/tools/qchar.h
@@ -262,7 +262,8 @@ public:
enum Direction
{
DirL, DirR, DirEN, DirES, DirET, DirAN, DirCS, DirB, DirS, DirWS, DirON,
- DirLRE, DirLRO, DirAL, DirRLE, DirRLO, DirPDF, DirNSM, DirBN
+ DirLRE, DirLRO, DirAL, DirRLE, DirRLO, DirPDF, DirNSM, DirBN,
+ DirLRI, DirRLI, DirFSI, DirPDI
};
enum Decomposition
@@ -332,7 +333,8 @@ public:
Unicode_5_2,
Unicode_6_0,
Unicode_6_1,
- Unicode_6_2
+ Unicode_6_2,
+ Unicode_6_3
};
// ****** WHEN ADDING FUNCTIONS, CONSIDER ADDING TO QCharRef TOO
diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp
index b3e55a5abc..fac795051a 100644
--- a/src/corelib/tools/qunicodetools.cpp
+++ b/src/corelib/tools/qunicodetools.cpp
@@ -57,7 +57,7 @@ namespace QUnicodeTools {
// -----------------------------------------------------------------------------------------------------
//
// The text boundaries determination algorithm.
-// See http://www.unicode.org/reports/tr29/tr29-21.html
+// See http://www.unicode.org/reports/tr29/tr29-23.html
//
// -----------------------------------------------------------------------------------------------------
@@ -112,26 +112,30 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes
namespace WB {
enum Action {
- NoBreak = 0,
- Break = 1,
- Lookup = 2
+ NoBreak,
+ Break,
+ Lookup,
+ LookupW
};
static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnicodeTables::WordBreak_ExtendNumLet + 1] = {
-// Other CR LF Newline Extend RI Katakana ALetter MidNumLet MidLetter MidNum Numeric ExtendNumLet
- { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // Other
- { Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
- { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
- { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
- { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
- { Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
- { Break , Break , Break , Break , NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , NoBreak }, // Katakana
- { Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, Lookup , Lookup , Break , NoBreak, NoBreak }, // ALetter
- { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
- { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
- { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
- { Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, Lookup , Break , Lookup , NoBreak, NoBreak }, // Numeric
- { Break , Break , Break , Break , NoBreak, Break , NoBreak, NoBreak, Break , Break , Break , NoBreak, NoBreak }, // ExtendNumLet
+// Other CR LF Newline Extend RI Katakana HLetter ALetter SQuote DQuote MidNumLet MidLetter MidNum Numeric ExtendNumLet
+ { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Other
+ { Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // CR
+ { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // LF
+ { Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Newline
+ { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // Extend
+ { Break , Break , Break , Break , NoBreak, NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // RegionalIndicator
+ { Break , Break , Break , Break , NoBreak, Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , NoBreak }, // Katakana
+ { Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Lookup , LookupW, LookupW, Break , NoBreak, NoBreak }, // HebrewLetter
+ { Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, LookupW, Break , LookupW, LookupW, Break , NoBreak, NoBreak }, // ALetter
+ { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // SingleQuote
+ { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // DoubleQuote
+ { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNumLet
+ { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidLetter
+ { Break , Break , Break , Break , NoBreak, Break , Break , Break , Break , Break , Break , Break , Break , Break , Break , Break }, // MidNum
+ { Break , Break , Break , Break , NoBreak, Break , Break , NoBreak, NoBreak, Lookup , Break , Lookup , Break , Lookup , NoBreak, NoBreak }, // Numeric
+ { Break , Break , Break , Break , NoBreak, Break , NoBreak, NoBreak, NoBreak, Break , Break , Break , Break , Break , NoBreak, NoBreak }, // ExtendNumLet
};
} // namespace WB
@@ -160,8 +164,8 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
if (qt_initcharattributes_default_algorithm_only) {
// as of Unicode 5.1, some punctuation marks were mapped to MidLetter and MidNumLet
// which caused "hi.there" to be treated like if it were just a single word;
- // by remapping those characters in the Unicode tables generator.
- // this code is needed to pass the coverage tests; remove once the issue is fixed.
+ // we keep the pre-5.1 behavior by remapping these characters in the Unicode tables generator
+ // and this code is needed to pass the coverage tests; remove once the issue is fixed.
if (ucs4 == 0x002E) // FULL STOP
ncls = QUnicodeTables::WordBreak_MidNumLet;
else if (ucs4 == 0x003A) // COLON
@@ -170,8 +174,17 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
#endif
uchar action = WB::breakTable[cls][ncls];
- if (Q_UNLIKELY(action == WB::Lookup)) {
- action = WB::Break;
+ switch (action) {
+ case WB::Break:
+ break;
+ case WB::NoBreak:
+ if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend)) {
+ // WB4: X(Extend|Format)* -> X
+ continue;
+ }
+ break;
+ case WB::Lookup:
+ case WB::LookupW:
for (quint32 lookahead = i + 1; lookahead < len; ++lookahead) {
ucs4 = string[lookahead];
if (QChar::isHighSurrogate(ucs4) && lookahead + 1 != len) {
@@ -184,20 +197,28 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
prop = QUnicodeTables::properties(ucs4);
QUnicodeTables::WordBreakClass tcls = (QUnicodeTables::WordBreakClass) prop->wordBreakClass;
- if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend))
+
+ if (Q_UNLIKELY(tcls == QUnicodeTables::WordBreak_Extend)) {
+ // WB4: X(Extend|Format)* -> X
continue;
- if (Q_LIKELY(tcls == cls)) {
+ }
+
+ if (Q_LIKELY(tcls == cls || (action == WB::LookupW && (tcls == QUnicodeTables::WordBreak_HebrewLetter
+ || tcls == QUnicodeTables::WordBreak_ALetter)))) {
i = lookahead;
ncls = tcls;
action = WB::NoBreak;
}
break;
}
- } else if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend)) {
- // WB4: X(Extend|Format)* -> X
- if (Q_LIKELY(action != WB::Break))
- continue;
+ if (action != WB::NoBreak) {
+ action = WB::Break;
+ if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_SingleQuote && cls == QUnicodeTables::WordBreak_HebrewLetter))
+ action = WB::NoBreak; // WB7a
+ }
+ break;
}
+
cls = ncls;
if (action == WB::Break) {
attributes[pos].wordBreak = true;
@@ -208,6 +229,7 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
currentWordType = WordTypeHiraganaKatakana;
attributes[pos].wordStart = true;
break;
+ case QUnicodeTables::WordBreak_HebrewLetter:
case QUnicodeTables::WordBreak_ALetter:
case QUnicodeTables::WordBreak_Numeric:
currentWordType = WordTypeAlphaNumeric;
@@ -327,7 +349,7 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
// -----------------------------------------------------------------------------------------------------
//
// The line breaking algorithm.
-// See http://www.unicode.org/reports/tr14/tr14-30.html
+// See http://www.unicode.org/reports/tr14/tr14-32.html
//
// -----------------------------------------------------------------------------------------------------
diff --git a/src/gui/text/qtextengine.cpp b/src/gui/text/qtextengine.cpp
index 06c5e24920..109b7e600f 100644
--- a/src/gui/text/qtextengine.cpp
+++ b/src/gui/text/qtextengine.cpp
@@ -241,7 +241,8 @@ using namespace std;
static const char *directions[] = {
"DirL", "DirR", "DirEN", "DirES", "DirET", "DirAN", "DirCS", "DirB", "DirS", "DirWS", "DirON",
- "DirLRE", "DirLRO", "DirAL", "DirRLE", "DirRLO", "DirPDF", "DirNSM", "DirBN"
+ "DirLRE", "DirLRO", "DirAL", "DirRLE", "DirRLO", "DirPDF", "DirNSM", "DirBN",
+ "DirLRI", "DirRLI", "DirFSI", "DirPDI"
};
#endif
@@ -2536,7 +2537,8 @@ static inline bool nextCharJoins(const QString &string, int pos)
++pos;
if (pos == string.length())
return false;
- return string.at(pos).joining() != QChar::OtherJoining;
+ // ### U+A872 has joining type L
+ return string.at(pos) == QChar(0xA872) || string.at(pos).joining() != QChar::OtherJoining;
}
static inline bool prevCharJoins(const QString &string, int pos)
@@ -2551,13 +2553,9 @@ static inline bool prevCharJoins(const QString &string, int pos)
static inline bool isRetainableControlCode(QChar c)
{
- return (c.unicode() == 0x202a // LRE
- || c.unicode() == 0x202b // LRE
- || c.unicode() == 0x202c // PDF
- || c.unicode() == 0x202d // LRO
- || c.unicode() == 0x202e // RLO
- || c.unicode() == 0x200e // LRM
- || c.unicode() == 0x200f); // RLM
+ return (c.unicode() >= 0x202a && c.unicode() <= 0x202e) // LRE, RLE, PDF, LRO, RLO
+ || (c.unicode() >= 0x200e && c.unicode() <= 0x200f) // LRM, RLM
+ || (c.unicode() >= 0x2066 && c.unicode() <= 0x2069); // LRI, RLI, FSI, PDI
}
static QString stringMidRetainingBidiCC(const QString &string,
diff --git a/tests/auto/corelib/tools/qchar/tst_qchar.cpp b/tests/auto/corelib/tools/qchar/tst_qchar.cpp
index 2ec85882b8..80b4162156 100644
--- a/tests/auto/corelib/tools/qchar/tst_qchar.cpp
+++ b/tests/auto/corelib/tools/qchar/tst_qchar.cpp
@@ -450,6 +450,18 @@ void tst_QChar::category()
void tst_QChar::direction()
{
+ QVERIFY(QChar::direction(0x200E) == QChar::DirL);
+ QVERIFY(QChar::direction(0x200F) == QChar::DirR);
+ QVERIFY(QChar::direction(0x202A) == QChar::DirLRE);
+ QVERIFY(QChar::direction(0x202B) == QChar::DirRLE);
+ QVERIFY(QChar::direction(0x202C) == QChar::DirPDF);
+ QVERIFY(QChar::direction(0x202D) == QChar::DirLRO);
+ QVERIFY(QChar::direction(0x202E) == QChar::DirRLO);
+ QVERIFY(QChar::direction(0x2066) == QChar::DirLRI);
+ QVERIFY(QChar::direction(0x2067) == QChar::DirRLI);
+ QVERIFY(QChar::direction(0x2068) == QChar::DirFSI);
+ QVERIFY(QChar::direction(0x2069) == QChar::DirPDI);
+
QVERIFY(QChar('a').direction() == QChar::DirL);
QVERIFY(QChar('0').direction() == QChar::DirEN);
QVERIFY(QChar((ushort)0x627).direction() == QChar::DirAL);
@@ -492,6 +504,9 @@ void tst_QChar::joining()
QVERIFY(QChar::joining(0xf0000u) == QChar::OtherJoining);
QVERIFY(QChar::joining(0xE0030u) == QChar::OtherJoining);
QVERIFY(QChar::joining(0x2FA17u) == QChar::OtherJoining);
+
+ // ### U+A872 has joining type L
+ QVERIFY(QChar::joining((uint)0xA872) == QChar::OtherJoining);
}
void tst_QChar::combiningClass()
@@ -605,6 +620,11 @@ void tst_QChar::unicodeVersion()
QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
QVERIFY(QChar::unicodeVersion((uint)0x20ba) == QChar::Unicode_6_2);
+ QVERIFY(QChar(0x061c).unicodeVersion() == QChar::Unicode_6_3);
+ QVERIFY(QChar::unicodeVersion((ushort)0x061c) == QChar::Unicode_6_3);
+ QVERIFY(QChar::unicodeVersion((uint)0x061c) == QChar::Unicode_6_3);
+ QVERIFY(QChar::unicodeVersion((uint)0x061c) == QChar::Unicode_6_3);
+
QVERIFY(QChar(0x09ff).unicodeVersion() == QChar::Unicode_Unassigned);
QVERIFY(QChar::unicodeVersion((ushort)0x09ff) == QChar::Unicode_Unassigned);
QVERIFY(QChar::unicodeVersion((uint)0x09ff) == QChar::Unicode_Unassigned);
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index a4d3e0f377..59b95ad924 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -77,6 +77,7 @@ static void initAgeMap()
{ QChar::Unicode_6_0, "6.0" },
{ QChar::Unicode_6_1, "6.1" },
{ QChar::Unicode_6_2, "6.2" },
+ { QChar::Unicode_6_3, "6.3" },
{ QChar::Unicode_Unassigned, 0 }
};
AgeMap *d = ageMap;
@@ -176,34 +177,66 @@ static void initDecompositionMap()
}
-static QHash<QByteArray, QChar::Direction> directionMap;
+enum Direction {
+ DirL = QChar::DirL,
+ DirR = QChar::DirR,
+ DirEN = QChar::DirEN,
+ DirES = QChar::DirES,
+ DirET = QChar::DirET,
+ DirAN = QChar::DirAN,
+ DirCS = QChar::DirCS,
+ DirB = QChar::DirB,
+ DirS = QChar::DirS,
+ DirWS = QChar::DirWS,
+ DirON = QChar::DirON,
+ DirLRE = QChar::DirLRE,
+ DirLRO = QChar::DirLRO,
+ DirAL = QChar::DirAL,
+ DirRLE = QChar::DirRLE,
+ DirRLO = QChar::DirRLO,
+ DirPDF = QChar::DirPDF,
+ DirNSM = QChar::DirNSM,
+ DirBN = QChar::DirBN,
+ DirLRI = QChar::DirLRI,
+ DirRLI = QChar::DirRLI,
+ DirFSI = QChar::DirFSI,
+ DirPDI = QChar::DirPDI
+
+ , Dir_Unassigned
+};
+
+static QHash<QByteArray, Direction> directionMap;
static void initDirectionMap()
{
struct Dir {
- QChar::Direction dir;
+ Direction dir;
const char *name;
} directions[] = {
- { QChar::DirL, "L" },
- { QChar::DirR, "R" },
- { QChar::DirEN, "EN" },
- { QChar::DirES, "ES" },
- { QChar::DirET, "ET" },
- { QChar::DirAN, "AN" },
- { QChar::DirCS, "CS" },
- { QChar::DirB, "B" },
- { QChar::DirS, "S" },
- { QChar::DirWS, "WS" },
- { QChar::DirON, "ON" },
- { QChar::DirLRE, "LRE" },
- { QChar::DirLRO, "LRO" },
- { QChar::DirAL, "AL" },
- { QChar::DirRLE, "RLE" },
- { QChar::DirRLO, "RLO" },
- { QChar::DirPDF, "PDF" },
- { QChar::DirNSM, "NSM" },
- { QChar::DirBN, "BN" },
- { QChar::DirL, 0 }
+ { DirL, "L" },
+ { DirR, "R" },
+ { DirEN, "EN" },
+ { DirES, "ES" },
+ { DirET, "ET" },
+ { DirAN, "AN" },
+ { DirCS, "CS" },
+ { DirB, "B" },
+ { DirS, "S" },
+ { DirWS, "WS" },
+ { DirON, "ON" },
+ { DirLRE, "LRE" },
+ { DirLRO, "LRO" },
+ { DirAL, "AL" },
+ { DirRLE, "RLE" },
+ { DirRLO, "RLO" },
+ { DirPDF, "PDF" },
+ { DirNSM, "NSM" },
+ { DirBN, "BN" },
+ { DirLRI, "LRI" },
+ { DirRLI, "RLI" },
+ { DirFSI, "FSI" },
+ { DirPDI, "PDI" },
+ { Dir_Unassigned, 0 }
};
Dir *d = directions;
while (d->name) {
@@ -323,7 +356,10 @@ static const char *word_break_class_string =
" WordBreak_Extend,\n"
" WordBreak_RegionalIndicator,\n"
" WordBreak_Katakana,\n"
+ " WordBreak_HebrewLetter,\n"
" WordBreak_ALetter,\n"
+ " WordBreak_SingleQuote,\n"
+ " WordBreak_DoubleQuote,\n"
" WordBreak_MidNumLet,\n"
" WordBreak_MidLetter,\n"
" WordBreak_MidNum,\n"
@@ -339,7 +375,10 @@ enum WordBreakClass {
WordBreak_Extend,
WordBreak_RegionalIndicator,
WordBreak_Katakana,
+ WordBreak_HebrewLetter,
WordBreak_ALetter,
+ WordBreak_SingleQuote,
+ WordBreak_DoubleQuote,
WordBreak_MidNumLet,
WordBreak_MidLetter,
WordBreak_MidNum,
@@ -365,7 +404,10 @@ static void initWordBreak()
{ WordBreak_Extend, "Format" },
{ WordBreak_RegionalIndicator, "Regional_Indicator" },
{ WordBreak_Katakana, "Katakana" },
+ { WordBreak_HebrewLetter, "Hebrew_Letter" },
{ WordBreak_ALetter, "ALetter" },
+ { WordBreak_SingleQuote, "Single_Quote" },
+ { WordBreak_DoubleQuote, "Double_Quote" },
{ WordBreak_MidNumLet, "MidNumLet" },
{ WordBreak_MidLetter, "MidLetter" },
{ WordBreak_MidNum, "MidNum" },
@@ -815,6 +857,31 @@ static int appendToSpecialCaseMap(const QList<int> &map)
return pos;
}
+static inline bool isDefaultIgnorable(uint ucs4)
+{
+ // Default_Ignorable_Code_Point:
+ // Generated from
+ // Other_Default_Ignorable_Code_Point + Cf + Variation_Selector
+ // - White_Space - FFF9..FFFB (Annotation Characters)
+ // - 0600..0604, 06DD, 070F, 110BD (exceptional Cf characters that should be visible)
+ if (ucs4 <= 0xff)
+ return ucs4 == 0xad;
+
+ return ucs4 == 0x034f
+ || (ucs4 >= 0x115f && ucs4 <= 0x1160)
+ || (ucs4 >= 0x17b4 && ucs4 <= 0x17b5)
+ || (ucs4 >= 0x180b && ucs4 <= 0x180d)
+ || (ucs4 >= 0x200b && ucs4 <= 0x200f)
+ || (ucs4 >= 0x202a && ucs4 <= 0x202e)
+ || (ucs4 >= 0x2060 && ucs4 <= 0x206f)
+ || ucs4 == 0x3164
+ || (ucs4 >= 0xfe00 && ucs4 <= 0xfe0f)
+ || ucs4 == 0xfeff
+ || ucs4 == 0xffa0
+ || (ucs4 >= 0xfff0 && ucs4 <= 0xfff8)
+ || (ucs4 >= 0x1d173 && ucs4 <= 0xe0fff && (ucs4 <= 0x1d17a || ucs4 >= 0xe0000));
+}
+
struct UnicodeData {
UnicodeData(int codepoint = 0) {
p.category = QChar::Other_NotAssigned; // Cn
@@ -842,6 +909,17 @@ struct UnicodeData {
|| (codepoint >= 0x1EF00 && codepoint <= 0x1EFFF)) {
p.direction = QChar::DirR;
}
+ // The unassigned code points that default to ET are in the range:
+ // [U+20A0..U+20CF]
+ else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
+ p.direction = QChar::DirET;
+ }
+ // The unassigned code points that default to BN have one of the following properties:
+ // Default_Ignorable_Code_Point
+ // Noncharacter_Code_Point
+ else if (QChar::isNonCharacter(codepoint) || isDefaultIgnorable(codepoint)) {
+ p.direction = QChar::DirBN;
+ }
p.lineBreakClass = LineBreak_AL; // XX -> AL
// LineBreak.txt
@@ -858,6 +936,11 @@ struct UnicodeData {
|| (codepoint >= 0x30000 && codepoint <= 0x3FFFD)) {
p.lineBreakClass = LineBreak_ID;
}
+ // The unassigned code points that default to "PR" comprise a range in the following block:
+ // [U+20A0..U+20CF]
+ else if (codepoint >= 0x20A0 && codepoint <= 0x20CF) {
+ p.lineBreakClass = LineBreak_PR;
+ }
mirroredChar = 0;
decompositionType = QChar::NoDecomposition;
@@ -1008,7 +1091,10 @@ static void readUnicodeData()
else
++combiningClassUsage[data.p.combiningClass];
- data.p.direction = directionMap.value(properties[UD_BidiCategory], data.p.direction);
+ Direction dir = directionMap.value(properties[UD_BidiCategory], Dir_Unassigned);
+ if (dir == Dir_Unassigned)
+ qFatal("unhandled direction value: %s", properties[UD_BidiCategory].constData());
+ data.p.direction = QChar::Direction(dir);
if (!properties[UD_UpperCase].isEmpty()) {
int upperCase = properties[UD_UpperCase].toInt(&ok, 16);
@@ -1180,8 +1266,8 @@ static void readArabicShaping()
qFatal("unassigned or unhandled joining value: %s", l[2].constData());
if (joining == Joining_Left) {
- // There are currently no characters of joining type Left_Joining defined in Unicode.
- qFatal("%x: joining type '%s' was met; the current implementation needs to be revised!", codepoint, l[2].constData());
+ qWarning("ACHTUNG!!! joining type '%s' has been met for U+%X; the current implementation needs to be revised!",
+ l[2].trimmed().constData(), codepoint);
}
UnicodeData &d = UnicodeData::valueRef(codepoint);