summary refs log tree commit diff stats
path: root/util/unicode/main.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'util/unicode/main.cpp')
-rw-r--r-- util/unicode/main.cpp 105
1 files changed, 73 insertions, 32 deletions
diff --git a/util/unicode/main.cpp b/util/unicode/main.cpp
index c51995499d..0f3c28137d 100644
--- a/util/unicode/main.cpp
+++ b/util/unicode/main.cpp
@@ -274,11 +274,12 @@ static void initJoiningMap()
static const char *grapheme_break_class_string =
"enum GraphemeBreakClass {\n"
- " GraphemeBreak_Other,\n"
+ " GraphemeBreak_Any,\n"
" GraphemeBreak_CR,\n"
" GraphemeBreak_LF,\n"
" GraphemeBreak_Control,\n"
" GraphemeBreak_Extend,\n"
+ " GraphemeBreak_ZWJ,\n"
" GraphemeBreak_RegionalIndicator,\n"
" GraphemeBreak_Prepend,\n"
" GraphemeBreak_SpacingMark,\n"
@@ -286,15 +287,21 @@ static const char *grapheme_break_class_string =
" GraphemeBreak_V,\n"
" GraphemeBreak_T,\n"
" GraphemeBreak_LV,\n"
- " GraphemeBreak_LVT\n"
+ " GraphemeBreak_LVT,\n"
+ " Graphemebreak_E_Base,\n"
+ " Graphemebreak_E_Modifier,\n"
+ " Graphemebreak_Glue_After_Zwj,\n"
+ " Graphemebreak_E_Base_GAZ,\n"
+ " NumGraphemeBreakClasses,\n"
"};\n\n";
enum GraphemeBreakClass {
- GraphemeBreak_Other,
+ GraphemeBreak_Any,
GraphemeBreak_CR,
GraphemeBreak_LF,
GraphemeBreak_Control,
GraphemeBreak_Extend,
+ GraphemeBreak_ZWJ,
GraphemeBreak_RegionalIndicator,
GraphemeBreak_Prepend,
GraphemeBreak_SpacingMark,
@@ -302,9 +309,13 @@ enum GraphemeBreakClass {
GraphemeBreak_V,
GraphemeBreak_T,
GraphemeBreak_LV,
- GraphemeBreak_LVT
+ GraphemeBreak_LVT,
+ Graphemebreak_E_Base,
+ Graphemebreak_E_Modifier,
+ Graphemebreak_Glue_After_Zwj,
+ Graphemebreak_E_Base_GAZ,
- , GraphemeBreak_Unassigned
+ GraphemeBreak_Unassigned
};
static QHash<QByteArray, GraphemeBreakClass> grapheme_break_map;
@@ -315,11 +326,12 @@ static void initGraphemeBreak()
GraphemeBreakClass brk;
const char *name;
} breaks[] = {
- { GraphemeBreak_Other, "Other" },
+ { GraphemeBreak_Any, "Any" },
{ GraphemeBreak_CR, "CR" },
{ GraphemeBreak_LF, "LF" },
{ GraphemeBreak_Control, "Control" },
{ GraphemeBreak_Extend, "Extend" },
+ { GraphemeBreak_ZWJ, "ZWJ" },
{ GraphemeBreak_RegionalIndicator, "Regional_Indicator" },
{ GraphemeBreak_Prepend, "Prepend" },
{ GraphemeBreak_SpacingMark, "SpacingMark" },
@@ -328,6 +340,10 @@ static void initGraphemeBreak()
{ GraphemeBreak_T, "T" },
{ GraphemeBreak_LV, "LV" },
{ GraphemeBreak_LVT, "LVT" },
+ { Graphemebreak_E_Base, "E_Base" },
+ { Graphemebreak_E_Modifier, "E_Modifier" },
+ { Graphemebreak_Glue_After_Zwj, "Glue_After_Zwj" },
+ { Graphemebreak_E_Base_GAZ, "E_Base_GAZ" },
{ GraphemeBreak_Unassigned, 0 }
};
GraphemeBreakList *d = breaks;
@@ -340,11 +356,13 @@ static void initGraphemeBreak()
static const char *word_break_class_string =
"enum WordBreakClass {\n"
- " WordBreak_Other,\n"
+ " WordBreak_Any,\n"
" WordBreak_CR,\n"
" WordBreak_LF,\n"
" WordBreak_Newline,\n"
" WordBreak_Extend,\n"
+ " WordBreak_ZWJ,\n"
+ " WordBreak_Format,\n"
" WordBreak_RegionalIndicator,\n"
" WordBreak_Katakana,\n"
" WordBreak_HebrewLetter,\n"
@@ -355,15 +373,22 @@ static const char *word_break_class_string =
" WordBreak_MidLetter,\n"
" WordBreak_MidNum,\n"
" WordBreak_Numeric,\n"
- " WordBreak_ExtendNumLet\n"
+ " WordBreak_ExtendNumLet,\n"
+ " WordBreak_E_Base,\n"
+ " WordBreak_E_Modifier,\n"
+ " WordBreak_Glue_After_Zwj,\n"
+ " WordBreak_E_Base_GAZ,\n"
+ " NumWordBreakClasses,\n"
"};\n\n";
enum WordBreakClass {
- WordBreak_Other,
+ WordBreak_Any,
WordBreak_CR,
WordBreak_LF,
WordBreak_Newline,
WordBreak_Extend,
+ WordBreak_ZWJ,
+ WordBreak_Format,
WordBreak_RegionalIndicator,
WordBreak_Katakana,
WordBreak_HebrewLetter,
@@ -374,9 +399,13 @@ enum WordBreakClass {
WordBreak_MidLetter,
WordBreak_MidNum,
WordBreak_Numeric,
- WordBreak_ExtendNumLet
+ WordBreak_ExtendNumLet,
+ WordBreak_E_Base,
+ WordBreak_E_Modifier,
+ WordBreak_Glue_After_Zwj,
+ WordBreak_E_Base_GAZ,
- , WordBreak_Unassigned
+ WordBreak_Unassigned
};
static QHash<QByteArray, WordBreakClass> word_break_map;
@@ -387,12 +416,13 @@ static void initWordBreak()
WordBreakClass brk;
const char *name;
} breaks[] = {
- { WordBreak_Other, "Other" },
+ { WordBreak_Any, "Any" },
{ WordBreak_CR, "CR" },
{ WordBreak_LF, "LF" },
{ WordBreak_Newline, "Newline" },
{ WordBreak_Extend, "Extend" },
- { WordBreak_Extend, "Format" },
+ { WordBreak_ZWJ, "ZWJ" },
+ { WordBreak_Format, "Format" },
{ WordBreak_RegionalIndicator, "Regional_Indicator" },
{ WordBreak_Katakana, "Katakana" },
{ WordBreak_HebrewLetter, "Hebrew_Letter" },
@@ -404,6 +434,10 @@ static void initWordBreak()
{ WordBreak_MidNum, "MidNum" },
{ WordBreak_Numeric, "Numeric" },
{ WordBreak_ExtendNumLet, "ExtendNumLet" },
+ { WordBreak_E_Base, "E_Base" },
+ { WordBreak_E_Modifier, "E_Modifier" },
+ { WordBreak_Glue_After_Zwj, "Glue_After_Zwj" },
+ { WordBreak_E_Base_GAZ, "E_Base_GAZ" },
{ WordBreak_Unassigned, 0 }
};
WordBreakList *d = breaks;
@@ -416,7 +450,7 @@ static void initWordBreak()
static const char *sentence_break_class_string =
"enum SentenceBreakClass {\n"
- " SentenceBreak_Other,\n"
+ " SentenceBreak_Any,\n"
" SentenceBreak_CR,\n"
" SentenceBreak_LF,\n"
" SentenceBreak_Sep,\n"
@@ -429,11 +463,12 @@ static const char *sentence_break_class_string =
" SentenceBreak_ATerm,\n"
" SentenceBreak_SContinue,\n"
" SentenceBreak_STerm,\n"
- " SentenceBreak_Close\n"
+ " SentenceBreak_Close,\n"
+ " NumSentenceBreakClasses\n"
"};\n\n";
enum SentenceBreakClass {
- SentenceBreak_Other,
+ SentenceBreak_Any,
SentenceBreak_CR,
SentenceBreak_LF,
SentenceBreak_Sep,
@@ -459,7 +494,7 @@ static void initSentenceBreak()
SentenceBreakClass brk;
const char *name;
} breaks[] = {
- { SentenceBreak_Other, "Other" },
+ { SentenceBreak_Any, "Any" },
{ SentenceBreak_CR, "CR" },
{ SentenceBreak_LF, "LF" },
{ SentenceBreak_Sep, "Sep" },
@@ -494,8 +529,10 @@ static const char *line_break_class_string =
" LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,\n"
" LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,\n"
" LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_RI, LineBreak_CB,\n"
- " LineBreak_SA, LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF,\n"
- " LineBreak_BK\n"
+ " LineBreak_EB, LineBreak_EM, LineBreak_ZWJ,\n"
+ " LineBreak_SA, LineBreak_SG, LineBreak_SP,\n"
+ " LineBreak_CR, LineBreak_LF, LineBreak_BK,\n"
+ " NumLineBreakClasses\n"
"};\n\n";
enum LineBreakClass {
@@ -505,10 +542,11 @@ enum LineBreakClass {
LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_RI, LineBreak_CB,
- LineBreak_SA, LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF,
- LineBreak_BK
+ LineBreak_EB, LineBreak_EM, LineBreak_ZWJ,
+ LineBreak_SA, LineBreak_SG, LineBreak_SP,
+ LineBreak_CR, LineBreak_LF, LineBreak_BK,
- , LineBreak_Unassigned
+ LineBreak_Unassigned
};
static QHash<QByteArray, LineBreakClass> line_break_map;
@@ -563,6 +601,9 @@ static void initLineBreak()
{ LineBreak_RI, "RI" },
{ LineBreak_SA, "SA" },
{ LineBreak_AL, "XX" },
+ { LineBreak_EB, "EB" },
+ { LineBreak_EM, "EM" },
+ { LineBreak_ZWJ, "ZWJ" },
{ LineBreak_Unassigned, 0 }
};
LineBreakList *d = breaks;
@@ -768,10 +809,10 @@ static const char *property_string =
" signed short caseFoldDiff : 15;\n"
" ushort unicodeVersion : 8; /* 5 used */\n"
" ushort nfQuickCheck : 8;\n" // could be narrowed
- " ushort graphemeBreakClass : 4; /* 4 used */\n"
- " ushort wordBreakClass : 4; /* 4 used */\n"
+ " ushort graphemeBreakClass : 5; /* 5 used */\n"
+ " ushort wordBreakClass : 5; /* 5 used */\n"
" ushort sentenceBreakClass : 8; /* 4 used */\n"
- " ushort lineBreakClass : 8; /* 6 used */\n"
+ " ushort lineBreakClass : 6; /* 6 used */\n"
" ushort script : 8;\n"
"};\n\n"
"Q_CORE_EXPORT const Properties * QT_FASTCALL properties(uint ucs4) Q_DECL_NOTHROW;\n"
@@ -1034,9 +1075,9 @@ struct UnicodeData {
p.upperCaseSpecial = 0;
p.titleCaseSpecial = 0;
p.caseFoldSpecial = 0;
- p.graphemeBreakClass = GraphemeBreak_Other;
- p.wordBreakClass = WordBreak_Other;
- p.sentenceBreakClass = SentenceBreak_Other;
+ p.graphemeBreakClass = GraphemeBreak_Any;
+ p.wordBreakClass = WordBreak_Any;
+ p.sentenceBreakClass = SentenceBreak_Any;
p.script = QChar::Script_Unknown;
p.nfQuickCheck = 0;
propertyIndex = -1;
@@ -1913,7 +1954,7 @@ static void readWordBreak()
if (codepoint == 0x002E) // FULL STOP
brk = WordBreak_MidNum;
else if (codepoint == 0x003A) // COLON
- brk = WordBreak_Other;
+ brk = WordBreak_Any;
// ] ###
UnicodeData &ud = UnicodeData::valueRef(codepoint);
ud.p.wordBreakClass = brk;
@@ -2456,10 +2497,10 @@ static QByteArray createPropertyInfo()
// " ushort nfQuickCheck : 8;\n"
out += QByteArray::number( p.nfQuickCheck );
out += ", ";
-// " ushort graphemeBreakClass : 4; /* 4 used */\n"
-// " ushort wordBreakClass : 4; /* 4 used */\n"
+// " ushort graphemeBreakClass : 5; /* 5 used */\n"
+// " ushort wordBreakClass : 5; /* 5 used */\n"
// " ushort sentenceBreakClass : 8; /* 4 used */\n"
-// " ushort lineBreakClass : 8; /* 6 used */\n"
+// " ushort lineBreakClass : 6; /* 6 used */\n"
out += QByteArray::number( p.graphemeBreakClass );
out += ", ";
out += QByteArray::number( p.wordBreakClass );