summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/tools/qunicodetables_p.h
diff options
context:
space:
mode:
author: Lars Knoll <lars.knoll@qt.io> 2017-12-12 10:14:28 +0100
committer: Lars Knoll <lars.knoll@qt.io> 2018-01-03 07:47:26 +0000
commit: 41b4e154d617a820cd7f7f732838647425a58227 (patch)
tree: 27e9300e3fc275bf4e50de8fb2c5e1f8aeb40fab /src/corelib/tools/qunicodetables_p.h
parent: 8bfabb34dec8a437a08b5a6e0ecac4a9dd3ae18c (diff)
Update Text segmentation and line break data to Unicode 10.0
Also adjusted the text segmentation and line break algorithms so that they can handle the new data and pass the test suite.

Change-Id: Ib727fd80003e34e96458d7a681996de3fa3691e7
Reviewed-by: Eskil Abrahamsen Blomfeldt <eskil.abrahamsen-blomfeldt@qt.io>
Diffstat (limited to 'src/corelib/tools/qunicodetables_p.h')
-rw-r--r-- src/corelib/tools/qunicodetables_p.h 38
1 files changed, 27 insertions, 11 deletions
diff --git a/src/corelib/tools/qunicodetables_p.h b/src/corelib/tools/qunicodetables_p.h
index e4f59fc933..f3fb6ec1b0 100644
--- a/src/corelib/tools/qunicodetables_p.h
+++ b/src/corelib/tools/qunicodetables_p.h
@@ -80,10 +80,10 @@ struct Properties {
signed short caseFoldDiff : 15;
ushort unicodeVersion : 8; /* 5 used */
ushort nfQuickCheck : 8;
- ushort graphemeBreakClass : 4; /* 4 used */
- ushort wordBreakClass : 4; /* 4 used */
+ ushort graphemeBreakClass : 5; /* 5 used */
+ ushort wordBreakClass : 5; /* 5 used */
ushort sentenceBreakClass : 8; /* 4 used */
- ushort lineBreakClass : 8; /* 6 used */
+ ushort lineBreakClass : 6; /* 6 used */
ushort script : 8;
};
@@ -123,11 +123,12 @@ struct CasefoldTraits
};
enum GraphemeBreakClass {
- GraphemeBreak_Other,
+ GraphemeBreak_Any,
GraphemeBreak_CR,
GraphemeBreak_LF,
GraphemeBreak_Control,
GraphemeBreak_Extend,
+ GraphemeBreak_ZWJ,
GraphemeBreak_RegionalIndicator,
GraphemeBreak_Prepend,
GraphemeBreak_SpacingMark,
@@ -135,15 +136,22 @@ enum GraphemeBreakClass {
GraphemeBreak_V,
GraphemeBreak_T,
GraphemeBreak_LV,
- GraphemeBreak_LVT
+ GraphemeBreak_LVT,
+ Graphemebreak_E_Base,
+ Graphemebreak_E_Modifier,
+ Graphemebreak_Glue_After_Zwj,
+ Graphemebreak_E_Base_GAZ,
+ NumGraphemeBreakClasses,
};
enum WordBreakClass {
- WordBreak_Other,
+ WordBreak_Any,
WordBreak_CR,
WordBreak_LF,
WordBreak_Newline,
WordBreak_Extend,
+ WordBreak_ZWJ,
+ WordBreak_Format,
WordBreak_RegionalIndicator,
WordBreak_Katakana,
WordBreak_HebrewLetter,
@@ -154,11 +162,16 @@ enum WordBreakClass {
WordBreak_MidLetter,
WordBreak_MidNum,
WordBreak_Numeric,
- WordBreak_ExtendNumLet
+ WordBreak_ExtendNumLet,
+ WordBreak_E_Base,
+ WordBreak_E_Modifier,
+ WordBreak_Glue_After_Zwj,
+ WordBreak_E_Base_GAZ,
+ NumWordBreakClasses,
};
enum SentenceBreakClass {
- SentenceBreak_Other,
+ SentenceBreak_Any,
SentenceBreak_CR,
SentenceBreak_LF,
SentenceBreak_Sep,
@@ -171,7 +184,8 @@ enum SentenceBreakClass {
SentenceBreak_ATerm,
SentenceBreak_SContinue,
SentenceBreak_STerm,
- SentenceBreak_Close
+ SentenceBreak_Close,
+ NumSentenceBreakClasses
};
// see http://www.unicode.org/reports/tr14/tr14-30.html
@@ -183,8 +197,10 @@ enum LineBreakClass {
LineBreak_IN, LineBreak_HY, LineBreak_BA, LineBreak_BB, LineBreak_B2,
LineBreak_ZW, LineBreak_CM, LineBreak_WJ, LineBreak_H2, LineBreak_H3,
LineBreak_JL, LineBreak_JV, LineBreak_JT, LineBreak_RI, LineBreak_CB,
- LineBreak_SA, LineBreak_SG, LineBreak_SP, LineBreak_CR, LineBreak_LF,
- LineBreak_BK
+ LineBreak_EB, LineBreak_EM, LineBreak_ZWJ,
+ LineBreak_SA, LineBreak_SG, LineBreak_SP,
+ LineBreak_CR, LineBreak_LF, LineBreak_BK,
+ NumLineBreakClasses
};
Q_CORE_EXPORT GraphemeBreakClass QT_FASTCALL graphemeBreakClass(uint ucs4) Q_DECL_NOTHROW;