summaryrefslogtreecommitdiffstats
path: root/src/corelib/text
diff options
context:
space:
mode:
author: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> 2022-05-05 15:14:15 +0200
committer: Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> 2022-05-24 23:07:42 +0200
commit: c63cdbdc43682e2034fef3e83b721c82e9aac55b (patch)
tree: f3c1969278a98c958ddf6a3e99d94b8324ce89bc /src/corelib/text
parent: 1a26719c541756c1c784b7395e9ed72ed72e1a5f (diff)
QUnicodeTools: Handle WB3c word break rule
Adjust handling of the WB3c rule to UAX #29, revision 33 (Unicode 11.0.0). The rule reads: ZWJ × \p{Extended_Pictographic}. This fixes 9 word break tests.
Task-number: QTBUG-97537
Pick-to: 6.2 6.3
Change-Id: I818d4048828e6663d5c090aa372d83f5099fdffe
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'src/corelib/text')
-rw-r--r-- src/corelib/text/qunicodetools.cpp 14
1 files changed, 12 insertions, 2 deletions
diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp
index f544cb0730..9cfca74a05 100644
--- a/src/corelib/text/qunicodetools.cpp
+++ b/src/corelib/text/qunicodetools.cpp
@@ -239,6 +239,8 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes
} currentWordType = WordTypeNone;
QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
+ auto real_cls = cls; // Unaffected by WB4
+
for (qsizetype i = 0; i != len; ++i) {
qsizetype pos = i;
char32_t ucs4 = string[i];
@@ -268,12 +270,18 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes
uchar action = WB::breakTable[cls][ncls];
switch (action) {
case WB::Break:
+ if (Q_UNLIKELY(real_cls == QUnicodeTables::WordBreak_ZWJ
+ && prop->graphemeBreakClass
+ == QUnicodeTables::GraphemeBreak_Extended_Pictographic)) {
+ // WB3c: ZWJ × \p{Extended_Pictographic}
+ action = WB::NoBreak;
+ }
break;
case WB::NoBreak:
if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend || ncls == QUnicodeTables::WordBreak_ZWJ || ncls == QUnicodeTables::WordBreak_Format)) {
// WB4: X(Extend|Format)* -> X
- if (cls != QUnicodeTables::WordBreak_ZWJ) // WB3c
- continue;
+ real_cls = ncls;
+ continue;
}
if (Q_UNLIKELY(cls == QUnicodeTables::WordBreak_RegionalIndicator)) {
// WB15/WB16: break between pairs of Regional indicator
@@ -337,6 +345,8 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes
break;
}
}
+
+ real_cls = ncls;
}
if (currentWordType != WordTypeNone)