diff options
author | Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> | 2022-05-05 15:14:15 +0200 |
---|---|---|
committer | Ievgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io> | 2022-05-24 23:07:42 +0200 |
commit | c63cdbdc43682e2034fef3e83b721c82e9aac55b (patch) | |
tree | f3c1969278a98c958ddf6a3e99d94b8324ce89bc /src/corelib/text/qunicodetools.cpp | |
parent | 1a26719c541756c1c784b7395e9ed72ed72e1a5f (diff) |
QUnicodeTools: Handle WB3c word break rule
Adjust the handling of the WB3c rule to match UAX #29, revision 33 (Unicode 11.0.0).
The rule reads:
ZWJ × \p{Extended_Pictographic}
This fixes 9 word break tests.
Task-number: QTBUG-97537
Pick-to: 6.2 6.3
Change-Id: I818d4048828e6663d5c090aa372d83f5099fdffe
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'src/corelib/text/qunicodetools.cpp')
-rw-r--r-- | src/corelib/text/qunicodetools.cpp | 14 |
1 file changed, 12 insertions, 2 deletions
diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp index f544cb0730..9cfca74a05 100644 --- a/src/corelib/text/qunicodetools.cpp +++ b/src/corelib/text/qunicodetools.cpp @@ -239,6 +239,8 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes } currentWordType = WordTypeNone; QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1 + auto real_cls = cls; // Unaffected by WB4 + for (qsizetype i = 0; i != len; ++i) { qsizetype pos = i; char32_t ucs4 = string[i]; @@ -268,12 +270,18 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes uchar action = WB::breakTable[cls][ncls]; switch (action) { case WB::Break: + if (Q_UNLIKELY(real_cls == QUnicodeTables::WordBreak_ZWJ + && prop->graphemeBreakClass + == QUnicodeTables::GraphemeBreak_Extended_Pictographic)) { + // WB3c: ZWJ × \p{Extended_Pictographic} + action = WB::NoBreak; + } break; case WB::NoBreak: if (Q_UNLIKELY(ncls == QUnicodeTables::WordBreak_Extend || ncls == QUnicodeTables::WordBreak_ZWJ || ncls == QUnicodeTables::WordBreak_Format)) { // WB4: X(Extend|Format)* -> X - if (cls != QUnicodeTables::WordBreak_ZWJ) // WB3c - continue; + real_cls = ncls; + continue; } if (Q_UNLIKELY(cls == QUnicodeTables::WordBreak_RegionalIndicator)) { // WB15/WB16: break between pairs of Regional indicator @@ -337,6 +345,8 @@ static void getWordBreaks(const char16_t *string, qsizetype len, QCharAttributes break; } } + + real_cls = ncls; } if (currentWordType != WordTypeNone) |