summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIevgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>2022-05-05 16:11:14 +0200
committerIevgenii Meshcheriakov <ievgenii.meshcheriakov@qt.io>2022-05-24 23:07:42 +0200
commit40b4ad1866b4c48fa7a64bc2f07c27125398fdba (patch)
tree3db67bc85f74eecf04c8e8566a84cbfe37bac53b
parent08d2ae411fc5d0607eacb5389598ee488b76e0df (diff)
QUnicodeTools: Fix line breaking for potential emojis
Implement part of LB30b introduced by UAX #14, revision 47 (Unicode 14.0.0): [\p{Extended_Pictographic}&\p{Cn}] × EM This fixes one line breaking test. Task-number: QTBUG-97537 Pick-to: 6.3 Change-Id: I3fd2372a057b7391d8846e9c146f69a54686ea61 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
-rw-r--r--src/corelib/text/qunicodetools.cpp11
-rw-r--r--tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt2
2 files changed, 12 insertions, 1 deletions
diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp
index ac6302362d..8f1eb2e532 100644
--- a/src/corelib/text/qunicodetools.cpp
+++ b/src/corelib/text/qunicodetools.cpp
@@ -604,6 +604,8 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
QUnicodeTables::LineBreakClass cls = lcls;
+ const QUnicodeTables::Properties *lastProp = QUnicodeTables::properties(U'\n');
+
for (qsizetype i = 0; i != len; ++i) {
qsizetype pos = i;
char32_t ucs4 = string[i];
@@ -707,6 +709,14 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
goto next;
}
+ if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_EM
+ && lastProp->category == QChar::Other_NotAssigned
+ && lastProp->graphemeBreakClass
+ == QUnicodeTables::GraphemeBreak_Extended_Pictographic)) {
+ // LB30b: [\p{Extended_Pictographic}&\p{Cn}] × EM
+ goto next;
+ }
+
// for South East Asian chars that require a complex analysis, the Unicode
// standard recommends to treat them as AL. tailoring that do dictionary analysis can override
if (Q_UNLIKELY(cls >= QUnicodeTables::LineBreak_SA))
@@ -745,6 +755,7 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
next:
cls = ncls;
+ lastProp = prop;
next_no_cls_update:
lcls = ncls;
}
diff --git a/tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt b/tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt
index 1b038cdce9..32d6618319 100644
--- a/tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt
+++ b/tests/auto/corelib/text/qtextboundaryfinder/data/LineBreakTest.txt
@@ -7678,7 +7678,7 @@
× 1F1F7 × 1F1FA ÷ 1F1F8 × 1F1EA ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER R (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER U (RI) ÷ [30.13] REGIONAL INDICATOR SYMBOL LETTER S (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER E (RI) ÷ [0.3]
× 1F1F7 × 1F1FA × 200B ÷ 1F1F8 × 1F1EA ÷ # × [0.3] REGIONAL INDICATOR SYMBOL LETTER R (RI) × [30.11] REGIONAL INDICATOR SYMBOL LETTER U (RI) × [7.02] ZERO WIDTH SPACE (ZW) ÷ [8.0] REGIONAL INDICATOR SYMBOL LETTER S (RI) × [30.12] REGIONAL INDICATOR SYMBOL LETTER E (RI) ÷ [0.3]
× 05D0 × 002D × 05D0 ÷ # × [0.3] HEBREW LETTER ALEF (HL) × [21.02] HYPHEN-MINUS (HY) × [21.1] HEBREW LETTER ALEF (HL) ÷ [0.3]
-# × 1F02C × 1F3FF ÷ # × [0.3] <reserved-1F02C> (Other) × [30.22] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3]
+× 1F02C × 1F3FF ÷ # × [0.3] <reserved-1F02C> (Other) × [30.22] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3]
× 00A9 ÷ 1F3FF ÷ # × [0.3] COPYRIGHT SIGN (AL) ÷ [999.0] EMOJI MODIFIER FITZPATRICK TYPE-6 (EM) ÷ [0.3]
#
# Lines: 7654