From 40b4ad1866b4c48fa7a64bc2f07c27125398fdba Mon Sep 17 00:00:00 2001
From: Ievgenii Meshcheriakov
Date: Thu, 5 May 2022 16:11:14 +0200
Subject: QUnicodeTools: Fix line breaking for potential emojis
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implement part of LB30b introduced by UAX #14, revision 47 (Unicode 14.0.0):

    [\p{Extended_Pictographic}&\p{Cn}] × EM

This fixes one line breaking test.

Task-number: QTBUG-97537
Pick-to: 6.3
Change-Id: I3fd2372a057b7391d8846e9c146f69a54686ea61
Reviewed-by: Edward Welbourne
---
 src/corelib/text/qunicodetools.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'src')

diff --git a/src/corelib/text/qunicodetools.cpp b/src/corelib/text/qunicodetools.cpp
index ac6302362d..8f1eb2e532 100644
--- a/src/corelib/text/qunicodetools.cpp
+++ b/src/corelib/text/qunicodetools.cpp
@@ -604,6 +604,8 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
     QUnicodeTables::LineBreakClass lcls = QUnicodeTables::LineBreak_LF; // to meet LB10
     QUnicodeTables::LineBreakClass cls = lcls;
 
+    const QUnicodeTables::Properties *lastProp = QUnicodeTables::properties(U'\n');
+
     for (qsizetype i = 0; i != len; ++i) {
         qsizetype pos = i;
         char32_t ucs4 = string[i];
@@ -707,6 +709,14 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
                 goto next;
             }
 
+            if (Q_UNLIKELY(ncls == QUnicodeTables::LineBreak_EM
+                           && lastProp->category == QChar::Other_NotAssigned
+                           && lastProp->graphemeBreakClass
+                                   == QUnicodeTables::GraphemeBreak_Extended_Pictographic)) {
+                // LB30b: [\p{Extended_Pictographic}&\p{Cn}] × EM
+                goto next;
+            }
+
             // for South East Asian chars that require a complex analysis, the Unicode
             // standard recommends to treat them as AL. tailoring that do dictionary analysis can override
             if (Q_UNLIKELY(cls >= QUnicodeTables::LineBreak_SA))
@@ -745,6 +755,7 @@ static void getLineBreaks(const char16_t *string, qsizetype len, QCharAttributes
 
     next:
         cls = ncls;
+        lastProp = prop;
     next_no_cls_update:
         lcls = ncls;
     }
-- 
cgit v1.2.3