diff options
author | Konstantin Ritt <ritt.ks@gmail.com> | 2012-06-05 11:19:22 +0300 |
---|---|---|
committer | Qt by Nokia <qt-info@nokia.com> | 2012-06-07 21:18:36 +0200 |
commit | 824180a12249e48c0e3280fec64940825ce0aa6e (patch) | |
tree | 36dba52c02f1603551ef2820e788796c47b5fb38 /src/corelib | |
parent | 9d3e77f18e3bd19c1723199f0dfb5af9eabdabe8 (diff) |
Set the whiteSpace flag outside the grapheme and the line breaking loop
White space determination doesn't belong in the text breaking algorithm.
A proper breaking implementation shouldn't assume spaces are
break opportunities (actually, a space is allowed to be a grapheme base).
However, the whiteSpace flag should never be checked alone while iterating
over the text to find the space sequence; the grapheme boundaries should always
be taken into account. This covers the SMP code points in UTF-16 text and
graphemes that consist of a space followed by one or more grapheme extenders.
This introduces a minor overhead that will be eliminated some time later.
Change-Id: Ic2cc7f485631fd0b436fc256ce112ded5f94fc07
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/tools/qunicodetools.cpp | 28 |
1 files changed, 21 insertions, 7 deletions
diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp index e503ecd65a..a311213ede 100644 --- a/src/corelib/tools/qunicodetools.cpp +++ b/src/corelib/tools/qunicodetools.cpp @@ -155,12 +155,10 @@ static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_Char if (cls == QUnicodeTables::LineBreak_LF) cls = QUnicodeTables::LineBreak_BK; - attributes[0].whiteSpace = (cls == QUnicodeTables::LineBreak_SP || cls == QUnicodeTables::LineBreak_BK); attributes[0].charStop = true; int lcls = cls; for (quint32 i = 1; i < len; ++i) { - attributes[i].whiteSpace = false; attributes[i].charStop = true; uint ucs4 = string[i]; @@ -183,10 +181,6 @@ static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_Char } } - // set white space and char stop flag - if (ncls >= QUnicodeTables::LineBreak_SP) - attributes[i].whiteSpace = true; - HB_LineBreakType lineBreakType = HB_NoBreak; if (cls >= QUnicodeTables::LineBreak_CR) { @@ -378,6 +372,24 @@ static void calcSentenceBreaks(const ushort *string, quint32 len, HB_CharAttribu } +static void getWhiteSpaces(const ushort *string, quint32 len, HB_CharAttributes *attributes) +{ + for (quint32 i = 0; i != len; ++i) { + uint ucs4 = string[i]; + if (QChar::isHighSurrogate(ucs4) && i + 1 != len) { + ushort low = string[i + 1]; + if (QChar::isLowSurrogate(low)) { + ucs4 = QChar::surrogateToUcs4(ucs4, low); + ++i; + } + } + + if (QChar::isSpace(ucs4)) + attributes[i].whiteSpace = true; + } +} + + Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length, const HB_ScriptItem *items, int numItems, HB_CharAttributes *attributes, CharAttributeOptions options) @@ -391,12 +403,14 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length, options |= GraphemeBreaks; } - if (options & (GraphemeBreaks | LineBreaks | WhiteSpaces)) + if (options & (GraphemeBreaks | LineBreaks)) calcGraphemeAndLineBreaks(string, length, attributes); if (options & WordBreaks) 
calcWordBreaks(string, length, attributes); if (options & SentenceBreaks) calcSentenceBreaks(string, length, attributes); + if (options & WhiteSpaces) + getWhiteSpaces(string, length, attributes); if (!items || numItems <= 0) return; |