summary | refs | log | tree | commit | diff | stats
path: root/src/corelib/tools
diff options
context:
space:
mode:
author: Konstantin Ritt <ritt.ks@gmail.com> 2012-06-05 11:19:22 +0300
committer: Qt by Nokia <qt-info@nokia.com> 2012-06-07 21:18:36 +0200
commit: 824180a12249e48c0e3280fec64940825ce0aa6e (patch)
tree: 36dba52c02f1603551ef2820e788796c47b5fb38 /src/corelib/tools
parent: 9d3e77f18e3bd19c1723199f0dfb5af9eabdabe8 (diff)
Set the whiteSpace flag outside the grapheme and the line breaking loop
Determining white space does not belong to the text breaking algorithm. A proper breaking implementation shouldn't assume that spaces are break opportunities (in fact, a space is allowed to be a grapheme base). However, the whiteSpace flag should never be checked alone while iterating over the text to find a sequence of spaces; the grapheme boundaries should always be taken into account. This covers SMP code points in UTF-16 text and graphemes that consist of a space followed by one or more grapheme extenders. This introduces a minor overhead that will be eliminated later.
Change-Id: Ic2cc7f485631fd0b436fc256ce112ded5f94fc07
Reviewed-by: Lars Knoll <lars.knoll@nokia.com>
Diffstat (limited to 'src/corelib/tools')
-rw-r--r-- src/corelib/tools/qunicodetools.cpp 28
1 files changed, 21 insertions, 7 deletions
diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp
index e503ecd65a..a311213ede 100644
--- a/src/corelib/tools/qunicodetools.cpp
+++ b/src/corelib/tools/qunicodetools.cpp
@@ -155,12 +155,10 @@ static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_Char
if (cls == QUnicodeTables::LineBreak_LF)
cls = QUnicodeTables::LineBreak_BK;
- attributes[0].whiteSpace = (cls == QUnicodeTables::LineBreak_SP || cls == QUnicodeTables::LineBreak_BK);
attributes[0].charStop = true;
int lcls = cls;
for (quint32 i = 1; i < len; ++i) {
- attributes[i].whiteSpace = false;
attributes[i].charStop = true;
uint ucs4 = string[i];
@@ -183,10 +181,6 @@ static void calcGraphemeAndLineBreaks(const ushort *string, quint32 len, HB_Char
}
}
- // set white space and char stop flag
- if (ncls >= QUnicodeTables::LineBreak_SP)
- attributes[i].whiteSpace = true;
-
HB_LineBreakType lineBreakType = HB_NoBreak;
if (cls >= QUnicodeTables::LineBreak_CR) {
@@ -378,6 +372,24 @@ static void calcSentenceBreaks(const ushort *string, quint32 len, HB_CharAttribu
}
+static void getWhiteSpaces(const ushort *string, quint32 len, HB_CharAttributes *attributes)
+{
+ for (quint32 i = 0; i != len; ++i) {
+ uint ucs4 = string[i];
+ if (QChar::isHighSurrogate(ucs4) && i + 1 != len) {
+ ushort low = string[i + 1];
+ if (QChar::isLowSurrogate(low)) {
+ ucs4 = QChar::surrogateToUcs4(ucs4, low);
+ ++i;
+ }
+ }
+
+ if (QChar::isSpace(ucs4))
+ attributes[i].whiteSpace = true;
+ }
+}
+
+
Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
const HB_ScriptItem *items, int numItems,
HB_CharAttributes *attributes, CharAttributeOptions options)
@@ -391,12 +403,14 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
options |= GraphemeBreaks;
}
- if (options & (GraphemeBreaks | LineBreaks | WhiteSpaces))
+ if (options & (GraphemeBreaks | LineBreaks))
calcGraphemeAndLineBreaks(string, length, attributes);
if (options & WordBreaks)
calcWordBreaks(string, length, attributes);
if (options & SentenceBreaks)
calcSentenceBreaks(string, length, attributes);
+ if (options & WhiteSpaces)
+ getWhiteSpaces(string, length, attributes);
if (!items || numItems <= 0)
return;