summary refs log tree commit diff stats
path: root/src/corelib/tools/qunicodetools.cpp
diff options
context:
space:
mode:
author Konstantin Ritt <ritt.ks@gmail.com> 2012-09-25 23:55:54 +0300
committer The Qt Project <gerrit-noreply@qt-project.org> 2012-09-26 03:09:57 +0200
commit a798b956b9786240a06142de078f56c28962a535 (patch)
tree 2c96818276618e64fffda7e469ae54641c717a1b /src/corelib/tools/qunicodetools.cpp
parent aeb21c73c5e4fc585340145374800a5e285e7ab7 (diff)
QCharAttributes: add wordStart/wordEnd flags
A simple heuristic is used to detect the beginning and end of a word by looking at the word break property values of the surrounding characters. This behaves better than the whitespace-based implementation used before and makes it possible to tailor the default algorithm for complex scripts. BIG FAT WARNING: The QCharAttributes buffer must now have a length of string length + 1, to hold the flags for the end of the text. Task-Id: QTBUG-6498 Change-Id: I5589b191ffde6a50d2af0c14a00430d3852c67b4 Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Diffstat (limited to 'src/corelib/tools/qunicodetools.cpp')
-rw-r--r-- src/corelib/tools/qunicodetools.cpp 34
1 files changed, 32 insertions, 2 deletions
diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp
index f8daec5680..0b492abf89 100644
--- a/src/corelib/tools/qunicodetools.cpp
+++ b/src/corelib/tools/qunicodetools.cpp
@@ -102,6 +102,8 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes
lcls = cls;
}
+
+ attributes[len].graphemeBoundary = true; // GB2
}
@@ -133,6 +135,10 @@ static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnico
static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
{
+ enum WordType {
+ WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana
+ } currentWordType = WordTypeNone;
+
QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
for (quint32 i = 0; i != len; ++i) {
quint32 pos = i;
@@ -178,9 +184,30 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
continue;
}
cls = ncls;
- if (action == WB::Break)
+ if (action == WB::Break) {
attributes[pos].wordBreak = true;
+ if (currentWordType != WordTypeNone)
+ attributes[pos].wordEnd = true;
+ switch (cls) {
+ case QUnicodeTables::WordBreak_Katakana:
+ currentWordType = WordTypeHiraganaKatakana;
+ attributes[pos].wordStart = true;
+ break;
+ case QUnicodeTables::WordBreak_ALetter:
+ case QUnicodeTables::WordBreak_Numeric:
+ currentWordType = WordTypeAlphaNumeric;
+ attributes[pos].wordStart = true;
+ break;
+ default:
+ currentWordType = WordTypeNone;
+ break;
+ }
+ }
}
+
+ if (currentWordType != WordTypeNone)
+ attributes[len].wordEnd = true;
+ attributes[len].wordBreak = true; // WB2
}
@@ -277,6 +304,8 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
state = SB::breakTable[SB::Initial][ncls];
}
}
+
+ attributes[len].sentenceBoundary = true; // SB2
}
@@ -514,6 +543,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
}
attributes[0].lineBreak = false; // LB2
+ attributes[len].lineBreak = true; // LB3
}
@@ -543,7 +573,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
return;
if (!(options & DontClearAttributes))
- ::memset(attributes, 0, length * sizeof(QCharAttributes));
+ ::memset(attributes, 0, (length + 1) * sizeof(QCharAttributes));
if (options & GraphemeBreaks)
getGraphemeBreaks(string, length, attributes);