diff options
author | Konstantin Ritt <ritt.ks@gmail.com> | 2012-09-25 23:55:54 +0300 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2012-09-26 03:09:57 +0200 |
commit | a798b956b9786240a06142de078f56c28962a535 (patch) | |
tree | 2c96818276618e64fffda7e469ae54641c717a1b /src/corelib/tools/qunicodetools.cpp | |
parent | aeb21c73c5e4fc585340145374800a5e285e7ab7 (diff) |
QCharAttributes: add wordStart/wordEnd flags
A simple heuristic is used to detect the word beginning and ending by
looking at the word break property value of surrounding characters.
This behaves better than the whitespace-based implementation used before
and makes it possible to tailor the default algorithm for complex scripts.
BIG FAT WARNING: The QCharAttributes buffer must now hold string length + 1
entries; the extra entry carries the flags for the end of the text.
Task-Id: QTBUG-6498
Change-Id: I5589b191ffde6a50d2af0c14a00430d3852c67b4
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Diffstat (limited to 'src/corelib/tools/qunicodetools.cpp')
-rw-r--r-- | src/corelib/tools/qunicodetools.cpp | 34 |
1 files changed, 32 insertions, 2 deletions
diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp
index f8daec5680..0b492abf89 100644
--- a/src/corelib/tools/qunicodetools.cpp
+++ b/src/corelib/tools/qunicodetools.cpp
@@ -102,6 +102,8 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes
 
         lcls = cls;
     }
+
+    attributes[len].graphemeBoundary = true; // GB2
 }
 
 
@@ -133,6 +135,10 @@ static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnico
 
 static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
 {
+    enum WordType {
+        WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana
+    } currentWordType = WordTypeNone;
+
     QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
     for (quint32 i = 0; i != len; ++i) {
         quint32 pos = i;
@@ -178,9 +184,30 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
             continue;
         }
         cls = ncls;
-        if (action == WB::Break)
+        if (action == WB::Break) {
             attributes[pos].wordBreak = true;
+            if (currentWordType != WordTypeNone)
+                attributes[pos].wordEnd = true;
+            switch (cls) {
+            case QUnicodeTables::WordBreak_Katakana:
+                currentWordType = WordTypeHiraganaKatakana;
+                attributes[pos].wordStart = true;
+                break;
+            case QUnicodeTables::WordBreak_ALetter:
+            case QUnicodeTables::WordBreak_Numeric:
+                currentWordType = WordTypeAlphaNumeric;
+                attributes[pos].wordStart = true;
+                break;
+            default:
+                currentWordType = WordTypeNone;
+                break;
+            }
+        }
     }
+
+    if (currentWordType != WordTypeNone)
+        attributes[len].wordEnd = true;
+    attributes[len].wordBreak = true; // WB2
 }
 
 
@@ -277,6 +304,8 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
             state = SB::breakTable[SB::Initial][ncls];
         }
     }
+
+    attributes[len].sentenceBoundary = true; // SB2
 }
 
 
@@ -514,6 +543,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
     }
 
     attributes[0].lineBreak = false; // LB2
+    attributes[len].lineBreak = true; // LB3
 }
 
 
@@ -543,7 +573,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
         return;
 
     if (!(options & DontClearAttributes))
-        ::memset(attributes, 0, length * sizeof(QCharAttributes));
+        ::memset(attributes, 0, (length + 1) * sizeof(QCharAttributes));
 
     if (options & GraphemeBreaks)
         getGraphemeBreaks(string, length, attributes);