diff options
author | Konstantin Ritt <ritt.ks@gmail.com> | 2012-09-25 23:55:54 +0300 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2012-09-26 03:09:57 +0200 |
commit | a798b956b9786240a06142de078f56c28962a535 (patch) | |
tree | 2c96818276618e64fffda7e469ae54641c717a1b /src/corelib | |
parent | aeb21c73c5e4fc585340145374800a5e285e7ab7 (diff) |
QCharAttributes: add wordStart/wordEnd flags
A simple heuristic is used to detect the word beginning and ending by
looking at the word break property value of surrounding characters.
This behaves better than the whitespace-based implementation used before
and makes it possible to tailor the default algorithm for complex scripts.
BIG FAT WARNING: The QCharAttributes buffer must now hold string length + 1
entries; the extra entry carries the flags for the end of text.
Task-Id: QTBUG-6498
Change-Id: I5589b191ffde6a50d2af0c14a00430d3852c67b4
Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/tools/qtextboundaryfinder.cpp | 71 | ||||
-rw-r--r-- | src/corelib/tools/qunicodetools.cpp | 34 | ||||
-rw-r--r-- | src/corelib/tools/qunicodetools_p.h | 5 |
3 files changed, 68 insertions, 42 deletions
diff --git a/src/corelib/tools/qtextboundaryfinder.cpp b/src/corelib/tools/qtextboundaryfinder.cpp index 318e3b2fa5..6656569e65 100644 --- a/src/corelib/tools/qtextboundaryfinder.cpp +++ b/src/corelib/tools/qtextboundaryfinder.cpp @@ -89,7 +89,7 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int scriptItems.append(item); } - QUnicodeTools::CharAttributeOptions options = QUnicodeTools::WhiteSpaces; + QUnicodeTools::CharAttributeOptions options = 0; switch (type) { case QTextBoundaryFinder::Grapheme: options |= QUnicodeTools::GraphemeBreaks; break; case QTextBoundaryFinder::Word: options |= QUnicodeTools::WordBreaks; break; @@ -189,9 +189,9 @@ QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other) , pos(other.pos) , freePrivate(true) { - d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes)); + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); Q_CHECK_PTR(d); - memcpy(d, other.d, length*sizeof(QCharAttributes)); + memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes)); } /*! @@ -209,11 +209,11 @@ QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &o pos = other.pos; QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *) - realloc(freePrivate ? d : 0, length*sizeof(QCharAttributes)); + realloc(freePrivate ? 
d : 0, (length + 1) * sizeof(QCharAttributes)); Q_CHECK_PTR(newD); freePrivate = true; d = newD; - memcpy(d, other.d, length*sizeof(QCharAttributes)); + memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes)); return *this; } @@ -238,7 +238,7 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin , pos(0) , freePrivate(true) { - d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes)); + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); Q_CHECK_PTR(d); init(t, chars, length, d->attributes); } @@ -249,7 +249,8 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin \a buffer is an optional working buffer of size \a bufferSize you can pass to the QTextBoundaryFinder. If the buffer is large enough to hold the working - data required, it will use this instead of allocating its own buffer. + data required (bufferSize >= length + 1), it will use this + instead of allocating its own buffer. \warning QTextBoundaryFinder does not create a copy of \a chars. It is the application programmer's responsibility to ensure the array is allocated for @@ -262,11 +263,11 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, , length(length) , pos(0) { - if (buffer && (uint)bufferSize >= length*sizeof(QCharAttributes)) { + if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) { d = (QTextBoundaryFinderPrivate *)buffer; freePrivate = false; } else { - d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes)); + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); Q_CHECK_PTR(d); freePrivate = true; } @@ -455,38 +456,30 @@ bool QTextBoundaryFinder::isAtBoundary() const */ QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const { - if (!d) - return NotAtBoundary; - if (! 
isAtBoundary()) - return NotAtBoundary; - if (pos == 0) { - if (d->attributes[pos].whiteSpace) - return NotAtBoundary; - return StartWord; - } - if (pos == length) { - if (d->attributes[length-1].whiteSpace) - return NotAtBoundary; - return EndWord; - } - - if (t == Line && chars[pos - 1].unicode() == QChar::SoftHyphen) - return SoftHyphen; + BoundaryReasons reasons = NotAtBoundary; + if (!d || !isAtBoundary()) + return reasons; - if (t != Word) - return BoundaryReasons(StartWord | EndWord); - - const bool nextIsSpace = d->attributes[pos].whiteSpace; - const bool prevIsSpace = d->attributes[pos - 1].whiteSpace; + switch (t) { + case Word: + if (d->attributes[pos].wordStart) + reasons |= StartWord; + if (d->attributes[pos].wordEnd) + reasons |= EndWord; + break; + case Line: + if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen) + reasons |= SoftHyphen; + // fall through + case Grapheme: + case Sentence: + reasons |= StartWord | EndWord; + break; + default: + break; + } - if (prevIsSpace && !nextIsSpace) - return StartWord; - else if (!prevIsSpace && nextIsSpace) - return EndWord; - else if (!prevIsSpace && !nextIsSpace) - return BoundaryReasons(StartWord | EndWord); - else - return NotAtBoundary; + return reasons; } QT_END_NAMESPACE diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp index f8daec5680..0b492abf89 100644 --- a/src/corelib/tools/qunicodetools.cpp +++ b/src/corelib/tools/qunicodetools.cpp @@ -102,6 +102,8 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes lcls = cls; } + + attributes[len].graphemeBoundary = true; // GB2 } @@ -133,6 +135,10 @@ static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnico static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *attributes) { + enum WordType { + WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana + } currentWordType = WordTypeNone; + QUnicodeTables::WordBreakClass cls = 
QUnicodeTables::WordBreak_LF; // to meet WB1 for (quint32 i = 0; i != len; ++i) { quint32 pos = i; @@ -178,9 +184,30 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at continue; } cls = ncls; - if (action == WB::Break) + if (action == WB::Break) { attributes[pos].wordBreak = true; + if (currentWordType != WordTypeNone) + attributes[pos].wordEnd = true; + switch (cls) { + case QUnicodeTables::WordBreak_Katakana: + currentWordType = WordTypeHiraganaKatakana; + attributes[pos].wordStart = true; + break; + case QUnicodeTables::WordBreak_ALetter: + case QUnicodeTables::WordBreak_Numeric: + currentWordType = WordTypeAlphaNumeric; + attributes[pos].wordStart = true; + break; + default: + currentWordType = WordTypeNone; + break; + } + } } + + if (currentWordType != WordTypeNone) + attributes[len].wordEnd = true; + attributes[len].wordBreak = true; // WB2 } @@ -277,6 +304,8 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes state = SB::breakTable[SB::Initial][ncls]; } } + + attributes[len].sentenceBoundary = true; // SB2 } @@ -514,6 +543,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at } attributes[0].lineBreak = false; // LB2 + attributes[len].lineBreak = true; // LB3 } @@ -543,7 +573,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length, return; if (!(options & DontClearAttributes)) - ::memset(attributes, 0, length * sizeof(QCharAttributes)); + ::memset(attributes, 0, (length + 1) * sizeof(QCharAttributes)); if (options & GraphemeBreaks) getGraphemeBreaks(string, length, attributes); diff --git a/src/corelib/tools/qunicodetools_p.h b/src/corelib/tools/qunicodetools_p.h index 91028b6b51..b1e9127662 100644 --- a/src/corelib/tools/qunicodetools_p.h +++ b/src/corelib/tools/qunicodetools_p.h @@ -64,7 +64,9 @@ struct Q_PACKED QCharAttributes uchar sentenceBoundary : 1; uchar lineBreak : 1; uchar whiteSpace : 1; - uchar unused : 3; + uchar wordStart : 1; + 
uchar wordEnd : 1; + uchar unused : 1; }; Q_DECLARE_TYPEINFO(QCharAttributes, Q_PRIMITIVE_TYPE); @@ -89,6 +91,7 @@ enum CharAttributeOption { }; Q_DECLARE_FLAGS(CharAttributeOptions, CharAttributeOption) +// attributes buffer has to have a length of string length + 1 Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length, const ScriptItem *items, int numItems, QCharAttributes *attributes, CharAttributeOptions options = DefaultOptionsCompat); |