summaryrefslogtreecommitdiffstats
path: root/src/corelib
diff options
context:
space:
mode:
author: Konstantin Ritt <ritt.ks@gmail.com> 2012-09-25 23:55:54 +0300
committer: The Qt Project <gerrit-noreply@qt-project.org> 2012-09-26 03:09:57 +0200
commit: a798b956b9786240a06142de078f56c28962a535 (patch)
tree: 2c96818276618e64fffda7e469ae54641c717a1b /src/corelib
parent: aeb21c73c5e4fc585340145374800a5e285e7ab7 (diff)
QCharAttributes: add wordStart/wordEnd flags
A simple heuristic is used to detect the beginning and end of a word by looking at the word break property values of the surrounding characters. This behaves better than the whitespace-based implementation used before and makes it possible to tailor the default algorithm for complex scripts. BIG FAT WARNING: The QCharAttributes buffer now has to have a length of string length + 1, to hold the flags at the end of the text. Task-Id: QTBUG-6498 Change-Id: I5589b191ffde6a50d2af0c14a00430d3852c67b4 Reviewed-by: Konstantin Ritt <ritt.ks@gmail.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- src/corelib/tools/qtextboundaryfinder.cpp | 71
-rw-r--r-- src/corelib/tools/qunicodetools.cpp | 34
-rw-r--r-- src/corelib/tools/qunicodetools_p.h | 5
3 files changed, 68 insertions, 42 deletions
diff --git a/src/corelib/tools/qtextboundaryfinder.cpp b/src/corelib/tools/qtextboundaryfinder.cpp
index 318e3b2fa5..6656569e65 100644
--- a/src/corelib/tools/qtextboundaryfinder.cpp
+++ b/src/corelib/tools/qtextboundaryfinder.cpp
@@ -89,7 +89,7 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int
scriptItems.append(item);
}
- QUnicodeTools::CharAttributeOptions options = QUnicodeTools::WhiteSpaces;
+ QUnicodeTools::CharAttributeOptions options = 0;
switch (type) {
case QTextBoundaryFinder::Grapheme: options |= QUnicodeTools::GraphemeBreaks; break;
case QTextBoundaryFinder::Word: options |= QUnicodeTools::WordBreaks; break;
@@ -189,9 +189,9 @@ QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other)
, pos(other.pos)
, freePrivate(true)
{
- d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes));
+ d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d);
- memcpy(d, other.d, length*sizeof(QCharAttributes));
+ memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes));
}
/*!
@@ -209,11 +209,11 @@ QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &o
pos = other.pos;
QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *)
- realloc(freePrivate ? d : 0, length*sizeof(QCharAttributes));
+ realloc(freePrivate ? d : 0, (length + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(newD);
freePrivate = true;
d = newD;
- memcpy(d, other.d, length*sizeof(QCharAttributes));
+ memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes));
return *this;
}
@@ -238,7 +238,7 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin
, pos(0)
, freePrivate(true)
{
- d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes));
+ d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d);
init(t, chars, length, d->attributes);
}
@@ -249,7 +249,8 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin
\a buffer is an optional working buffer of size \a bufferSize you can pass to
the QTextBoundaryFinder. If the buffer is large enough to hold the working
- data required, it will use this instead of allocating its own buffer.
+ data required (bufferSize >= (length + 1) * sizeof(QCharAttributes)), it will use
+ this instead of allocating its own buffer.
\warning QTextBoundaryFinder does not create a copy of \a chars. It is the
application programmer's responsibility to ensure the array is allocated for
@@ -262,11 +263,11 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars,
, length(length)
, pos(0)
{
- if (buffer && (uint)bufferSize >= length*sizeof(QCharAttributes)) {
+ if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) {
d = (QTextBoundaryFinderPrivate *)buffer;
freePrivate = false;
} else {
- d = (QTextBoundaryFinderPrivate *) malloc(length*sizeof(QCharAttributes));
+ d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes));
Q_CHECK_PTR(d);
freePrivate = true;
}
@@ -455,38 +456,30 @@ bool QTextBoundaryFinder::isAtBoundary() const
*/
QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const
{
- if (!d)
- return NotAtBoundary;
- if (! isAtBoundary())
- return NotAtBoundary;
- if (pos == 0) {
- if (d->attributes[pos].whiteSpace)
- return NotAtBoundary;
- return StartWord;
- }
- if (pos == length) {
- if (d->attributes[length-1].whiteSpace)
- return NotAtBoundary;
- return EndWord;
- }
-
- if (t == Line && chars[pos - 1].unicode() == QChar::SoftHyphen)
- return SoftHyphen;
+ BoundaryReasons reasons = NotAtBoundary;
+ if (!d || !isAtBoundary())
+ return reasons;
- if (t != Word)
- return BoundaryReasons(StartWord | EndWord);
-
- const bool nextIsSpace = d->attributes[pos].whiteSpace;
- const bool prevIsSpace = d->attributes[pos - 1].whiteSpace;
+ switch (t) {
+ case Word:
+ if (d->attributes[pos].wordStart)
+ reasons |= StartWord;
+ if (d->attributes[pos].wordEnd)
+ reasons |= EndWord;
+ break;
+ case Line:
+ if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen)
+ reasons |= SoftHyphen;
+ // fall through
+ case Grapheme:
+ case Sentence:
+ reasons |= StartWord | EndWord;
+ break;
+ default:
+ break;
+ }
- if (prevIsSpace && !nextIsSpace)
- return StartWord;
- else if (!prevIsSpace && nextIsSpace)
- return EndWord;
- else if (!prevIsSpace && !nextIsSpace)
- return BoundaryReasons(StartWord | EndWord);
- else
- return NotAtBoundary;
+ return reasons;
}
QT_END_NAMESPACE
diff --git a/src/corelib/tools/qunicodetools.cpp b/src/corelib/tools/qunicodetools.cpp
index f8daec5680..0b492abf89 100644
--- a/src/corelib/tools/qunicodetools.cpp
+++ b/src/corelib/tools/qunicodetools.cpp
@@ -102,6 +102,8 @@ static void getGraphemeBreaks(const ushort *string, quint32 len, QCharAttributes
lcls = cls;
}
+
+ attributes[len].graphemeBoundary = true; // GB2
}
@@ -133,6 +135,10 @@ static const uchar breakTable[QUnicodeTables::WordBreak_ExtendNumLet + 1][QUnico
static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *attributes)
{
+ enum WordType {
+ WordTypeNone, WordTypeAlphaNumeric, WordTypeHiraganaKatakana
+ } currentWordType = WordTypeNone;
+
QUnicodeTables::WordBreakClass cls = QUnicodeTables::WordBreak_LF; // to meet WB1
for (quint32 i = 0; i != len; ++i) {
quint32 pos = i;
@@ -178,9 +184,30 @@ static void getWordBreaks(const ushort *string, quint32 len, QCharAttributes *at
continue;
}
cls = ncls;
- if (action == WB::Break)
+ if (action == WB::Break) {
attributes[pos].wordBreak = true;
+ if (currentWordType != WordTypeNone)
+ attributes[pos].wordEnd = true;
+ switch (cls) {
+ case QUnicodeTables::WordBreak_Katakana:
+ currentWordType = WordTypeHiraganaKatakana;
+ attributes[pos].wordStart = true;
+ break;
+ case QUnicodeTables::WordBreak_ALetter:
+ case QUnicodeTables::WordBreak_Numeric:
+ currentWordType = WordTypeAlphaNumeric;
+ attributes[pos].wordStart = true;
+ break;
+ default:
+ currentWordType = WordTypeNone;
+ break;
+ }
+ }
}
+
+ if (currentWordType != WordTypeNone)
+ attributes[len].wordEnd = true;
+ attributes[len].wordBreak = true; // WB2
}
@@ -277,6 +304,8 @@ static void getSentenceBreaks(const ushort *string, quint32 len, QCharAttributes
state = SB::breakTable[SB::Initial][ncls];
}
}
+
+ attributes[len].sentenceBoundary = true; // SB2
}
@@ -514,6 +543,7 @@ static void getLineBreaks(const ushort *string, quint32 len, QCharAttributes *at
}
attributes[0].lineBreak = false; // LB2
+ attributes[len].lineBreak = true; // LB3
}
@@ -543,7 +573,7 @@ Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
return;
if (!(options & DontClearAttributes))
- ::memset(attributes, 0, length * sizeof(QCharAttributes));
+ ::memset(attributes, 0, (length + 1) * sizeof(QCharAttributes));
if (options & GraphemeBreaks)
getGraphemeBreaks(string, length, attributes);
diff --git a/src/corelib/tools/qunicodetools_p.h b/src/corelib/tools/qunicodetools_p.h
index 91028b6b51..b1e9127662 100644
--- a/src/corelib/tools/qunicodetools_p.h
+++ b/src/corelib/tools/qunicodetools_p.h
@@ -64,7 +64,9 @@ struct Q_PACKED QCharAttributes
uchar sentenceBoundary : 1;
uchar lineBreak : 1;
uchar whiteSpace : 1;
- uchar unused : 3;
+ uchar wordStart : 1;
+ uchar wordEnd : 1;
+ uchar unused : 1;
};
Q_DECLARE_TYPEINFO(QCharAttributes, Q_PRIMITIVE_TYPE);
@@ -89,6 +91,7 @@ enum CharAttributeOption {
};
Q_DECLARE_FLAGS(CharAttributeOptions, CharAttributeOption)
+// The attributes buffer must hold at least (length + 1) entries: one per character plus one for the end-of-text flags.
Q_CORE_EXPORT void initCharAttributes(const ushort *string, int length,
const ScriptItem *items, int numItems,
QCharAttributes *attributes, CharAttributeOptions options = DefaultOptionsCompat);