diff options
Diffstat (limited to 'src/corelib/text/qtextboundaryfinder.cpp')
-rw-r--r-- | src/corelib/text/qtextboundaryfinder.cpp | 511 |
1 files changed, 511 insertions, 0 deletions
diff --git a/src/corelib/text/qtextboundaryfinder.cpp b/src/corelib/text/qtextboundaryfinder.cpp new file mode 100644 index 0000000000..67dd15377b --- /dev/null +++ b/src/corelib/text/qtextboundaryfinder.cpp @@ -0,0 +1,511 @@ +/**************************************************************************** +** +** Copyright (C) 2016 The Qt Company Ltd. +** Contact: https://www.qt.io/licensing/ +** +** This file is part of the QtCore module of the Qt Toolkit. +** +** $QT_BEGIN_LICENSE:LGPL$ +** Commercial License Usage +** Licensees holding valid commercial Qt licenses may use this file in +** accordance with the commercial license agreement provided with the +** Software or, alternatively, in accordance with the terms contained in +** a written agreement between you and The Qt Company. For licensing terms +** and conditions see https://www.qt.io/terms-conditions. For further +** information use the contact form at https://www.qt.io/contact-us. +** +** GNU Lesser General Public License Usage +** Alternatively, this file may be used under the terms of the GNU Lesser +** General Public License version 3 as published by the Free Software +** Foundation and appearing in the file LICENSE.LGPL3 included in the +** packaging of this file. Please review the following information to +** ensure the GNU Lesser General Public License version 3 requirements +** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. +** +** GNU General Public License Usage +** Alternatively, this file may be used under the terms of the GNU +** General Public License version 2.0 or (at your option) the GNU General +** Public license version 3 or any later version approved by the KDE Free +** Qt Foundation. The licenses are as published by the Free Software +** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 +** included in the packaging of this file. 
Please review the following +** information to ensure the GNU General Public License requirements will +** be met: https://www.gnu.org/licenses/gpl-2.0.html and +** https://www.gnu.org/licenses/gpl-3.0.html. +** +** $QT_END_LICENSE$ +** +****************************************************************************/ +#include <QtCore/qtextboundaryfinder.h> +#include <QtCore/qvarlengtharray.h> + +#include <private/qunicodetools_p.h> + +QT_BEGIN_NAMESPACE + +class QTextBoundaryFinderPrivate +{ +public: + QCharAttributes attributes[1]; +}; + +static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int length, QCharAttributes *attributes) +{ + const ushort *string = reinterpret_cast<const ushort *>(chars); + + QVarLengthArray<QUnicodeTools::ScriptItem> scriptItems; + { + QVarLengthArray<uchar> scripts(length); + + QUnicodeTools::initScripts(string, length, scripts.data()); + + int start = 0; + for (int i = start + 1; i <= length; ++i) { + if (i == length || scripts[i] != scripts[start]) { + QUnicodeTools::ScriptItem item; + item.position = start; + item.script = scripts[start]; + scriptItems.append(item); + start = i; + } + } + } + + QUnicodeTools::CharAttributeOptions options = 0; + switch (type) { + case QTextBoundaryFinder::Grapheme: options |= QUnicodeTools::GraphemeBreaks; break; + case QTextBoundaryFinder::Word: options |= QUnicodeTools::WordBreaks; break; + case QTextBoundaryFinder::Sentence: options |= QUnicodeTools::SentenceBreaks; break; + case QTextBoundaryFinder::Line: options |= QUnicodeTools::LineBreaks; break; + default: break; + } + QUnicodeTools::initCharAttributes(string, length, scriptItems.data(), scriptItems.count(), attributes, options); +} + +/*! + \class QTextBoundaryFinder + \inmodule QtCore + + \brief The QTextBoundaryFinder class provides a way of finding Unicode text boundaries in a string. 
+ + \since 4.4 + \ingroup tools + \ingroup shared + \ingroup string-processing + \reentrant + + QTextBoundaryFinder allows you to find Unicode text boundaries in a + string, according to the Unicode text boundary specification (see + \l{http://www.unicode.org/reports/tr14/}{Unicode Standard Annex #14} and + \l{http://www.unicode.org/reports/tr29/}{Unicode Standard Annex #29}). + + QTextBoundaryFinder can operate on a QString in four possible + modes depending on the value of \a BoundaryType. + + Units of Unicode characters that make up what the user thinks of + as a character or basic unit of the language are here called + Grapheme clusters. The two Unicode characters 'A' + diaeresis do + for example form one grapheme cluster as the user thinks of them + as one character, yet it is in this case represented by two + Unicode code points + (see \l{http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries}). + + Word boundaries are there to locate the start and end of what a + language considers to be a word + (see \l{http://www.unicode.org/reports/tr29/#Word_Boundaries}). + + Line break boundaries give possible places where a line break + might happen and sentence boundaries will show the beginning and + end of whole sentences + (see \l{http://www.unicode.org/reports/tr29/#Sentence_Boundaries} and + \l{http://www.unicode.org/reports/tr14/}). + + The first position in a string is always a valid boundary and + refers to the position before the first character. The last + position at the length of the string is also valid and refers + to the position after the last character. +*/ + +/*! + \enum QTextBoundaryFinder::BoundaryType + + \value Grapheme Finds a grapheme which is the smallest boundary. It + includes letters, punctuation marks, numerals and more. + \value Word Finds a word. + \value Line Finds possible positions for breaking the text into multiple + lines. + \value Sentence Finds sentence boundaries. These include periods, question + marks etc. +*/ + +/*!
+ \enum QTextBoundaryFinder::BoundaryReason + + \value NotAtBoundary The boundary finder is not at a boundary position. + \value BreakOpportunity The boundary finder is at a break opportunity position. + Such a break opportunity might also be an item boundary + (either StartOfItem, EndOfItem, or combination of both), + a mandatory line break, or a soft hyphen. + \value StartOfItem Since 5.0. The boundary finder is at the start of + a grapheme, a word, a sentence, or a line. + \value EndOfItem Since 5.0. The boundary finder is at the end of + a grapheme, a word, a sentence, or a line. + \value MandatoryBreak Since 5.0. The boundary finder is at the end of line + (can occur for a Line boundary type only). + \value SoftHyphen The boundary finder is at the soft hyphen + (can occur for a Line boundary type only). +*/ + +/*! + Constructs an invalid QTextBoundaryFinder object. +*/ +QTextBoundaryFinder::QTextBoundaryFinder() + : t(Grapheme) + , chars(0) + , length(0) + , freePrivate(true) + , d(0) +{ +} + +/*! + Copies the QTextBoundaryFinder object, \a other. +*/ +QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other) + : t(other.t) + , s(other.s) + , chars(other.chars) + , length(other.length) + , pos(other.pos) + , freePrivate(true) + , d(0) +{ + if (other.d) { + Q_ASSERT(length > 0); + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); + Q_CHECK_PTR(d); + memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes)); + } +} + +/*! + Assigns the object, \a other, to another QTextBoundaryFinder object. +*/ +QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &other) +{ + if (&other == this) + return *this; + + if (other.d) { + Q_ASSERT(other.length > 0); + uint newCapacity = (other.length + 1) * sizeof(QCharAttributes); + QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *) realloc(freePrivate ? 
d : 0, newCapacity); + Q_CHECK_PTR(newD); + freePrivate = true; + d = newD; + } + + t = other.t; + s = other.s; + chars = other.chars; + length = other.length; + pos = other.pos; + + if (other.d) { + memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes)); + } else { + if (freePrivate) + free(d); + d = 0; + } + + return *this; +} + +/*! + Destructs the QTextBoundaryFinder object. +*/ +QTextBoundaryFinder::~QTextBoundaryFinder() +{ + Q_UNUSED(unused); + if (freePrivate) + free(d); +} + +/*! + Creates a QTextBoundaryFinder object of \a type operating on \a string. +*/ +QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &string) + : t(type) + , s(string) + , chars(string.unicode()) + , length(string.length()) + , pos(0) + , freePrivate(true) + , d(0) +{ + if (length > 0) { + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); + Q_CHECK_PTR(d); + init(t, chars, length, d->attributes); + } +} + +/*! + Creates a QTextBoundaryFinder object of \a type operating on \a chars + with \a length. + + \a buffer is an optional working buffer of size \a bufferSize you can pass to + the QTextBoundaryFinder. If the buffer is large enough to hold the working + data required (bufferSize >= length + 1), it will use this + instead of allocating its own buffer. + + \warning QTextBoundaryFinder does not create a copy of \a chars. It is the + application programmer's responsibility to ensure the array is allocated for + as long as the QTextBoundaryFinder object stays alive. The same applies to + \a buffer. 
+*/ +QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, int length, unsigned char *buffer, int bufferSize) + : t(type) + , chars(chars) + , length(length) + , pos(0) + , freePrivate(true) + , d(0) +{ + if (!chars) { + length = 0; + } else if (length > 0) { + if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) { + d = (QTextBoundaryFinderPrivate *)buffer; + freePrivate = false; + } else { + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); + Q_CHECK_PTR(d); + } + init(t, chars, length, d->attributes); + } +} + +/*! + Moves the finder to the start of the string. This is equivalent to setPosition(0). + + \sa setPosition(), position() +*/ +void QTextBoundaryFinder::toStart() +{ + pos = 0; +} + +/*! + Moves the finder to the end of the string. This is equivalent to setPosition(string.length()). + + \sa setPosition(), position() +*/ +void QTextBoundaryFinder::toEnd() +{ + pos = length; +} + +/*! + Returns the current position of the QTextBoundaryFinder. + + The range is from 0 (the beginning of the string) to the length of + the string inclusive. + + \sa setPosition() +*/ +int QTextBoundaryFinder::position() const +{ + return pos; +} + +/*! + Sets the current position of the QTextBoundaryFinder to \a position. + + If \a position is out of bounds, it will be bound to only valid + positions. In this case, valid positions are from 0 to the length of + the string inclusive. + + \sa position() +*/ +void QTextBoundaryFinder::setPosition(int position) +{ + pos = qBound(0, position, length); +} + +/*! \fn QTextBoundaryFinder::BoundaryType QTextBoundaryFinder::type() const + + Returns the type of the QTextBoundaryFinder. +*/ + +/*! \fn bool QTextBoundaryFinder::isValid() const + + Returns \c true if the text boundary finder is valid; otherwise returns \c false. + A default QTextBoundaryFinder is invalid. +*/ + +/*! + Returns the string the QTextBoundaryFinder object operates on. 
+*/ +QString QTextBoundaryFinder::string() const +{ + if (chars == s.unicode() && length == s.length()) + return s; + return QString(chars, length); +} + + +/*! + Moves the QTextBoundaryFinder to the next boundary position and returns that position. + + Returns -1 if there is no next boundary. +*/ +int QTextBoundaryFinder::toNextBoundary() +{ + if (!d || pos < 0 || pos >= length) { + pos = -1; + return pos; + } + + ++pos; + switch(t) { + case Grapheme: + while (pos < length && !d->attributes[pos].graphemeBoundary) + ++pos; + break; + case Word: + while (pos < length && !d->attributes[pos].wordBreak) + ++pos; + break; + case Sentence: + while (pos < length && !d->attributes[pos].sentenceBoundary) + ++pos; + break; + case Line: + while (pos < length && !d->attributes[pos].lineBreak) + ++pos; + break; + } + + return pos; +} + +/*! + Moves the QTextBoundaryFinder to the previous boundary position and returns that position. + + Returns -1 if there is no previous boundary. +*/ +int QTextBoundaryFinder::toPreviousBoundary() +{ + if (!d || pos <= 0 || pos > length) { + pos = -1; + return pos; + } + + --pos; + switch(t) { + case Grapheme: + while (pos > 0 && !d->attributes[pos].graphemeBoundary) + --pos; + break; + case Word: + while (pos > 0 && !d->attributes[pos].wordBreak) + --pos; + break; + case Sentence: + while (pos > 0 && !d->attributes[pos].sentenceBoundary) + --pos; + break; + case Line: + while (pos > 0 && !d->attributes[pos].lineBreak) + --pos; + break; + } + + return pos; +} + +/*! + Returns \c true if the object's position() is currently at a valid text boundary. 
+*/ +bool QTextBoundaryFinder::isAtBoundary() const +{ + if (!d || pos < 0 || pos > length) + return false; + + switch(t) { + case Grapheme: + return d->attributes[pos].graphemeBoundary; + case Word: + return d->attributes[pos].wordBreak; + case Sentence: + return d->attributes[pos].sentenceBoundary; + case Line: + // ### TR#14 LB2 prohibits break at sot + return d->attributes[pos].lineBreak || pos == 0; + } + return false; +} + +/*! + Returns the reasons for the boundary finder to have chosen the current position as a boundary. +*/ +QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const +{ + BoundaryReasons reasons = NotAtBoundary; + if (!d || pos < 0 || pos > length) + return reasons; + + const QCharAttributes attr = d->attributes[pos]; + switch (t) { + case Grapheme: + if (attr.graphemeBoundary) { + reasons |= BreakOpportunity | StartOfItem | EndOfItem; + if (pos == 0) + reasons &= (~EndOfItem); + else if (pos == length) + reasons &= (~StartOfItem); + } + break; + case Word: + if (attr.wordBreak) { + reasons |= BreakOpportunity; + if (attr.wordStart) + reasons |= StartOfItem; + if (attr.wordEnd) + reasons |= EndOfItem; + } + break; + case Sentence: + if (attr.sentenceBoundary) { + reasons |= BreakOpportunity | StartOfItem | EndOfItem; + if (pos == 0) + reasons &= (~EndOfItem); + else if (pos == length) + reasons &= (~StartOfItem); + } + break; + case Line: + // ### TR#14 LB2 prohibits break at sot + if (attr.lineBreak || pos == 0) { + reasons |= BreakOpportunity; + if (attr.mandatoryBreak || pos == 0) { + reasons |= MandatoryBreak | StartOfItem | EndOfItem; + if (pos == 0) + reasons &= (~EndOfItem); + else if (pos == length) + reasons &= (~StartOfItem); + } else if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen) { + reasons |= SoftHyphen; + } + } + break; + default: + break; + } + + return reasons; +} + +QT_END_NAMESPACE |