From da7111be51cab7c7484afd15ec9de7fb852ae305 Mon Sep 17 00:00:00 2001 From: Konstantin Ritt Date: Fri, 12 Oct 2012 06:49:27 +0300 Subject: QTextBoundaryFinder: Fix handling of break opportunities Add BoundaryReason::BreakOpportunity flag that is returned by boundaryReasons() when the boundary finder is at a break opportunity position that might not be an item boundary. This is the same as (StartWord || EndWord) in Grapheme and Sentence modes; in Word and Line modes, the BreakOpportunity flag might occur between the words or in between Line boundaries (e.g. in conjunction with the SoftHyphen flag). In other words, the text boundaries are always break opportunities, but not vice versa. The StartWord and EndWord flags have been deprecated in favor of the new StartOfItem and EndOfItem flags, which are not about word boundaries only. In line breaking, StartOfItem and EndOfItem are set for the mandatory breaks only. Change-Id: I79bf297e2b988f5976f30cff0c8ca616385f6552 Reviewed-by: Konstantin Ritt --- dist/changes-5.0.0 | 9 + src/corelib/tools/qtextboundaryfinder.cpp | 142 ++++--- src/corelib/tools/qtextboundaryfinder.h | 16 +- .../tst_qtextboundaryfinder.cpp | 410 +++++++++++---------- 4 files changed, 327 insertions(+), 250 deletions(-) diff --git a/dist/changes-5.0.0 b/dist/changes-5.0.0 index f2164087eb..57736472bc 100644 --- a/dist/changes-5.0.0 +++ b/dist/changes-5.0.0 @@ -470,6 +470,15 @@ QtCore variable. qInstallMsgHandler() has been deprecated, and should be replaced with qInstallMessageHandler(). +* QTextBoundaryFinder + - [QTBUG-6498] The word start and word end boundaries detection is now + unaware of surrounding white space characters. + - SoftHyphen enum value has been added to specify a line break opportunity + at a soft hyphen (SHY) character. + - MandatoryBreak enum value has been added to specify a mandatory (aka "hard") line breaks. 
+ - StartWord/EndWord enum values has been logically replaced with StartOfItem/EndOfItem ones + to mention they are not about the word boundaries only. + QtGui ----- * Accessibility has been refactored. The hierachy of accessible objects is implemented via diff --git a/src/corelib/tools/qtextboundaryfinder.cpp b/src/corelib/tools/qtextboundaryfinder.cpp index 416a14a35a..259a63335a 100644 --- a/src/corelib/tools/qtextboundaryfinder.cpp +++ b/src/corelib/tools/qtextboundaryfinder.cpp @@ -148,7 +148,7 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int \enum QTextBoundaryFinder::BoundaryType \value Grapheme Finds a grapheme which is the smallest boundary. It - including letters, punctation marks, numerals and more. + including letters, punctuation marks, numerals and more. \value Word Finds a word. \value Line Finds possible positions for breaking the text into multiple lines. @@ -160,12 +160,25 @@ static void init(QTextBoundaryFinder::BoundaryType type, const QChar *chars, int \enum QTextBoundaryFinder::BoundaryReason \value NotAtBoundary The boundary finder is not at a boundary position. - \value StartWord The boundary finder is at the start of a word. - \value EndWord The boundary finder is at the end of a word. + \value BreakOpportunity The boundary finder is at a break opportunity position. + Such a break opportunity might also be an item boundary + (either StartOfItem, EndOfItem, or combination of both), + a mandatory line break, or a soft hyphen. + \value StartOfItem Since 5.0. The boundary finder is at the start of + a grapheme, a word, a sentence, or a line. + \value EndOfItem Since 5.0. The boundary finder is at the end of + a grapheme, a word, a sentence, or a line. \value MandatoryBreak Since 5.0. The boundary finder is at the end of line (can occur for a Line boundary type only). \value SoftHyphen The boundary finder is at the soft hyphen (can occur for a Line boundary type only). + + \value StartWord Deprecated since 5.0. 
Use StartOfItem instead. + The boundary finder is at the start of a word. + (can occur for a Word boundary type only). + \value EndWord Deprecated since 5.0. Use EndOfItem instead. + The boundary finder is at the end of a word. + (can occur for a Word boundary type only). */ /*! @@ -190,10 +203,14 @@ QTextBoundaryFinder::QTextBoundaryFinder(const QTextBoundaryFinder &other) , length(other.length) , pos(other.pos) , freePrivate(true) + , d(0) { - d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); - Q_CHECK_PTR(d); - memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes)); + if (other.d) { + Q_ASSERT(length > 0); + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); + Q_CHECK_PTR(d); + memcpy(d, other.d, (length + 1) * sizeof(QCharAttributes)); + } } /*! @@ -205,7 +222,8 @@ QTextBoundaryFinder &QTextBoundaryFinder::operator=(const QTextBoundaryFinder &o return *this; if (other.d) { - uint newCapacity = (length + 1) * sizeof(QCharAttributes); + Q_ASSERT(other.length > 0); + uint newCapacity = (other.length + 1) * sizeof(QCharAttributes); QTextBoundaryFinderPrivate *newD = (QTextBoundaryFinderPrivate *) realloc(freePrivate ? d : 0, newCapacity); Q_CHECK_PTR(newD); freePrivate = true; @@ -248,10 +266,13 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QString &strin , length(string.length()) , pos(0) , freePrivate(true) + , d(0) { - d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); - Q_CHECK_PTR(d); - init(t, chars, length, d->attributes); + if (length > 0) { + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); + Q_CHECK_PTR(d); + init(t, chars, length, d->attributes); + } } /*! 
@@ -273,16 +294,21 @@ QTextBoundaryFinder::QTextBoundaryFinder(BoundaryType type, const QChar *chars, , chars(chars) , length(length) , pos(0) + , freePrivate(true) + , d(0) { - if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) { - d = (QTextBoundaryFinderPrivate *)buffer; - freePrivate = false; - } else { - d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); - Q_CHECK_PTR(d); - freePrivate = true; + if (!chars) { + length = 0; + } else if (length > 0) { + if (buffer && (uint)bufferSize >= (length + 1) * sizeof(QCharAttributes)) { + d = (QTextBoundaryFinderPrivate *)buffer; + freePrivate = false; + } else { + d = (QTextBoundaryFinderPrivate *) malloc((length + 1) * sizeof(QCharAttributes)); + Q_CHECK_PTR(d); + } + init(t, chars, length, d->attributes); } - init(t, chars, length, d->attributes); } /*! @@ -361,19 +387,12 @@ QString QTextBoundaryFinder::string() const */ int QTextBoundaryFinder::toNextBoundary() { - if (!d) { + if (!d || pos < 0 || pos >= length) { pos = -1; return pos; } - if (pos < 0 || pos >= length) { - pos = -1; - return pos; - } ++pos; - if (pos == length) - return pos; - switch(t) { case Grapheme: while (pos < length && !d->attributes[pos].graphemeBoundary) @@ -403,19 +422,12 @@ int QTextBoundaryFinder::toNextBoundary() */ int QTextBoundaryFinder::toPreviousBoundary() { - if (!d) { + if (!d || pos <= 0 || pos > length) { pos = -1; return pos; } - if (pos <= 0 || pos > length) { - pos = -1; - return pos; - } --pos; - if (pos == 0) - return pos; - switch(t) { case Grapheme: while (pos > 0 && !d->attributes[pos].graphemeBoundary) @@ -443,21 +455,19 @@ int QTextBoundaryFinder::toPreviousBoundary() */ bool QTextBoundaryFinder::isAtBoundary() const { - if (!d || pos < 0) + if (!d || pos < 0 || pos > length) return false; - if (pos == length) - return true; - switch(t) { case Grapheme: return d->attributes[pos].graphemeBoundary; case Word: return d->attributes[pos].wordBreak; - case Line: - return 
pos == 0 || d->attributes[pos].lineBreak; case Sentence: return d->attributes[pos].sentenceBoundary; + case Line: + // ### TR#14 LB2 prohibits break at sot + return d->attributes[pos].lineBreak || pos == 0; } return false; } @@ -468,26 +478,52 @@ bool QTextBoundaryFinder::isAtBoundary() const QTextBoundaryFinder::BoundaryReasons QTextBoundaryFinder::boundaryReasons() const { BoundaryReasons reasons = NotAtBoundary; - if (!d || !isAtBoundary()) + if (!d || pos < 0 || pos > length) return reasons; + const QCharAttributes attr = d->attributes[pos]; switch (t) { + case Grapheme: + if (attr.graphemeBoundary) { + reasons |= BreakOpportunity | StartOfItem | EndOfItem; + if (pos == 0) + reasons &= (~EndOfItem); + else if (pos == length) + reasons &= (~StartOfItem); + } + break; case Word: - if (d->attributes[pos].wordStart) - reasons |= StartWord; - if (d->attributes[pos].wordEnd) - reasons |= EndWord; + if (attr.wordBreak) { + reasons |= BreakOpportunity; + if (attr.wordStart) + reasons |= StartOfItem | StartWord; + if (attr.wordEnd) + reasons |= EndOfItem | EndWord; + } + break; + case Sentence: + if (attr.sentenceBoundary) { + reasons |= BreakOpportunity | StartOfItem | EndOfItem; + if (pos == 0) + reasons &= (~EndOfItem); + else if (pos == length) + reasons &= (~StartOfItem); + } break; case Line: // ### TR#14 LB2 prohibits break at sot - if (d->attributes[pos].mandatoryBreak || pos == 0) - reasons |= MandatoryBreak; - else if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen) - reasons |= SoftHyphen; - // fall through - case Grapheme: - case Sentence: - reasons |= StartWord | EndWord; + if (attr.lineBreak || pos == 0) { + reasons |= BreakOpportunity; + if (attr.mandatoryBreak || pos == 0) { + reasons |= MandatoryBreak | StartOfItem | EndOfItem; + if (pos == 0) + reasons &= (~EndOfItem); + else if (pos == length) + reasons &= (~StartOfItem); + } else if (pos > 0 && chars[pos - 1].unicode() == QChar::SoftHyphen) { + reasons |= SoftHyphen; + } + } break; default: 
break; diff --git a/src/corelib/tools/qtextboundaryfinder.h b/src/corelib/tools/qtextboundaryfinder.h index c752623844..6e33167bcb 100644 --- a/src/corelib/tools/qtextboundaryfinder.h +++ b/src/corelib/tools/qtextboundaryfinder.h @@ -63,16 +63,20 @@ public: enum BoundaryType { Grapheme, Word, - Line, - Sentence + Sentence, + Line }; enum BoundaryReason { NotAtBoundary = 0, - StartWord = 0x10, - EndWord = 0x20, - MandatoryBreak = 0x40, - SoftHyphen = 0x80 + BreakOpportunity = 0x1f, + StartOfItem = 0x20, + EndOfItem = 0x40, + MandatoryBreak = 0x80, + SoftHyphen = 0x100, + // ### Qt6: remove + StartWord = 0x1000, + EndWord = 0x2000 }; Q_DECLARE_FLAGS( BoundaryReasons, BoundaryReason ) diff --git a/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp b/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp index a0aab340a3..eeb22f4c21 100644 --- a/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp +++ b/tests/auto/corelib/tools/qtextboundaryfinder/tst_qtextboundaryfinder.cpp @@ -50,8 +50,6 @@ class tst_QTextBoundaryFinder : public QObject { Q_OBJECT -public slots: - void init(); private slots: #ifdef QT_BUILD_INTERNAL void graphemeBoundariesDefault_data(); @@ -71,25 +69,15 @@ private slots: void lineBoundaries_manual_data(); void lineBoundaries_manual(); + void emptyText_data(); + void emptyText(); void fastConstructor(); void assignmentOperator(); - void wordBoundaries_qtbug6498(); void isAtSoftHyphen_data(); void isAtSoftHyphen(); - void isAtMandatoryBreak_data(); - void isAtMandatoryBreak(); void thaiLineBreak(); }; -void tst_QTextBoundaryFinder::init() -{ -#ifndef Q_OS_IRIX - // chdir into the top-level data dir, then refer to our testdata using relative paths - QString testdata_dir = QFileInfo(QFINDTESTDATA("data")).absolutePath(); - QVERIFY2(QDir::setCurrent(testdata_dir), qPrintable("Could not chdir to " + testdata_dir)); -#endif -} - Q_DECLARE_METATYPE(QList) QT_BEGIN_NAMESPACE @@ -189,36 +177,29 @@ 
static void generateDataFromFile(const QString &fname) QTest::newRow(nm.toLatin1()) << testString << expectedBreakPositions; } } - -QT_BEGIN_NAMESPACE -extern Q_AUTOTEST_EXPORT int qt_initcharattributes_default_algorithm_only; -QT_END_NAMESPACE #endif static void doTestData(const QString &testString, const QList &expectedBreakPositions, - QTextBoundaryFinder::BoundaryType type, bool default_algorithm_only = false) + QTextBoundaryFinder::BoundaryType type, + QTextBoundaryFinder::BoundaryReasons reasons = QTextBoundaryFinder::BreakOpportunity) { -#ifdef QT_BUILD_INTERNAL - QScopedValueRollback default_algorithm(qt_initcharattributes_default_algorithm_only); - if (default_algorithm_only) - qt_initcharattributes_default_algorithm_only++; -#else - Q_UNUSED(default_algorithm_only) -#endif + QVERIFY(!testString.isEmpty()); QTextBoundaryFinder boundaryFinder(type, testString); // test toNextBoundary() { QList actualBreakPositions; - if (boundaryFinder.isAtBoundary()) - actualBreakPositions.append(boundaryFinder.position()); - while (boundaryFinder.toNextBoundary() != -1) { + do { QVERIFY(boundaryFinder.isAtBoundary()); - actualBreakPositions.append(boundaryFinder.position()); - } + if (boundaryFinder.boundaryReasons() & reasons) + actualBreakPositions.append(boundaryFinder.position()); + } while (boundaryFinder.toNextBoundary() != -1); QCOMPARE(actualBreakPositions, expectedBreakPositions); } + QCOMPARE(boundaryFinder.position(), -1); + QVERIFY(!boundaryFinder.isAtBoundary()); + QVERIFY(boundaryFinder.boundaryReasons() == QTextBoundaryFinder::NotAtBoundary); // test toPreviousBoundary() { @@ -227,23 +208,30 @@ static void doTestData(const QString &testString, const QList &expectedBrea QList actualBreakPositions; boundaryFinder.toEnd(); - if (boundaryFinder.isAtBoundary()) - actualBreakPositions.append(boundaryFinder.position()); - while (boundaryFinder.toPreviousBoundary() != -1) { + do { QVERIFY(boundaryFinder.isAtBoundary()); - 
actualBreakPositions.append(boundaryFinder.position()); - } + if (boundaryFinder.boundaryReasons() & reasons) + actualBreakPositions.append(boundaryFinder.position()); + } while (boundaryFinder.toPreviousBoundary() != -1); QCOMPARE(actualBreakPositions, expectedBreakPositionsRev); } + QCOMPARE(boundaryFinder.position(), -1); + QVERIFY(!boundaryFinder.isAtBoundary()); + QVERIFY(boundaryFinder.boundaryReasons() == QTextBoundaryFinder::NotAtBoundary); - // test isAtBoundary() - for (int i = 0; i < testString.length(); ++i) { + // test boundaryReasons() + for (int i = 0; i <= testString.length(); ++i) { boundaryFinder.setPosition(i); - QCOMPARE(boundaryFinder.isAtBoundary(), expectedBreakPositions.contains(i)); + QCOMPARE(!!(boundaryFinder.boundaryReasons() & reasons), expectedBreakPositions.contains(i)); } } #ifdef QT_BUILD_INTERNAL + +QT_BEGIN_NAMESPACE +extern Q_AUTOTEST_EXPORT int qt_initcharattributes_default_algorithm_only; +QT_END_NAMESPACE + void tst_QTextBoundaryFinder::graphemeBoundariesDefault_data() { generateDataFromFile("data/GraphemeBreakTest.txt"); @@ -254,7 +242,10 @@ void tst_QTextBoundaryFinder::graphemeBoundariesDefault() QFETCH(QString, testString); QFETCH(QList, expectedBreakPositions); - doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Grapheme, true); + QScopedValueRollback default_algorithm(qt_initcharattributes_default_algorithm_only); + qt_initcharattributes_default_algorithm_only++; + + doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Grapheme); } void tst_QTextBoundaryFinder::wordBoundariesDefault_data() @@ -267,7 +258,10 @@ void tst_QTextBoundaryFinder::wordBoundariesDefault() QFETCH(QString, testString); QFETCH(QList, expectedBreakPositions); - doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Word, true); + QScopedValueRollback default_algorithm(qt_initcharattributes_default_algorithm_only); + qt_initcharattributes_default_algorithm_only++; + + doTestData(testString, 
expectedBreakPositions, QTextBoundaryFinder::Word); } void tst_QTextBoundaryFinder::sentenceBoundariesDefault_data() @@ -280,7 +274,10 @@ void tst_QTextBoundaryFinder::sentenceBoundariesDefault() QFETCH(QString, testString); QFETCH(QList, expectedBreakPositions); - doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Sentence, true); + QScopedValueRollback default_algorithm(qt_initcharattributes_default_algorithm_only); + qt_initcharattributes_default_algorithm_only++; + + doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Sentence); } void tst_QTextBoundaryFinder::lineBoundariesDefault_data() @@ -293,8 +290,11 @@ void tst_QTextBoundaryFinder::lineBoundariesDefault() QFETCH(QString, testString); QFETCH(QList, expectedBreakPositions); + QScopedValueRollback default_algorithm(qt_initcharattributes_default_algorithm_only); + qt_initcharattributes_default_algorithm_only++; + expectedBreakPositions.prepend(0); // ### QTBF generates a boundary at start of text - doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Line, true); + doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Line); } #endif // QT_BUILD_INTERNAL @@ -302,99 +302,141 @@ void tst_QTextBoundaryFinder::wordBoundaries_manual_data() { QTest::addColumn("testString"); QTest::addColumn >("expectedBreakPositions"); + QTest::addColumn >("expectedStartPositions"); + QTest::addColumn >("expectedEndPositions"); { QChar s[] = { 0x000D, 0x000A, 0x000A }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 2 << 3; - QTest::newRow("+CRxLF+LF+") << testString << expectedBreakPositions; + QTest::newRow("+CRxLF+LF+") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x000D, 0x0308, 0x000A, 0x000A }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList 
expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 1 << 2 << 3 << 4; - QTest::newRow("+CR+FE+LF+LF+") << testString << expectedBreakPositions; + QTest::newRow("+CR+FE+LF+LF+") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QString testString(QString::fromUtf8("Aaa bbb ccc.\r\nDdd eee fff.")); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 3 << 4 << 7 << 8 << 11 << 12 << 14 << 17 << 18 << 21 << 22 << 25 << 26; + expectedStartPositions << 0 << 4 << 8 << 14 << 18 << 22; + expectedEndPositions << 3 << 7 << 11 << 17 << 21 << 25; - QTest::newRow("data1") << testString << expectedBreakPositions; + QTest::newRow("data1") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; + } + { + // text with trailing space + QString testString(QString::fromUtf8("Please test me. 
Finish ")); + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; + expectedBreakPositions << 0 << 6 << 7 << 11 << 12 << 14 << 15 << 16 << 22 << 23; + expectedStartPositions << 0 << 7 << 12 << 16; + expectedEndPositions << 6 << 11 << 14 << 22; + + QTest::newRow("qtbug6498") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } // Sample Strings from WordBreakTest.html { QChar s[] = { 0x0063, 0x0061, 0x006E, 0x0027, 0x0074 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 5; + expectedStartPositions << 0; + expectedEndPositions << 5; - QTest::newRow("ts 1") << testString << expectedBreakPositions; + QTest::newRow("ts 1") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x0063, 0x0061, 0x006E, 0x2019, 0x0074 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 5; + expectedStartPositions << 0; + expectedEndPositions << 5; - QTest::newRow("ts 2") << testString << expectedBreakPositions; + QTest::newRow("ts 2") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x0061, 0x0062, 0x00AD, 0x0062, 0x0061 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 5; + expectedStartPositions << 0; + expectedEndPositions << 5; - QTest::newRow("ts 3") << testString << expectedBreakPositions; + QTest::newRow("ts 3") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x0061, 0x0024, 0x002D, 0x0033, 0x0034, 0x002C, 0x0035, 0x0036, 
0x0037, 0x002E, 0x0031, 0x0034, 0x0025, 0x0062 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 1 << 2 << 3 << 12 << 13 << 14; + expectedStartPositions << 0 << 3 << 13; + expectedEndPositions << 1 << 12 << 14; - QTest::newRow("ts 4") << testString << expectedBreakPositions; + QTest::newRow("ts 4") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x0033, 0x0061 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 2; + expectedStartPositions << 0; + expectedEndPositions << 2; - QTest::newRow("ts 5") << testString << expectedBreakPositions; + QTest::newRow("ts 5") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x2060, 0x0063, 0x2060, 0x0061, 0x2060, 0x006E, 0x2060, 0x0027, 0x2060, 0x0074, 0x2060, 0x2060 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 1 << 12; + expectedStartPositions << 1; + expectedEndPositions << 12; - QTest::newRow("ts 1e") << testString << expectedBreakPositions; + QTest::newRow("ts 1e") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x2060, 0x0063, 0x2060, 0x0061, 0x2060, 0x006E, 0x2060, 0x2019, 0x2060, 0x0074, 0x2060, 0x2060 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 1 << 12; + expectedStartPositions << 1; + expectedEndPositions << 12; - QTest::newRow("ts 2e") << testString << 
expectedBreakPositions; + QTest::newRow("ts 2e") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x2060, 0x0061, 0x2060, 0x0062, 0x2060, 0x00AD, 0x2060, 0x0062, 0x2060, 0x0061, 0x2060, 0x2060 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 1 << 12; + expectedStartPositions << 1; + expectedEndPositions << 12; - QTest::newRow("ts 3e") << testString << expectedBreakPositions; + QTest::newRow("ts 3e") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x2060, 0x0061, 0x2060, 0x0024, 0x2060, 0x002D, 0x2060, 0x0033, @@ -402,18 +444,24 @@ void tst_QTextBoundaryFinder::wordBoundaries_manual_data() 0x2060, 0x0037, 0x2060, 0x002E, 0x2060, 0x0031, 0x2060, 0x0034, 0x2060, 0x0025, 0x2060, 0x0062, 0x2060, 0x2060 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 1 << 3 << 5 << 7 << 25 << 27 << 30; + expectedStartPositions << 1 << 7 << 27; + expectedEndPositions << 3 << 25 << 30; - QTest::newRow("ts 4e") << testString << expectedBreakPositions; + QTest::newRow("ts 4e") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } { QChar s[] = { 0x2060, 0x0033, 0x2060, 0x0061, 0x2060, 0x2060 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedStartPositions, expectedEndPositions; expectedBreakPositions << 0 << 1 << 6; + expectedStartPositions << 1; + expectedEndPositions << 6; - QTest::newRow("ts 5e") << testString << expectedBreakPositions; + QTest::newRow("ts 5e") << testString << expectedBreakPositions + << expectedStartPositions << expectedEndPositions; } } @@ -421,8 +469,12 
@@ void tst_QTextBoundaryFinder::wordBoundaries_manual() { QFETCH(QString, testString); QFETCH(QList, expectedBreakPositions); + QFETCH(QList, expectedStartPositions); + QFETCH(QList, expectedEndPositions); doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Word); + doTestData(testString, expectedStartPositions, QTextBoundaryFinder::Word, QTextBoundaryFinder::StartOfItem); + doTestData(testString, expectedEndPositions, QTextBoundaryFinder::Word, QTextBoundaryFinder::EndOfItem); } void tst_QTextBoundaryFinder::sentenceBoundaries_manual_data() @@ -467,53 +519,80 @@ void tst_QTextBoundaryFinder::sentenceBoundaries_manual() QFETCH(QString, testString); QFETCH(QList, expectedBreakPositions); + QVERIFY(expectedBreakPositions.size() >= 2); + QList expectedStartPositions = expectedBreakPositions; expectedStartPositions.removeLast(); + QList expectedEndPositions = expectedBreakPositions; expectedEndPositions.removeFirst(); + doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Sentence); + doTestData(testString, expectedStartPositions, QTextBoundaryFinder::Sentence, QTextBoundaryFinder::StartOfItem); + doTestData(testString, expectedEndPositions, QTextBoundaryFinder::Sentence, QTextBoundaryFinder::EndOfItem); } void tst_QTextBoundaryFinder::lineBoundaries_manual_data() { QTest::addColumn("testString"); QTest::addColumn >("expectedBreakPositions"); + QTest::addColumn >("expectedMandatoryBreakPositions"); { QString testString(QString::fromUtf8("Aaa bbb ccc.\r\nDdd eee fff.")); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedMandatoryBreakPositions; expectedBreakPositions << 0 << 4 << 8 << 14 << 18 << 22 << 26; + expectedMandatoryBreakPositions << 0 << 14 << 26; - QTest::newRow("data1") << testString << expectedBreakPositions; + QTest::newRow("data1") << testString << expectedBreakPositions + << expectedMandatoryBreakPositions; } { QString testString(QString::fromUtf8("Diga-nos qualé a sua opinião")); - QList 
expectedBreakPositions; + QList expectedBreakPositions, expectedMandatoryBreakPositions; expectedBreakPositions << 0 << 5 << 9 << 15 << 17 << 21 << 28; + expectedMandatoryBreakPositions << 0 << 28; - QTest::newRow("data2") << testString << expectedBreakPositions; + QTest::newRow("data2") << testString << expectedBreakPositions + << expectedMandatoryBreakPositions; } + { + QChar s[] = { 0x000D, 0x0308, 0x000A, 0x000A, 0x0020 }; + QString testString(s, sizeof(s)/sizeof(s[0])); + QList expectedBreakPositions, expectedMandatoryBreakPositions; + expectedBreakPositions << 0 << 1 << 3 << 4 << 5; + expectedMandatoryBreakPositions << 0 << 1 << 3 << 4 << 5; + + QTest::newRow("x(CR)+(FE)x(LF)+(LF)+(SP)+") << testString << expectedBreakPositions + << expectedMandatoryBreakPositions; + } { QChar s[] = { 0x000A, 0x2E80, 0x0308, 0x0023, 0x0023 }; QString testString(s, sizeof(s)/sizeof(QChar)); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedMandatoryBreakPositions; expectedBreakPositions << 0 << 1 << 3 << 5; + expectedMandatoryBreakPositions << 0 << 1 << 5; - QTest::newRow("x(LF)+(ID)x(CM)+(AL)x(AL)+") << testString << expectedBreakPositions; + QTest::newRow("x(LF)+(ID)x(CM)+(AL)x(AL)+") << testString << expectedBreakPositions + << expectedMandatoryBreakPositions; } { QChar s[] = { 0x000A, 0x0308, 0x0023, 0x0023 }; QString testString(s, sizeof(s)/sizeof(QChar)); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedMandatoryBreakPositions; expectedBreakPositions << 0 << 1 << 4; + expectedMandatoryBreakPositions << 0 << 1 << 4; - QTest::newRow("x(LF)+(CM)x(AL)x(AL)+") << testString << expectedBreakPositions; + QTest::newRow("x(LF)+(CM)x(AL)x(AL)+") << testString << expectedBreakPositions + << expectedMandatoryBreakPositions; } { QChar s[] = { 0x0061, 0x00AD, 0x0062, 0x0009, 0x0063, 0x0064 }; QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; + QList expectedBreakPositions, expectedMandatoryBreakPositions; 
expectedBreakPositions << 0 << 2 << 4 << 6; + expectedMandatoryBreakPositions << 0 << 6; - QTest::newRow("x(AL)x(BA)+(AL)x(BA)+(AL)x(AL)+") << testString << expectedBreakPositions; + QTest::newRow("x(AL)x(BA)+(AL)x(BA)+(AL)x(AL)+") << testString << expectedBreakPositions + << expectedMandatoryBreakPositions; } } @@ -521,27 +600,72 @@ void tst_QTextBoundaryFinder::lineBoundaries_manual() { QFETCH(QString, testString); QFETCH(QList, expectedBreakPositions); + QFETCH(QList, expectedMandatoryBreakPositions); + + QVERIFY(expectedMandatoryBreakPositions.size() >= 2); + QList expectedStartPositions = expectedMandatoryBreakPositions; expectedStartPositions.removeLast(); + QList expectedEndPositions = expectedMandatoryBreakPositions; expectedEndPositions.removeFirst(); doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Line); + doTestData(testString, expectedMandatoryBreakPositions, QTextBoundaryFinder::Line, QTextBoundaryFinder::MandatoryBreak); + doTestData(testString, expectedStartPositions, QTextBoundaryFinder::Line, QTextBoundaryFinder::StartOfItem); + doTestData(testString, expectedEndPositions, QTextBoundaryFinder::Line, QTextBoundaryFinder::EndOfItem); +} + +Q_DECLARE_METATYPE(QTextBoundaryFinder) + +void tst_QTextBoundaryFinder::emptyText_data() +{ + QTest::addColumn("boundaryFinder"); + + QString empty; + QString notEmpty(QLatin1String("not empty")); + uchar attrs[11]; + + QTextBoundaryFinder invalidFinder(QTextBoundaryFinder::Word, empty); + QTest::newRow("empty1") << invalidFinder; + QTextBoundaryFinder finder(invalidFinder); + QTest::newRow("empty2") << finder; + finder = QTextBoundaryFinder(QTextBoundaryFinder::Grapheme, notEmpty); + finder = invalidFinder; + QTest::newRow("empty3") << finder; + QTest::newRow("empty4") << QTextBoundaryFinder(QTextBoundaryFinder::Word, notEmpty.constData(), 0, 0, 0); + QTest::newRow("empty5") << QTextBoundaryFinder(QTextBoundaryFinder::Word, notEmpty.constData(), 0, attrs, 11); + QTest::newRow("invalid1") << 
QTextBoundaryFinder(QTextBoundaryFinder::Word, 0, 10, 0, 0); + QTest::newRow("invalid2") << QTextBoundaryFinder(QTextBoundaryFinder::Word, 0, 10, attrs, 11); +} + +void tst_QTextBoundaryFinder::emptyText() +{ + QFETCH(QTextBoundaryFinder, boundaryFinder); + + QCOMPARE(boundaryFinder.position(), 0); + QCOMPARE(boundaryFinder.boundaryReasons(), QTextBoundaryFinder::NotAtBoundary); + + boundaryFinder.toNextBoundary(); + QCOMPARE(boundaryFinder.position(), -1); + QCOMPARE(boundaryFinder.boundaryReasons(), QTextBoundaryFinder::NotAtBoundary); } void tst_QTextBoundaryFinder::fastConstructor() { QString text("Hello World"); QTextBoundaryFinder finder(QTextBoundaryFinder::Word, text.constData(), text.length(), /*buffer*/0, /*buffer size*/0); - QCOMPARE(finder.boundaryReasons(), QTextBoundaryFinder::StartWord); + + QCOMPARE(finder.position(), 0); + QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartOfItem); finder.toNextBoundary(); QCOMPARE(finder.position(), 5); - QCOMPARE(finder.boundaryReasons(), QTextBoundaryFinder::EndWord); + QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndOfItem); finder.toNextBoundary(); QCOMPARE(finder.position(), 6); - QCOMPARE(finder.boundaryReasons(), QTextBoundaryFinder::StartWord); + QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartOfItem); finder.toNextBoundary(); QCOMPARE(finder.position(), text.length()); - QCOMPARE(finder.boundaryReasons(), QTextBoundaryFinder::EndWord); + QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndOfItem); finder.toNextBoundary(); QCOMPARE(finder.position(), -1); @@ -572,128 +696,32 @@ void tst_QTextBoundaryFinder::assignmentOperator() QCOMPARE(finder.string(), text); } -void tst_QTextBoundaryFinder::wordBoundaries_qtbug6498() -{ - // text with trailing space - QString text("Please test me. 
Finish "); - QTextBoundaryFinder finder(QTextBoundaryFinder::Word, text); - - QCOMPARE(finder.position(), 0); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartWord); - - QCOMPARE(finder.toNextBoundary(), 6); - QCOMPARE(finder.position(), 6); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndWord); - - QCOMPARE(finder.toNextBoundary(), 7); - QCOMPARE(finder.position(), 7); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartWord); - - QCOMPARE(finder.toNextBoundary(), 11); - QCOMPARE(finder.position(), 11); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndWord); - - QCOMPARE(finder.toNextBoundary(), 12); - QCOMPARE(finder.position(), 12); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartWord); - - QCOMPARE(finder.toNextBoundary(), 14); - QCOMPARE(finder.position(), 14); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndWord); - - QCOMPARE(finder.toNextBoundary(), 15); - QCOMPARE(finder.position(), 15); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() == QTextBoundaryFinder::NotAtBoundary); - - QCOMPARE(finder.toNextBoundary(), 16); - QCOMPARE(finder.position(), 16); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::StartWord); - - QCOMPARE(finder.toNextBoundary(), 22); - QCOMPARE(finder.position(), 22); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() & QTextBoundaryFinder::EndWord); - - QCOMPARE(finder.toNextBoundary(), 23); - QCOMPARE(finder.position(), 23); - QVERIFY(finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() == QTextBoundaryFinder::NotAtBoundary); - - QCOMPARE(finder.toNextBoundary(), -1); - QCOMPARE(finder.position(), -1); - QVERIFY(!finder.isAtBoundary()); - QVERIFY(finder.boundaryReasons() 
== QTextBoundaryFinder::NotAtBoundary); -} - void tst_QTextBoundaryFinder::isAtSoftHyphen_data() { QTest::addColumn("testString"); QTest::addColumn >("expectedBreakPositions"); - - QString testString = QString::fromUtf8("I a-m break-able"); - testString.replace(QLatin1Char('-'), QChar(QChar::SoftHyphen)); - QList expectedBreakPositions; - expectedBreakPositions << 0 << 2 << 4 << 6 << 12 << 16; - QTest::newRow("Soft Hyphen") << testString << expectedBreakPositions; -} - -void tst_QTextBoundaryFinder::isAtSoftHyphen() -{ - QFETCH(QString, testString); - QFETCH(QList, expectedBreakPositions); - - doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Line); - - QTextBoundaryFinder boundaryFinder(QTextBoundaryFinder::Line, testString); - for (int i = 0; (i = testString.indexOf(QChar(QChar::SoftHyphen), i)) != -1; ++i) { - QVERIFY(expectedBreakPositions.contains(i + 1)); - boundaryFinder.setPosition(i + 1); - QVERIFY(boundaryFinder.isAtBoundary()); - QVERIFY(boundaryFinder.boundaryReasons() & QTextBoundaryFinder::SoftHyphen); - } -} - -void tst_QTextBoundaryFinder::isAtMandatoryBreak_data() -{ - QTest::addColumn("testString"); - QTest::addColumn >("expectedBreakPositions"); + QTest::addColumn >("expectedSoftHyphenPositions"); { - QChar s[] = { 0x000D, 0x0308, 0x000A, 0x000A }; - QString testString(s, sizeof(s)/sizeof(s[0])); - QList expectedBreakPositions; - expectedBreakPositions << 0 << 1 << 3 << 4; - - QTest::newRow("+CR+FExLF+LF+") << testString << expectedBreakPositions; - } - { - QString testString(QString::fromUtf8("Aaa bbb ccc.\r\nDdd eee fff.")); - QList expectedBreakPositions; - expectedBreakPositions << 0 << 14 << 26; + QString testString = QString::fromUtf8("I a-m break-able"); + testString.replace(QLatin1Char('-'), QChar(QChar::SoftHyphen)); + QList expectedBreakPositions, expectedSoftHyphenPositions; + expectedBreakPositions << 0 << 2 << 4 << 6 << 12 << 16; + expectedSoftHyphenPositions << 4 << 12; - QTest::newRow("data1") << testString << 
expectedBreakPositions; + QTest::newRow("Soft Hyphen") << testString << expectedBreakPositions + << expectedSoftHyphenPositions; } } -void tst_QTextBoundaryFinder::isAtMandatoryBreak() +void tst_QTextBoundaryFinder::isAtSoftHyphen() { QFETCH(QString, testString); QFETCH(QList, expectedBreakPositions); + QFETCH(QList, expectedSoftHyphenPositions); - QTextBoundaryFinder boundaryFinder(QTextBoundaryFinder::Line, testString); - for (int i = 0; i <= testString.size(); ++i) { - boundaryFinder.setPosition(i); - if (boundaryFinder.boundaryReasons() & QTextBoundaryFinder::MandatoryBreak) - QVERIFY(expectedBreakPositions.contains(i)); - } + doTestData(testString, expectedBreakPositions, QTextBoundaryFinder::Line); + doTestData(testString, expectedSoftHyphenPositions, QTextBoundaryFinder::Line, QTextBoundaryFinder::SoftHyphen); } #include -- cgit v1.2.3