summaryrefslogtreecommitdiffstats
path: root/tests/auto/corelib
diff options
context:
space:
mode:
authorØystein Heskestad <oystein.heskestad@qt.io>2021-08-11 11:50:59 +0200
committerØystein Heskestad <oystein.heskestad@qt.io>2021-11-10 09:44:03 +0100
commit09291eead45a49e2450e2a6ab6da53351dedd4be (patch)
tree342b26e4a6232cbd56ff04f3ed9fe2a7b7d65330 /tests/auto/corelib
parent76b4739e0714414fa6a8ae999bc93a692f5c81aa (diff)
Add additional grapheme, word, and sentence break class tests from tr29
Stop turning THAI CHARACTER SARA AM into a grapheme boundary because it breaks a test and chromium does not consider it to be a separate grapheme. Fixes: QTBUG-88545 Change-Id: Ib1aea8dbb66ac42b2129cf9fe04c39f5f76eeb36 Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'tests/auto/corelib')
-rw-r--r--tests/auto/corelib/text/CMakeLists.txt1
-rw-r--r--tests/auto/corelib/text/qchar/tst_qchar.cpp20
-rw-r--r--tests/auto/corelib/text/qunicodetools/CMakeLists.txt11
-rw-r--r--tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp224
4 files changed, 236 insertions, 20 deletions
diff --git a/tests/auto/corelib/text/CMakeLists.txt b/tests/auto/corelib/text/CMakeLists.txt
index 5b53538b9e..bbe86a12a5 100644
--- a/tests/auto/corelib/text/CMakeLists.txt
+++ b/tests/auto/corelib/text/CMakeLists.txt
@@ -21,6 +21,7 @@ add_subdirectory(qstringmatcher)
add_subdirectory(qstringtokenizer)
add_subdirectory(qstringview)
add_subdirectory(qtextboundaryfinder)
+add_subdirectory(qunicodetools)
# QTBUG-87414 # special case
if(NOT ANDROID)
add_subdirectory(qlocale)
diff --git a/tests/auto/corelib/text/qchar/tst_qchar.cpp b/tests/auto/corelib/text/qchar/tst_qchar.cpp
index 3c621948fe..fe7fd24544 100644
--- a/tests/auto/corelib/text/qchar/tst_qchar.cpp
+++ b/tests/auto/corelib/text/qchar/tst_qchar.cpp
@@ -30,7 +30,6 @@
#include <qchar.h>
#include <qfile.h>
#include <qstringlist.h>
-#include <private/qunicodetables_p.h>
class tst_QChar : public QObject
{
@@ -67,7 +66,6 @@ private slots:
void digitValue();
void mirroredChar();
void decomposition();
- void lineBreakClass();
void script();
void normalization_data();
void normalization();
@@ -748,24 +746,6 @@ void tst_QChar::decomposition()
}
}
-void tst_QChar::lineBreakClass()
-{
- QVERIFY(QUnicodeTables::lineBreakClass(0x0029) == QUnicodeTables::LineBreak_CP);
- QVERIFY(QUnicodeTables::lineBreakClass(0x0041) == QUnicodeTables::LineBreak_AL);
- QVERIFY(QUnicodeTables::lineBreakClass(0x0033) == QUnicodeTables::LineBreak_NU);
- QVERIFY(QUnicodeTables::lineBreakClass(0x00ad) == QUnicodeTables::LineBreak_BA);
- QVERIFY(QUnicodeTables::lineBreakClass(0x05d0) == QUnicodeTables::LineBreak_HL);
- QVERIFY(QUnicodeTables::lineBreakClass(0xfffc) == QUnicodeTables::LineBreak_CB);
- QVERIFY(QUnicodeTables::lineBreakClass(0xe0164) == QUnicodeTables::LineBreak_CM);
- QVERIFY(QUnicodeTables::lineBreakClass(0x2f9a4) == QUnicodeTables::LineBreak_ID);
- QVERIFY(QUnicodeTables::lineBreakClass(0x10000) == QUnicodeTables::LineBreak_AL);
- QVERIFY(QUnicodeTables::lineBreakClass(0x1f1e6) == QUnicodeTables::LineBreak_RI);
-
- // mapped to AL:
- QVERIFY(QUnicodeTables::lineBreakClass(0xfffd) == QUnicodeTables::LineBreak_AL); // AI -> AL
- QVERIFY(QUnicodeTables::lineBreakClass(0x100000) == QUnicodeTables::LineBreak_AL); // XX -> AL
-}
-
void tst_QChar::script()
{
QVERIFY(QChar::script(0x0020) == QChar::Script_Common);
diff --git a/tests/auto/corelib/text/qunicodetools/CMakeLists.txt b/tests/auto/corelib/text/qunicodetools/CMakeLists.txt
new file mode 100644
index 0000000000..7c624af995
--- /dev/null
+++ b/tests/auto/corelib/text/qunicodetools/CMakeLists.txt
@@ -0,0 +1,11 @@
+#####################################################################
+## tst_qunicodetools Test:
+#####################################################################
+
+qt_internal_add_test(tst_qunicodetools
+ SOURCES
+ tst_qunicodetools.cpp
+ PUBLIC_LIBRARIES
+ Qt::CorePrivate
+)
+
diff --git a/tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp b/tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp
new file mode 100644
index 0000000000..e84674c712
--- /dev/null
+++ b/tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp
@@ -0,0 +1,224 @@
+/****************************************************************************
+**
+** Copyright (C) 2021 The Qt Company Ltd.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the test suite of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QTest>
+#include <qchar.h>
+#include <qfile.h>
+#include <qstringlist.h>
+#include <private/qunicodetables_p.h>
+#include <private/qunicodetools_p.h>
+
+class tst_QUnicodeTools : public QObject
+{
+ Q_OBJECT
+private slots:
+ void lineBreakClass();
+ void graphemeBreakClass_data();
+ void graphemeBreakClass();
+ void wordBreakClass_data();
+ void wordBreakClass();
+ void sentenceBreakClass_data();
+ void sentenceBreakClass();
+};
+
+void tst_QUnicodeTools::lineBreakClass()
+{
+ QVERIFY(QUnicodeTables::lineBreakClass(0x0029) == QUnicodeTables::LineBreak_CP);
+ QVERIFY(QUnicodeTables::lineBreakClass(0x0041) == QUnicodeTables::LineBreak_AL);
+ QVERIFY(QUnicodeTables::lineBreakClass(0x0033) == QUnicodeTables::LineBreak_NU);
+ QVERIFY(QUnicodeTables::lineBreakClass(0x00ad) == QUnicodeTables::LineBreak_BA);
+ QVERIFY(QUnicodeTables::lineBreakClass(0x05d0) == QUnicodeTables::LineBreak_HL);
+ QVERIFY(QUnicodeTables::lineBreakClass(0xfffc) == QUnicodeTables::LineBreak_CB);
+ QVERIFY(QUnicodeTables::lineBreakClass(0xe0164) == QUnicodeTables::LineBreak_CM);
+ QVERIFY(QUnicodeTables::lineBreakClass(0x2f9a4) == QUnicodeTables::LineBreak_ID);
+ QVERIFY(QUnicodeTables::lineBreakClass(0x10000) == QUnicodeTables::LineBreak_AL);
+ QVERIFY(QUnicodeTables::lineBreakClass(0x1f1e6) == QUnicodeTables::LineBreak_RI);
+
+ // mapped to AL:
+ QVERIFY(QUnicodeTables::lineBreakClass(0xfffd) == QUnicodeTables::LineBreak_AL); // AI -> AL
+ QVERIFY(QUnicodeTables::lineBreakClass(0x100000) == QUnicodeTables::LineBreak_AL); // XX -> AL
+}
+
+static void verifyCharClassPattern(QString str, qulonglong pattern,
+ QUnicodeTools::CharAttributeOptions type)
+{
+ QUnicodeTools::ScriptItemArray scriptItems;
+ QUnicodeTools::initScripts(str, &scriptItems);
+ QCharAttributes cleared;
+ memset(&cleared, 0, sizeof(QCharAttributes));
+ QList<QCharAttributes> attributes(str.size() + 1, cleared);
+ QUnicodeTools::initCharAttributes(str, scriptItems.data(), scriptItems.count(),
+ attributes.data(), type);
+
+ qulonglong bit = 1ull << str.size();
+ Q_ASSERT(str.size() < std::numeric_limits<decltype(bit)>::digits);
+ for (qsizetype i = 0; i < str.size(); ++i) {
+ bit >>= 1;
+ bool test = pattern & bit;
+ bool isSet = false;
+ switch (type) {
+ case QUnicodeTools::GraphemeBreaks:
+ isSet = attributes[i].graphemeBoundary;
+ break;
+ case QUnicodeTools::WordBreaks:
+ isSet = attributes[i].wordBreak;
+ break;
+ case QUnicodeTools::SentenceBreaks:
+ isSet = attributes[i].sentenceBoundary;
+ break;
+ default:
+ Q_UNREACHABLE();
+ break;
+ };
+ QVERIFY2(isSet == test,
+ qPrintable(QString("Character #%1: 0x%2, isSet: %3")
+ .arg(i).arg(str[i].unicode(), 0, 16).arg(isSet)));
+ }
+}
+
+void tst_QUnicodeTools::graphemeBreakClass_data()
+{
+ QTest::addColumn<QString>("str");
+ QTest::addColumn<int>("pattern");
+
+ // A grapheme cluster is a set of unicode code points that is
+ // seen as a single character.
+ // The pattern has one bit per code point.
+ // A pattern bit is set whenever a new grapheme cluster begins.
+ // A pattern bit is cleared for every code point that modifies
+ // the current grapheme cluster.
+
+ QTest::addRow("g and combining diaeresis")
+ << u8"g\u0308"
+ << 0b10;
+ QTest::addRow("hangul gag single")
+ << u8"\uAC01"
+ << 0b1;
+ QTest::addRow("hangul gag cluster")
+ << u8"\u1100\u1161\u11A8"
+ << 0b100;
+ QTest::addRow("thai ko")
+ << u8"\u0E01"
+ << 0b1;
+ QTest::addRow("tamil ni")
+ << u8"\u0BA8\u0BBF"
+ << 0b10;
+ QTest::addRow("thai e")
+ << u8"\u0E40"
+ << 0b1;
+ QTest::addRow("thai kam")
+ << u8"\u0E01\u0E33"
+ << 0b10;
+ QTest::addRow("devanagari ssi")
+ << u8"\u0937\u093F"
+ << 0b10;
+ QTest::addRow("thai am")
+ << u8"\u0E33"
+ << 0b1;
+ QTest::addRow("devanagari ssa")
+ << u8"\u0937"
+ << 0b1;
+ QTest::addRow("devanagari i")
+ << u8"\u093F"
+ << 0b1;
+ QTest::addRow("devanagari kshi")
+ << u8"\u0915\u094D\u0937\u093F"
+ << 0b1000;
+}
+
+void tst_QUnicodeTools::graphemeBreakClass()
+{
+ QFETCH(QString, str);
+ QFETCH(int, pattern);
+
+ verifyCharClassPattern(str, pattern, QUnicodeTools::GraphemeBreaks);
+}
+
+void tst_QUnicodeTools::wordBreakClass_data()
+{
+ QTest::addColumn<QString>("str");
+ QTest::addColumn<qulonglong>("pattern");
+
+ // Word boundaries are used for things like selection and whole word search.
+ // Typically they are beginning of words, whitespaces and punctuation.
+
+ QTest::addRow("two words")
+ << "two words"
+ << 0b100110000ULL;
+ // breaks at beginning of words and space
+ QTest::addRow("three words")
+ << "The quick fox"
+ << 0b1001100001100ULL;
+ // breaks at beginning of words and spaces
+ QTest::addRow("quoted")
+ << u8"The quick (\"brown\") fox"
+ << 0b10011000011'110000'111100ULL;
+ // as above plus quotes and parentheses
+ QTest::addRow("long")
+ << "The quick (\"brown\") fox can’t jump 32.3 feet, right?"
+ << 0b10011000011'110000'11110011000011000110001100011100001ULL;
+ // as above plus comma and question mark
+ // but decimal separator and apostrophes are not word breaks
+}
+
+void tst_QUnicodeTools::wordBreakClass()
+{
+ QFETCH(QString, str);
+ QFETCH(qulonglong, pattern);
+
+ verifyCharClassPattern(str, pattern, QUnicodeTools::WordBreaks);
+}
+
+void tst_QUnicodeTools::sentenceBreakClass_data()
+{
+ QTest::addColumn<QString>("str");
+ QTest::addColumn<qulonglong>("pattern");
+
+ // Sentence boundaries are at the beginning of each new sentence
+
+ QTest::addRow("one sentence")
+ << "One sentence."
+ << 0b1000000000000ULL;
+ QTest::addRow("two sentences")
+ << "One sentence. One more."
+ << 0b10000000000000100000000ULL;
+ QTest::addRow("question")
+ << "Who said \"Hey you?\" I did."
+ << 0b100000000'000000000'00100000ULL;
+}
+
+void tst_QUnicodeTools::sentenceBreakClass()
+{
+ QFETCH(QString, str);
+ QFETCH(qulonglong, pattern);
+
+ verifyCharClassPattern(str, pattern, QUnicodeTools::SentenceBreaks);
+}
+
+QTEST_APPLESS_MAIN(tst_QUnicodeTools)
+#include "tst_qunicodetools.moc"