summaryrefslogtreecommitdiffstats
path: root/tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp')
-rw-r--r--tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp199
1 files changed, 199 insertions, 0 deletions
diff --git a/tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp b/tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp
new file mode 100644
index 0000000000..774c01c73b
--- /dev/null
+++ b/tests/auto/corelib/text/qunicodetools/tst_qunicodetools.cpp
@@ -0,0 +1,199 @@
+// Copyright (C) 2021 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only
+
+#include <QTest>
+#include <qchar.h>
+#include <qfile.h>
+#include <qstringlist.h>
+#include <private/qunicodetables_p.h>
+#include <private/qunicodetools_p.h>
+
+// Test fixture for the Unicode character-classification helpers.
+// Each foo_data() slot supplies the rows consumed by the slot named foo().
+// The declaration order below is kept on purpose: it is the execution order.
+class tst_QUnicodeTools : public QObject
+{
+    Q_OBJECT
+
+private Q_SLOTS:
+    // Line-break class lookup for individual code points.
+    void lineBreakClass();
+
+    // Grapheme cluster boundaries.
+    void graphemeBreakClass_data();
+    void graphemeBreakClass();
+
+    // Word boundaries.
+    void wordBreakClass_data();
+    void wordBreakClass();
+
+    // Sentence boundaries.
+    void sentenceBreakClass_data();
+    void sentenceBreakClass();
+};
+
+// Spot-checks QUnicodeTables::lineBreakClass() for a handful of code points,
+// both within the 16-bit range and above it.
+void tst_QUnicodeTools::lineBreakClass()
+{
+    // QCOMPARE is used instead of QVERIFY(a == b) so that a failure reports
+    // the actual and the expected value, not just the failed expression.
+
+    // Code points that fit into 16 bits:
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x0029), QUnicodeTables::LineBreak_CP);
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x0041), QUnicodeTables::LineBreak_AL);
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x0033), QUnicodeTables::LineBreak_NU);
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x00ad), QUnicodeTables::LineBreak_BA);
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x05d0), QUnicodeTables::LineBreak_HL);
+    QCOMPARE(QUnicodeTables::lineBreakClass(0xfffc), QUnicodeTables::LineBreak_CB);
+
+    // Code points above 0xffff:
+    QCOMPARE(QUnicodeTables::lineBreakClass(0xe0164), QUnicodeTables::LineBreak_CM);
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x2f9a4), QUnicodeTables::LineBreak_ID);
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x10000), QUnicodeTables::LineBreak_AL);
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x1f1e6), QUnicodeTables::LineBreak_RI);
+
+    // mapped to AL:
+    QCOMPARE(QUnicodeTables::lineBreakClass(0xfffd), QUnicodeTables::LineBreak_AL); // AI -> AL
+    QCOMPARE(QUnicodeTables::lineBreakClass(0x100000), QUnicodeTables::LineBreak_AL); // XX -> AL
+}
+
+// Runs QUnicodeTools::initCharAttributes() on str for the attribute kind
+// selected by type and compares the result, position by position, with pattern.
+//
+// pattern carries one bit per UTF-16 code unit of str: of the str.size() low
+// bits, the most significant one belongs to str[0] and the least significant
+// one to the last code unit. A set bit means the attribute is expected to be
+// set at that position, a cleared bit that it is expected to be unset.
+//
+// Only GraphemeBreaks, WordBreaks and SentenceBreaks are supported for type.
+// A mismatch is reported through QVERIFY2, which returns from this helper.
+static void verifyCharClassPattern(const QString &str, qulonglong pattern,
+                                   QUnicodeTools::CharAttributeOptions type)
+{
+    // Validate the length before shifting: a shift count that is not smaller
+    // than the width of the type is undefined behavior.
+    Q_ASSERT(str.size() < std::numeric_limits<qulonglong>::digits);
+
+    QUnicodeTools::ScriptItemArray scriptItems;
+    QUnicodeTools::initScripts(str, &scriptItems);
+
+    // One extra, zero-initialized entry past the last code unit.
+    QCharAttributes cleared;
+    memset(&cleared, 0, sizeof(QCharAttributes));
+    QList<QCharAttributes> attributes(str.size() + 1, cleared);
+    QUnicodeTools::initCharAttributes(str, scriptItems.data(), scriptItems.size(),
+                                      attributes.data(), type);
+
+    // Starts one position above the bit for str[0]; shifted down before each use.
+    qulonglong bit = 1ull << str.size();
+    for (qsizetype i = 0; i < str.size(); ++i) {
+        bit >>= 1;
+        const bool expected = pattern & bit;
+        bool isSet = false;
+        switch (type) {
+        case QUnicodeTools::GraphemeBreaks:
+            isSet = attributes[i].graphemeBoundary;
+            break;
+        case QUnicodeTools::WordBreaks:
+            isSet = attributes[i].wordBreak;
+            break;
+        case QUnicodeTools::SentenceBreaks:
+            isSet = attributes[i].sentenceBoundary;
+            break;
+        default:
+            Q_UNREACHABLE();
+            break;
+        }
+        QVERIFY2(isSet == expected,
+                 qPrintable(QString("Character #%1: 0x%2, isSet: %3")
+                            .arg(i).arg(str[i].unicode(), 0, 16).arg(isSet)));
+    }
+}
+
+// Data rows for graphemeBreakClass(): a string and the expected bit pattern
+// of grapheme cluster starts.
+void tst_QUnicodeTools::graphemeBreakClass_data()
+{
+    QTest::addColumn<QString>("str");
+    // qulonglong, like the word and sentence tests and like the parameter of
+    // verifyCharClassPattern(), so patterns are not limited to the width of int.
+    QTest::addColumn<qulonglong>("pattern");
+
+    // A grapheme cluster is a set of unicode code points that is
+    // seen as a single character.
+    // The pattern has one bit per code point.
+    // A pattern bit is set whenever a new grapheme cluster begins.
+    // A pattern bit is cleared for every code point that modifies
+    // the current grapheme cluster.
+
+    QTest::addRow("g and combining diaeresis")
+            << u8"g\u0308"
+            << 0b10ULL;
+    QTest::addRow("hangul gag single")
+            << u8"\uAC01"
+            << 0b1ULL;
+    QTest::addRow("hangul gag cluster")
+            << u8"\u1100\u1161\u11A8"
+            << 0b100ULL;
+    QTest::addRow("thai ko")
+            << u8"\u0E01"
+            << 0b1ULL;
+    QTest::addRow("tamil ni")
+            << u8"\u0BA8\u0BBF"
+            << 0b10ULL;
+    QTest::addRow("thai e")
+            << u8"\u0E40"
+            << 0b1ULL;
+    QTest::addRow("thai kam")
+            << u8"\u0E01\u0E33"
+            << 0b10ULL;
+    QTest::addRow("devanagari ssi")
+            << u8"\u0937\u093F"
+            << 0b10ULL;
+    QTest::addRow("thai am")
+            << u8"\u0E33"
+            << 0b1ULL;
+    QTest::addRow("devanagari ssa")
+            << u8"\u0937"
+            << 0b1ULL;
+    QTest::addRow("devanagari i")
+            << u8"\u093F"
+            << 0b1ULL;
+    QTest::addRow("devanagari kshi")
+            << u8"\u0915\u094D\u0937\u093F"
+            << 0b1000ULL;
+}
+
+// Checks the grapheme boundary attribute of every row against its pattern.
+void tst_QUnicodeTools::graphemeBreakClass()
+{
+    QFETCH(QString, str);
+    QFETCH(qulonglong, pattern);
+
+    verifyCharClassPattern(str, pattern, QUnicodeTools::GraphemeBreaks);
+}
+
+// Data rows for wordBreakClass(): a string and the expected bit pattern of
+// word breaks, one bit per character with the leftmost bit for the first one.
+void tst_QUnicodeTools::wordBreakClass_data()
+{
+    QTest::addColumn<QString>("str");
+    QTest::addColumn<qulonglong>("pattern");
+
+    // Word boundaries are used for things like selection and whole word search.
+    // Typically they are beginning of words, whitespaces and punctuation.
+
+    // breaks at beginning of words and space
+    QTest::addRow("two words")
+            << "two words"
+            << 0b100110000ULL;
+    // breaks at beginning of words and spaces
+    QTest::addRow("three words")
+            << "The quick fox"
+            << 0b1001100001100ULL;
+    // as above plus quotes and parentheses
+    QTest::addRow("quoted")
+            << u8"The quick (\"brown\") fox"
+            << 0b10011000011'110000'111100ULL;
+    // as above plus comma and question mark
+    // but decimal separator and apostrophes are not word breaks
+    //
+    // The apostrophe is U+2019. It is written as an escape inside a u8 literal
+    // so that the bytes are UTF-8 regardless of the compiler's source and
+    // execution character sets.
+    QTest::addRow("long")
+            << u8"The quick (\"brown\") fox can\u2019t jump 32.3 feet, right?"
+            << 0b10011000011'110000'11110011000011000110001100011100001ULL;
+}
+
+// Checks the word break attribute of the current data row against its pattern.
+void tst_QUnicodeTools::wordBreakClass()
+{
+    // Neither value is modified here, so both are fetched as constants.
+    QFETCH(const QString, str);
+    QFETCH(const qulonglong, pattern);
+
+    verifyCharClassPattern(str, pattern, QUnicodeTools::WordBreaks);
+}
+
+// Data rows for sentenceBreakClass(): a string and the expected bit pattern
+// of sentence starts, one bit per character with the leftmost bit for the
+// first one.
+void tst_QUnicodeTools::sentenceBreakClass_data()
+{
+    QTest::addColumn<QString>("str");
+    QTest::addColumn<qulonglong>("pattern");
+
+    // Sentence boundaries are at the beginning of each new sentence
+    static const struct {
+        const char *tag;
+        const char *text;
+        qulonglong starts;
+    } samples[] = {
+        { "one sentence",
+          "One sentence.",
+          0b1000000000000ULL },
+        { "two sentences",
+          "One sentence. One more.",
+          0b10000000000000100000000ULL },
+        { "question",
+          "Who said \"Hey you?\" I did.",
+          0b100000000'000000000'00100000ULL },
+    };
+
+    for (const auto &sample : samples)
+        QTest::addRow("%s", sample.tag) << sample.text << sample.starts;
+}
+
+// Checks the sentence boundary attribute of the current data row against its
+// pattern.
+void tst_QUnicodeTools::sentenceBreakClass()
+{
+    // Neither value is modified here, so both are fetched as constants.
+    QFETCH(const QString, str);
+    QFETCH(const qulonglong, pattern);
+
+    verifyCharClassPattern(str, pattern, QUnicodeTools::SentenceBreaks);
+}
+
+// Test entry point for tst_QUnicodeTools; the APPLESS variant of the Qt Test
+// main macro is used, i.e. no application object is created for the run.
+QTEST_APPLESS_MAIN(tst_QUnicodeTools)
+// The test class is declared in this .cpp file, so the moc output for it is
+// included here rather than being built from a header.
+#include "tst_qunicodetools.moc"