summary refs log tree commit diff stats
path: root/tests
diff options
context:
space:
mode:
author Thiago Macieira <thiago.macieira@intel.com> 2022-05-19 16:36:38 -0700
committer Thiago Macieira <thiago.macieira@intel.com> 2022-05-23 14:53:18 -0700
commit aef27c5aa2f43e8e34970168dfc517062cc87db8 (patch)
tree fa05041472d28d9f51c09f0a16250f4f8cd5de55 /tests
parent 9bad4be21482d36bff76357a000e008755b60361 (diff)
tst_QStringConverter: improve the char-by-char UTF-8 testing
The utf8.txt file was only 21 bytes long and contained exactly two non-ASCII characters, so it provided very little coverage. This commit brings back the UTF-8 test rows that existed before commit 18ec53156ee704fdb4977436fccfdc85333e614b deleted tst_Utf8. There is a lot of overlap with some of the other rows in this test, though.

Pick-to: 6.2 6.3
Change-Id: I77c8221eb2824c369feffffd16f094619b69faef
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'tests')
-rw-r--r-- tests/auto/corelib/text/qstringconverter/CMakeLists.txt | 2
-rw-r--r-- tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp | 228
-rw-r--r-- tests/auto/corelib/text/qstringconverter/utf8.txt | 1
3 files changed, 168 insertions, 63 deletions
diff --git a/tests/auto/corelib/text/qstringconverter/CMakeLists.txt b/tests/auto/corelib/text/qstringconverter/CMakeLists.txt
index 0e344cc8d7..07e33e26ca 100644
--- a/tests/auto/corelib/text/qstringconverter/CMakeLists.txt
+++ b/tests/auto/corelib/text/qstringconverter/CMakeLists.txt
@@ -4,8 +4,6 @@
## tst_qstringconverter Test:
#####################################################################
-list(APPEND test_data "utf8.txt")
-
qt_internal_add_test(tst_qstringconverter
SOURCES
tst_qstringconverter.cpp
diff --git a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
index 7b1a8d212e..0471cd8bee 100644
--- a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
+++ b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
@@ -48,6 +48,50 @@ static const std::array codes = {
Codec{ "System", QStringConverter::System, localeIsUtf8() ? FullUnicode : AsciiOnly }
};
+struct TestString // one shared test vector: the same text spelled in UTF-8 and in UTF-16
+{
+ const char *description; // label formatted into the QTest row names
+ QUtf8StringView utf8; // the text as UTF-8
+ QStringView utf16; // the same text as UTF-16
+ CodecLimitation limitation = FullUnicode; // roundtrip_data() skips codecs whose limitation compares lower than this
+};
+static const std::array testStrings = { // rows consumed by convertUtf8_data() and roundtrip_data()
+ TestString{ "empty", "", u"", AsciiOnly },
+ TestString{ "null-character", QUtf8StringView("", 1), QStringView(u"", 1), AsciiOnly }, // explicit length 1, so each view holds the single NUL character
+ TestString{ "ascii-text",
+ "This is a standard US-ASCII message",
+ "This is a standard US-ASCII message" u"", // the adjacent empty UTF-16 literal turns the concatenation into a char16_t string
+ AsciiOnly
+ },
+ TestString{ "ascii-with-control",
+ "\1This\2is\3an\4US-ASCII\020 message interspersed with control chars",
+ "\1This\2is\3an\4US-ASCII\020 message interspersed with control chars" u"",
+ AsciiOnly
+ },
+
+ TestString{ "nbsp", "\u00a0", u"\u00a0", Latin1Only },
+ TestString{ "latin1-text",
+ "Hyvää päivää, käyhän että tuon kannettavani saunaan?",
+ "Hyvää päivää, käyhän että tuon kannettavani saunaan?" u"",
+ Latin1Only
+ },
+
+#define ROW(name, string) TestString{ name, u8"" string, u"" string } // same literal in both encodings; limitation keeps its default, FullUnicode
+ ROW("euro", "€"),
+ //ROW("bom", "\ufeff"), // Can't test this because QString::fromUtf8 consumes it
+ ROW("replacement", "\ufffd"),
+ ROW("supplementary-plane", "\U00010203"),
+ ROW("mahjong", "\U0001f000\U0001f001\U0001f002\U0001f003\U0001f004\U0001f005"
+ "\U0001f006\U0001f007\U0001f008\U0001f009\U0001f00a\U0001f00b\U0001f00c"
+ "\U0001f00d\U0001f00e\U0001f00f"),
+ ROW("emojis", "😂, 😃, 🧘🏻‍♂️, 🌍, 🌦️, 🍞, 🚗, 📞, 🎉, ❤️, 🏁"), // https://en.wikipedia.org/wiki/Emoji
+ ROW("last-valid", "\U0010fffd"), // U+10FFFF is the strict last, but it's a non-character
+ ROW("mixed-bmp-only", "abc\u00a0\u00e1\u00e9\u01fd \u20acdef"),
+ ROW("mixed-full", "abc\u00a0\u00e1\u00e9\u01fd \U0010FFFD \u20acdef"),
+ ROW("xml", "<doc>\U00010000\U0010FFFD</doc>\r\n")
+#undef ROW
+};
+
class tst_QStringConverter : public QObject
{
Q_OBJECT
@@ -59,7 +103,10 @@ private slots:
void constructByName();
+ void convertUtf8_data();
void convertUtf8();
+ void convertUtf8CharByChar_data() { convertUtf8_data(); }
+ void convertUtf8CharByChar();
void roundtrip_data();
void roundtrip();
@@ -118,94 +165,155 @@ void tst_QStringConverter::constructByName()
QVERIFY(!strcmp(decoder.name(), "UTF-16"));
}
-void tst_QStringConverter::convertUtf8()
+void tst_QStringConverter::convertUtf8_data() // row source for convertUtf8(); convertUtf8CharByChar_data() forwards here too
{
- QFile file(QFINDTESTDATA("utf8.txt"));
+ QTest::addColumn<QStringConverter::Encoding>("encoding");
+ QTest::addColumn<QUtf8StringView>("utf8");
+ QTest::addColumn<QStringView>("utf16");
+ auto addRow = [](const TestString &s) { // emits one or two rows per test string
+ QTest::addRow("Utf8:%s", s.description) << QStringDecoder::Utf8 << s.utf8 << s.utf16;
+ if (localeIsUtf8()) // the System codec gets a row only when localeIsUtf8() reports true
+ QTest::addRow("System:%s", s.description) << QStringDecoder::System << s.utf8 << s.utf16;
+ };
+
+ for (const TestString &s : testStrings)
+ addRow(s);
+}
- if (!file.open(QIODevice::ReadOnly))
- QFAIL(qPrintable("File could not be opened: " + file.errorString()));
+void tst_QStringConverter::convertUtf8() // whole-buffer decode and encode of each row, through both the call operator and the named methods
+{
+ QFETCH(QStringConverter::Encoding, encoding);
+ QFETCH(QUtf8StringView, utf8);
+ QFETCH(QStringView, utf16);
- QByteArray ba = file.readAll();
- QVERIFY(!ba.isEmpty());
+ QByteArray ba = QByteArray::fromRawData(utf8.data(), utf8.size()); // QByteArray handle over the row's UTF-8 bytes
- {
- QStringDecoder decoder(QStringDecoder::Utf8);
- QVERIFY(decoder.isValid());
- QString uniString = decoder(ba);
- QCOMPARE(uniString, QString::fromUtf8(ba));
- QCOMPARE(ba, uniString.toUtf8());
- uniString = decoder.decode(ba);
- QCOMPARE(uniString, QString::fromUtf8(ba));
- QCOMPARE(ba, uniString.toUtf8());
-
- QStringEncoder encoder(QStringEncoder::Utf8);
- QCOMPARE(ba, encoder(uniString));
- QCOMPARE(ba, encoder.encode(uniString));
+ QStringDecoder decoder(encoding);
+ QVERIFY(decoder.isValid());
+ QString uniString = decoder(ba);
+ QCOMPARE(uniString, utf16); // decoded text must equal the row's expected UTF-16
+ QCOMPARE(uniString, QString::fromUtf8(ba)); // and agree with QString's own UTF-8 conversion
+ QCOMPARE(ba, uniString.toUtf8());
+
+ // same checks again, this time through the named .decode() method on the same decoder
+ uniString = decoder.decode(ba);
+ QCOMPARE(uniString, utf16);
+ QCOMPARE(uniString, QString::fromUtf8(ba));
+ QCOMPARE(ba, uniString.toUtf8());
+
+ QStringEncoder encoder(encoding);
+ QByteArray reencoded = encoder(utf16);
+ QCOMPARE(reencoded, utf8); // encoding the UTF-16 view must reproduce the row's UTF-8 bytes
+ QCOMPARE(reencoded, uniString.toUtf8());
+
+ // same checks again, this time through the named .encode() method on the same encoder
+ reencoded = encoder.encode(utf16);
+ QCOMPARE(reencoded, utf8);
+ QCOMPARE(reencoded, uniString.toUtf8());
+
+ if (utf16.isEmpty()) // doubling an empty string would never reach MinSize
+ return;
+
+ // repeat with a longer input: double both forms in lockstep until either reaches MinSize
+ constexpr qsizetype MinSize = 128;
+ uniString = utf16.toString();
+ while (uniString.size() < MinSize && ba.size() < MinSize) {
+ uniString += uniString;
+ ba += ba;
}
+ QCOMPARE(decoder(ba), uniString);
+ QCOMPARE(encoder(uniString), ba);
+}
- {
- // once again converting char by char
- QStringDecoder decoder(QStringDecoder::Utf8);
- QVERIFY(decoder.isValid());
- QString uniString;
- for (int i = 0; i < ba.size(); ++i)
- uniString += decoder(QByteArrayView(ba).sliced(i, 1));
- QCOMPARE(uniString, QString::fromUtf8(ba));
- uniString.clear();
- for (int i = 0; i < ba.size(); ++i)
- uniString += decoder.decode(QByteArrayView(ba).sliced(i, 1));
- QCOMPARE(uniString, QString::fromUtf8(ba));
-
- QStringEncoder encoder(QStringEncoder::Utf8);
- QByteArray reencoded;
- for (int i = 0; i < uniString.size(); ++i)
- reencoded += encoder(QStringView(uniString).sliced(i, 1));
- QCOMPARE(ba, encoder(uniString));
- reencoded.clear();
- for (int i = 0; i < uniString.size(); ++i)
- reencoded += encoder.encode(QStringView(uniString).sliced(i, 1));
- QCOMPARE(ba, encoder(uniString));
- }
+void tst_QStringConverter::convertUtf8CharByChar() // same rows as convertUtf8(), but fed to the converters one unit at a time
+{
+ QFETCH(QStringConverter::Encoding, encoding);
+ QFETCH(QUtf8StringView, utf8);
+ QFETCH(QStringView, utf16);
+
+ QByteArray ba = QByteArray::fromRawData(utf8.data(), utf8.size()); // QByteArray handle over the row's UTF-8 bytes
+
+ QStringDecoder decoder(encoding);
+ QVERIFY(decoder.isValid());
+ QString uniString;
+ for (int i = 0; i < ba.size(); ++i)
+ uniString += decoder(QByteArrayView(ba).sliced(i, 1)); // one input byte per call, same decoder object throughout
+ QCOMPARE(uniString, utf16);
+ QCOMPARE(uniString, QString::fromUtf8(ba));
+ uniString.clear(); // start the second pass from an empty result
+
+ // second pass over the same bytes, through the named .decode() method
+ for (int i = 0; i < ba.size(); ++i)
+ uniString += decoder.decode(QByteArrayView(ba).sliced(i, 1));
+ QCOMPARE(uniString, utf16);
+ QCOMPARE(uniString, QString::fromUtf8(ba));
+
+ QStringEncoder encoder(encoding);
+ QByteArray reencoded;
+ for (int i = 0; i < utf16.size(); ++i)
+ reencoded += encoder(utf16.sliced(i, 1)); // one UTF-16 code unit per call, same encoder object throughout
+ QCOMPARE(reencoded, ba); // unlike the removed code, the accumulated bytes themselves are checked
+ reencoded.clear();
+
+ // second pass over the same code units, through the named .encode() method
+ for (int i = 0; i < utf16.size(); ++i)
+ reencoded += encoder.encode(utf16.sliced(i, 1));
+ QCOMPARE(reencoded, ba);
}
+// Builds the rows for roundtrip(): every shared test string that a codec can
+// represent, plus one row per Chakma digit for the codecs marked FullUnicode.
void tst_QStringConverter::roundtrip_data()
{
- QTest::addColumn<QString>("utf16");
+ QTest::addColumn<QStringView>("utf16");
QTest::addColumn<QStringConverter::Encoding>("code");
// TODO: include flag variations, too.
for (const auto code : codes) {
- QTest::addRow("empty-%s", code.name) << u""_s << code.code;
+ for (const TestString &s : testStrings) {
+ // rules:
+ // 1) don't pass the null character to the System codec
+ // 2) only operate on a string that will properly convert
+ if (code.code == QStringConverter::System && s.utf16.contains(QChar(0)))
+ continue;
+ if (code.limitation < s.limitation)
+ continue;
+ QTest::addRow("%s:%s", code.name, s.description) << s.utf16 << code.code;
+ }
+
if (code.limitation == FullUnicode) {
const char32_t zeroVal = 0x11136; // Unicode's representation of Chakma zero
- const QChar data[] = {
- QChar::highSurrogate(zeroVal), QChar::lowSurrogate(zeroVal),
- QChar::highSurrogate(zeroVal + 1), QChar::lowSurrogate(zeroVal + 1),
- QChar::highSurrogate(zeroVal + 2), QChar::lowSurrogate(zeroVal + 2),
- QChar::highSurrogate(zeroVal + 3), QChar::lowSurrogate(zeroVal + 3),
- QChar::highSurrogate(zeroVal + 4), QChar::lowSurrogate(zeroVal + 4),
- QChar::highSurrogate(zeroVal + 5), QChar::lowSurrogate(zeroVal + 5),
- QChar::highSurrogate(zeroVal + 6), QChar::lowSurrogate(zeroVal + 6),
- QChar::highSurrogate(zeroVal + 7), QChar::lowSurrogate(zeroVal + 7),
- QChar::highSurrogate(zeroVal + 8), QChar::lowSurrogate(zeroVal + 8),
- QChar::highSurrogate(zeroVal + 9), QChar::lowSurrogate(zeroVal + 9)
- };
- QTest::addRow("Chakma-digits-%s", code.name)
- << QString(data, std::size(data)) << code.code;
+ // The "utf16" column now holds a non-owning QStringView, so the code
+ // units it points at must still exist when roundtrip() fetches the row.
+ // A loop-local array would be gone by then; keep the surrogate pairs in
+ // static storage instead. Every FullUnicode codec writes the same
+ // values, so rows added earlier stay valid.
+ static QChar chakmaDigits[10][2];
+ for (int i = 0; i < 10; ++i) {
+ chakmaDigits[i][0] = QChar::highSurrogate(zeroVal + i);
+ chakmaDigits[i][1] = QChar::lowSurrogate(zeroVal + i);
+ QTest::addRow("%s:Chakma-digit-%d", code.name, i)
+ << QStringView(chakmaDigits[i], 2) << code.code;
+ }
}
}
}
void tst_QStringConverter::roundtrip()
{
- QFETCH(QString, utf16);
+ QFETCH(QStringView, utf16);
QFETCH(QStringConverter::Encoding, code);
QStringEncoder out(code);
- const QByteArray encoded = out.encode(utf16);
+ QByteArray encoded = out.encode(utf16); // no longer const: it is doubled in the loop below
QStringDecoder back(code);
const QString decoded = back.decode(encoded);
QCOMPARE(decoded, utf16);
+
+ if (utf16.isEmpty()) // doubling an empty string would never reach MinSize
+ return;
+
+ // repeat with a longer input: double both forms in lockstep until either reaches MinSize
+ constexpr qsizetype MinSize = 128;
+ QString uniString = utf16.toString();
+ while (uniString.size() < MinSize && encoded.size() < MinSize) {
+ uniString += uniString;
+ encoded += encoded;
+ }
+ QCOMPARE(out.encode(uniString), encoded); // the same encoder and decoder objects are reused here
+ QCOMPARE(back.decode(encoded), uniString);
}
void tst_QStringConverter::nonFlaggedCodepointFFFF() const
diff --git a/tests/auto/corelib/text/qstringconverter/utf8.txt b/tests/auto/corelib/text/qstringconverter/utf8.txt
deleted file mode 100644
index f5ab44c8f4..0000000000
--- a/tests/auto/corelib/text/qstringconverter/utf8.txt
+++ /dev/null
@@ -1 +0,0 @@
-<doc>𐀀􏿽</doc>