diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2022-05-19 16:36:38 -0700 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2022-05-23 14:53:18 -0700 |
commit | aef27c5aa2f43e8e34970168dfc517062cc87db8 (patch) | |
tree | fa05041472d28d9f51c09f0a16250f4f8cd5de55 /tests | |
parent | 9bad4be21482d36bff76357a000e008755b60361 (diff) |
tst_QStringConverter: improve the char-by-char UTF-8 testing
The utf8.txt file was only 21 bytes and contained exactly two non-ASCII
characters, which made it weak test data for char-by-char UTF-8 conversion.
This commit brings back the UTF-8 test rows that existed before commit
18ec53156ee704fdb4977436fccfdc85333e614b deleted tst_Utf8. There's a lot
of overlap with some of the other rows in this test, though.
Pick-to: 6.2 6.3
Change-Id: I77c8221eb2824c369feffffd16f094619b69faef
Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Reviewed-by: Lars Knoll <lars.knoll@qt.io>
Diffstat (limited to 'tests')
3 files changed, 168 insertions, 63 deletions
diff --git a/tests/auto/corelib/text/qstringconverter/CMakeLists.txt b/tests/auto/corelib/text/qstringconverter/CMakeLists.txt index 0e344cc8d7..07e33e26ca 100644 --- a/tests/auto/corelib/text/qstringconverter/CMakeLists.txt +++ b/tests/auto/corelib/text/qstringconverter/CMakeLists.txt @@ -4,8 +4,6 @@ ## tst_qstringconverter Test: ##################################################################### -list(APPEND test_data "utf8.txt") - qt_internal_add_test(tst_qstringconverter SOURCES tst_qstringconverter.cpp diff --git a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp index 7b1a8d212e..0471cd8bee 100644 --- a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp +++ b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp @@ -48,6 +48,50 @@ static const std::array codes = { Codec{ "System", QStringConverter::System, localeIsUtf8() ? FullUnicode : AsciiOnly } }; +struct TestString +{ + const char *description; + QUtf8StringView utf8; + QStringView utf16; + CodecLimitation limitation = FullUnicode; +}; +static const std::array testStrings = { + TestString{ "empty", "", u"", AsciiOnly }, + TestString{ "null-character", QUtf8StringView("", 1), QStringView(u"", 1), AsciiOnly }, + TestString{ "ascii-text", + "This is a standard US-ASCII message", + "This is a standard US-ASCII message" u"", + AsciiOnly + }, + TestString{ "ascii-with-control", + "\1This\2is\3an\4US-ASCII\020 message interspersed with control chars", + "\1This\2is\3an\4US-ASCII\020 message interspersed with control chars" u"", + AsciiOnly + }, + + TestString{ "nbsp", "\u00a0", u"\u00a0", Latin1Only }, + TestString{ "latin1-text", + "Hyvää päivää, käyhän että tuon kannettavani saunaan?", + "Hyvää päivää, käyhän että tuon kannettavani saunaan?" 
u"", + Latin1Only + }, + +#define ROW(name, string) TestString{ name, u8"" string, u"" string } + ROW("euro", "€"), + //ROW("bom", "\ufeff"), // Can't test this because QString::fromUtf8 consumes it + ROW("replacement", "\ufffd"), + ROW("supplementary-plane", "\U00010203"), + ROW("mahjong", "\U0001f000\U0001f001\U0001f002\U0001f003\U0001f004\U0001f005" + "\U0001f006\U0001f007\U0001f008\U0001f009\U0001f00a\U0001f00b\U0001f00c" + "\U0001f00d\U0001f00e\U0001f00f"), + ROW("emojis", "😂, 😃, 🧘🏻♂️, 🌍, 🌦️, 🍞, 🚗, 📞, 🎉, ❤️, 🏁"), // https://en.wikipedia.org/wiki/Emoji + ROW("last-valid", "\U0010fffd"), // U+10FFFF is the strict last, but it's a non-character + ROW("mixed-bmp-only", "abc\u00a0\u00e1\u00e9\u01fd \u20acdef"), + ROW("mixed-full", "abc\u00a0\u00e1\u00e9\u01fd \U0010FFFD \u20acdef"), + ROW("xml", "<doc>\U00010000\U0010FFFD</doc>\r\n") +#undef ROW +}; + class tst_QStringConverter : public QObject { Q_OBJECT @@ -59,7 +103,10 @@ private slots: void constructByName(); + void convertUtf8_data(); void convertUtf8(); + void convertUtf8CharByChar_data() { convertUtf8_data(); } + void convertUtf8CharByChar(); void roundtrip_data(); void roundtrip(); @@ -118,94 +165,155 @@ void tst_QStringConverter::constructByName() QVERIFY(!strcmp(decoder.name(), "UTF-16")); } -void tst_QStringConverter::convertUtf8() +void tst_QStringConverter::convertUtf8_data() { - QFile file(QFINDTESTDATA("utf8.txt")); + QTest::addColumn<QStringConverter::Encoding>("encoding"); + QTest::addColumn<QUtf8StringView>("utf8"); + QTest::addColumn<QStringView>("utf16"); + auto addRow = [](const TestString &s) { + QTest::addRow("Utf8:%s", s.description) << QStringDecoder::Utf8 << s.utf8 << s.utf16; + if (localeIsUtf8()) + QTest::addRow("System:%s", s.description) << QStringDecoder::System << s.utf8 << s.utf16; + }; + + for (const TestString &s : testStrings) + addRow(s); +} - if (!file.open(QIODevice::ReadOnly)) - QFAIL(qPrintable("File could not be opened: " + file.errorString())); +void 
tst_QStringConverter::convertUtf8() +{ + QFETCH(QStringConverter::Encoding, encoding); + QFETCH(QUtf8StringView, utf8); + QFETCH(QStringView, utf16); - QByteArray ba = file.readAll(); - QVERIFY(!ba.isEmpty()); + QByteArray ba = QByteArray::fromRawData(utf8.data(), utf8.size()); - { - QStringDecoder decoder(QStringDecoder::Utf8); - QVERIFY(decoder.isValid()); - QString uniString = decoder(ba); - QCOMPARE(uniString, QString::fromUtf8(ba)); - QCOMPARE(ba, uniString.toUtf8()); - uniString = decoder.decode(ba); - QCOMPARE(uniString, QString::fromUtf8(ba)); - QCOMPARE(ba, uniString.toUtf8()); - - QStringEncoder encoder(QStringEncoder::Utf8); - QCOMPARE(ba, encoder(uniString)); - QCOMPARE(ba, encoder.encode(uniString)); + QStringDecoder decoder(encoding); + QVERIFY(decoder.isValid()); + QString uniString = decoder(ba); + QCOMPARE(uniString, utf16); + QCOMPARE(uniString, QString::fromUtf8(ba)); + QCOMPARE(ba, uniString.toUtf8()); + + // do it again (using .decode()) + uniString = decoder.decode(ba); + QCOMPARE(uniString, utf16); + QCOMPARE(uniString, QString::fromUtf8(ba)); + QCOMPARE(ba, uniString.toUtf8()); + + QStringEncoder encoder(encoding); + QByteArray reencoded = encoder(utf16); + QCOMPARE(reencoded, utf8); + QCOMPARE(reencoded, uniString.toUtf8()); + + // do it again (using .encode()) + reencoded = encoder.encode(utf16); + QCOMPARE(reencoded, utf8); + QCOMPARE(reencoded, uniString.toUtf8()); + + if (utf16.isEmpty()) + return; + + // repeat, with a longer string + constexpr qsizetype MinSize = 128; + uniString = utf16.toString(); + while (uniString.size() < MinSize && ba.size() < MinSize) { + uniString += uniString; + ba += ba; } + QCOMPARE(decoder(ba), uniString); + QCOMPARE(encoder(uniString), ba); +} - { - // once again converting char by char - QStringDecoder decoder(QStringDecoder::Utf8); - QVERIFY(decoder.isValid()); - QString uniString; - for (int i = 0; i < ba.size(); ++i) - uniString += decoder(QByteArrayView(ba).sliced(i, 1)); - QCOMPARE(uniString, 
QString::fromUtf8(ba)); - uniString.clear(); - for (int i = 0; i < ba.size(); ++i) - uniString += decoder.decode(QByteArrayView(ba).sliced(i, 1)); - QCOMPARE(uniString, QString::fromUtf8(ba)); - - QStringEncoder encoder(QStringEncoder::Utf8); - QByteArray reencoded; - for (int i = 0; i < uniString.size(); ++i) - reencoded += encoder(QStringView(uniString).sliced(i, 1)); - QCOMPARE(ba, encoder(uniString)); - reencoded.clear(); - for (int i = 0; i < uniString.size(); ++i) - reencoded += encoder.encode(QStringView(uniString).sliced(i, 1)); - QCOMPARE(ba, encoder(uniString)); - } +void tst_QStringConverter::convertUtf8CharByChar() +{ + QFETCH(QStringConverter::Encoding, encoding); + QFETCH(QUtf8StringView, utf8); + QFETCH(QStringView, utf16); + + QByteArray ba = QByteArray::fromRawData(utf8.data(), utf8.size()); + + QStringDecoder decoder(encoding); + QVERIFY(decoder.isValid()); + QString uniString; + for (int i = 0; i < ba.size(); ++i) + uniString += decoder(QByteArrayView(ba).sliced(i, 1)); + QCOMPARE(uniString, utf16); + QCOMPARE(uniString, QString::fromUtf8(ba)); + uniString.clear(); + + // do it again (using .decode()) + for (int i = 0; i < ba.size(); ++i) + uniString += decoder.decode(QByteArrayView(ba).sliced(i, 1)); + QCOMPARE(uniString, utf16); + QCOMPARE(uniString, QString::fromUtf8(ba)); + + QStringEncoder encoder(encoding); + QByteArray reencoded; + for (int i = 0; i < utf16.size(); ++i) + reencoded += encoder(utf16.sliced(i, 1)); + QCOMPARE(reencoded, ba); + reencoded.clear(); + + // do it again (using .encode()) + for (int i = 0; i < utf16.size(); ++i) + reencoded += encoder.encode(utf16.sliced(i, 1)); + QCOMPARE(reencoded, ba); } void tst_QStringConverter::roundtrip_data() { - QTest::addColumn<QString>("utf16"); + QTest::addColumn<QStringView>("utf16"); QTest::addColumn<QStringConverter::Encoding>("code"); // TODO: include flag variations, too. 
for (const auto code : codes) { - QTest::addRow("empty-%s", code.name) << u""_s << code.code; + for (const TestString &s : testStrings) { + // rules: + // 1) don't pass the null character to the System codec + // 2) only operate on a string that will properly convert + if (code.code == QStringConverter::System && s.utf16.contains(QChar(0))) + continue; + if (code.limitation < s.limitation) + continue; + QTest::addRow("%s:%s", code.name, s.description) << s.utf16 << code.code; + } + if (code.limitation == FullUnicode) { const char32_t zeroVal = 0x11136; // Unicode's representation of Chakma zero - const QChar data[] = { - QChar::highSurrogate(zeroVal), QChar::lowSurrogate(zeroVal), - QChar::highSurrogate(zeroVal + 1), QChar::lowSurrogate(zeroVal + 1), - QChar::highSurrogate(zeroVal + 2), QChar::lowSurrogate(zeroVal + 2), - QChar::highSurrogate(zeroVal + 3), QChar::lowSurrogate(zeroVal + 3), - QChar::highSurrogate(zeroVal + 4), QChar::lowSurrogate(zeroVal + 4), - QChar::highSurrogate(zeroVal + 5), QChar::lowSurrogate(zeroVal + 5), - QChar::highSurrogate(zeroVal + 6), QChar::lowSurrogate(zeroVal + 6), - QChar::highSurrogate(zeroVal + 7), QChar::lowSurrogate(zeroVal + 7), - QChar::highSurrogate(zeroVal + 8), QChar::lowSurrogate(zeroVal + 8), - QChar::highSurrogate(zeroVal + 9), QChar::lowSurrogate(zeroVal + 9) - }; - QTest::addRow("Chakma-digits-%s", code.name) - << QString(data, std::size(data)) << code.code; + for (int i = 0; i < 10; ++i) { + QChar data[] = { + QChar::highSurrogate(zeroVal + i), QChar::lowSurrogate(zeroVal + i), + }; + QTest::addRow("%s:Chakma-digit-%d", code.name, i) << QStringView(data) << code.code; + } } } } void tst_QStringConverter::roundtrip() { - QFETCH(QString, utf16); + QFETCH(QStringView, utf16); QFETCH(QStringConverter::Encoding, code); QStringEncoder out(code); - const QByteArray encoded = out.encode(utf16); + QByteArray encoded = out.encode(utf16); QStringDecoder back(code); const QString decoded = back.decode(encoded); 
QCOMPARE(decoded, utf16); + + if (utf16.isEmpty()) + return; + + // repeat, with a longer string + constexpr qsizetype MinSize = 128; + QString uniString = utf16.toString(); + while (uniString.size() < MinSize && encoded.size() < MinSize) { + uniString += uniString; + encoded += encoded; + } + QCOMPARE(out.encode(uniString), encoded); + QCOMPARE(back.decode(encoded), uniString); } void tst_QStringConverter::nonFlaggedCodepointFFFF() const diff --git a/tests/auto/corelib/text/qstringconverter/utf8.txt b/tests/auto/corelib/text/qstringconverter/utf8.txt deleted file mode 100644 index f5ab44c8f4..0000000000 --- a/tests/auto/corelib/text/qstringconverter/utf8.txt +++ /dev/null @@ -1 +0,0 @@ -<doc>𐀀</doc>
|