diff options
Diffstat (limited to 'tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp')
-rw-r--r-- | tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp | 482 |
1 files changed, 433 insertions, 49 deletions
diff --git a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp index a60478d542..342c343a42 100644 --- a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp +++ b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp @@ -1,6 +1,6 @@ // Copyright (C) 2021 The Qt Company Ltd. // Copyright (C) 2016 Intel Corporation. -// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only WITH Qt-GPL-exception-1.0 +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR GPL-3.0-only #include <QTest> @@ -14,6 +14,21 @@ using namespace Qt::StringLiterals; +QT_BEGIN_NAMESPACE +namespace QTest { +template <typename T> +char *toString(const std::optional<T> &opt) +{ + if (opt) + return QTest::toString(*opt); + else + return qstrdup("std::nullopt"); +} +} // namespace QTest +QT_END_NAMESPACE + +using QTest::toString; + static constexpr bool IsBigEndian = QSysInfo::ByteOrder == QSysInfo::BigEndian; enum CodecLimitation { AsciiOnly, @@ -134,6 +149,8 @@ private slots: void convertL1U8(); + void convertL1U16(); + #if QT_CONFIG(icu) void roundtripIcu_data(); void roundtripIcu(); @@ -176,6 +193,20 @@ private slots: void encodingForHtml_data(); void encodingForHtml(); + + void availableCodesAreAvailable(); + +#ifdef Q_OS_WIN + // On all other systems local 8-bit encoding is UTF-8 + void fromLocal8Bit_data(); + void fromLocal8Bit(); + void fromLocal8Bit_special_cases(); + void fromLocal8Bit_2GiB(); + void toLocal8Bit_data(); + void toLocal8Bit(); + void toLocal8Bit_special_cases(); + void toLocal8Bit_2GiB(); +#endif }; void tst_QStringConverter::constructByName() @@ -226,7 +257,7 @@ void tst_QStringConverter::invalidConverter() QVERIFY(!encoder.hasError()); char buffer[100]; char *position = encoder.appendToBuffer(buffer, u"Even more"); - QCOMPARE(position, buffer); + QCOMPARE(position - buffer, 0); QVERIFY(encoder.hasError()); } @@ -250,9 +281,9 @@ void tst_QStringConverter::invalidConverter() decoder.resetState(); QVERIFY(!decoder.hasError()); - QChar buffer[100]; - QChar *position = decoder.appendToBuffer(buffer, "Even more"); - QCOMPARE(position, buffer); + char16_t buffer[100]; + char16_t *position = decoder.appendToBuffer(buffer, "Even more"); + QCOMPARE(position - buffer, 0); QVERIFY(decoder.hasError()); } } @@ -353,6 +384,33 @@ void tst_QStringConverter::convertUtf8CharByChar() QCOMPARE(reencoded, ba); } +void tst_QStringConverter::convertL1U16() +{ + const QLatin1StringView latin1("some plain latin1 text"); + const QString qstr(latin1); + + QStringDecoder decoder(QStringConverter::Latin1); + QVERIFY(decoder.isValid()); + QString uniString = decoder(latin1); + QCOMPARE(uniString, qstr); + QCOMPARE(latin1, uniString.toLatin1()); + + // do it again (using .decode()) + uniString = decoder.decode(latin1); + QCOMPARE(uniString, qstr); + QCOMPARE(latin1, uniString.toLatin1()); + + QStringEncoder encoder(QStringConverter::Latin1); + QByteArray reencoded = encoder(uniString); + QCOMPARE(reencoded, QByteArrayView(latin1)); + QCOMPARE(reencoded, uniString.toLatin1()); + + // do it again (using .encode()) + reencoded = encoder.encode(uniString); + QCOMPARE(reencoded, QByteArrayView(latin1)); + QCOMPARE(reencoded, uniString.toLatin1()); +} + void tst_QStringConverter::roundtrip_data() { QTest::addColumn<QStringView>("utf16"); @@ -513,11 +571,10 @@ void tst_QStringConverter::charByCharConsistency_data() void tst_QStringConverter::charByCharConsistency() { - QFETCH(QStringView, source); - QFETCH(QByteArray, codec); + QFETCH(const QStringView, source); + QFETCH(const QByteArray, codec); - { - QStringEncoder encoder(codec); + const auto check = [&](QStringEncoder encoder){ if (!encoder.isValid()) QSKIP("Unsupported codec"); @@ -528,19 +585,28 @@ void tst_QStringConverter::charByCharConsistency() stepByStepConverted += encoder.encode(codeUnit); } QCOMPARE(stepByStepConverted, fullyConverted); - } + }; + + check(QStringEncoder(codec)); + if (QTest::currentTestResolved()) return; + + check(QStringEncoder(codec, QStringConverter::Flag::ConvertInvalidToNull)); + if (QTest::currentTestResolved()) return; + + // moved codecs also work: { - QStringEncoder encoder(codec, QStringConverter::Flag::ConvertInvalidToNull); + QStringEncoder dec(codec); + check(std::move(dec)); + } + if (QTest::currentTestResolved()) return; - QByteArray fullyConverted = encoder.encode(source); - encoder.resetState(); - QByteArray stepByStepConverted; - for (const auto& codeUnit: source) { - stepByStepConverted += encoder.encode(codeUnit); - } - QCOMPARE(stepByStepConverted, fullyConverted); + { + QStringEncoder dec(codec, QStringConverter::Flag::ConvertInvalidToNull); + check(std::move(dec)); } + if (QTest::currentTestResolved()) return; + } void tst_QStringConverter::byteByByteConsistency_data() @@ -557,11 +623,10 @@ void tst_QStringConverter::byteByByteConsistency_data() void tst_QStringConverter::byteByByteConsistency() { - QFETCH(QByteArray, source); - QFETCH(QByteArray, codec); + QFETCH(const QByteArray, source); + QFETCH(const QByteArray, codec); - { - QStringDecoder decoder(codec); + const auto check = [&](QStringDecoder decoder) { if (!decoder.isValid()) QSKIP("Unsupported codec"); @@ -574,23 +639,28 @@ void tst_QStringConverter::byteByByteConsistency() stepByStepConverted += decoder.decode(singleChar); } QCOMPARE(stepByStepConverted, fullyConverted); - } + }; + + check(QStringDecoder(codec)); + if (QTest::currentTestResolved()) return; + + check(QStringDecoder(codec, QStringConverter::Flag::ConvertInvalidToNull)); + if (QTest::currentTestResolved()) return; + + // moved codecs also work: { - QStringDecoder decoder(codec, QStringConverter::Flag::ConvertInvalidToNull); - if (!decoder.isValid()) - QSKIP("Unsupported codec"); + QStringDecoder dec(codec); + check(std::move(dec)); + } + if (QTest::currentTestResolved()) return; - QString fullyConverted = decoder.decode(source); - decoder.resetState(); - QString stepByStepConverted; - for (const auto& byte: source) { - QByteArray singleChar; - singleChar.append(byte); - stepByStepConverted += decoder.decode(singleChar); - } - QCOMPARE(stepByStepConverted, fullyConverted); + { + QStringDecoder dec(codec, QStringConverter::Flag::ConvertInvalidToNull); + check(std::move(dec)); } + if (QTest::currentTestResolved()) return; + } void tst_QStringConverter::statefulPieceWise() @@ -2170,25 +2240,42 @@ void tst_QStringConverter::encodingForName_data() QTest::addColumn<QByteArray>("name"); QTest::addColumn<std::optional<QStringConverter::Encoding>>("encoding"); - QTest::newRow("UTF-8") << QByteArray("UTF-8") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); - QTest::newRow("utf8") << QByteArray("utf8") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); - QTest::newRow("Utf-8") << QByteArray("Utf-8") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf8); - QTest::newRow("UTF-16") << QByteArray("UTF-16") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf16); - QTest::newRow("UTF-16le") << QByteArray("UTF-16le") << std::optional<QStringConverter::Encoding>(QStringConverter::Utf16LE); - QTest::newRow("ISO-8859-1") << QByteArray("ISO-8859-1") << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1); - QTest::newRow("ISO8859-1") << QByteArray("ISO8859-1") << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1); - QTest::newRow("iso8859-1") << QByteArray("iso8859-1") << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1); - QTest::newRow("latin1") << QByteArray("latin1") << std::optional<QStringConverter::Encoding>(QStringConverter::Latin1); - QTest::newRow("latin2") << QByteArray("latin2") << std::optional<QStringConverter::Encoding>(); - QTest::newRow("latin15") << QByteArray("latin15") << std::optional<QStringConverter::Encoding>(); + auto row = [](const char *name, std::optional<QStringConverter::Encoding> expected = std::nullopt) { + auto protect = [](auto p) { return p ? *p ? p : "<empty>" : "<nullptr>"; }; + QTest::addRow("%s", protect(name)) << QByteArray(name) << expected; + }; + + row("UTF-8", QStringConverter::Utf8); + row("utf8", QStringConverter::Utf8); + row("Utf-8", QStringConverter::Utf8); + row("UTF-16", QStringConverter::Utf16); + row("UTF-16le", QStringConverter::Utf16LE); + row("ISO-8859-1", QStringConverter::Latin1); + row("ISO8859-1", QStringConverter::Latin1); + row("iso8859-1", QStringConverter::Latin1); + row("latin1", QStringConverter::Latin1); + row("latin-1_-", QStringConverter::Latin1); + row("latin_1-_", QStringConverter::Latin1); + row("-_latin-1", QStringConverter::Latin1); + row("_-latin_1", QStringConverter::Latin1); + + // failures: + row(nullptr); + row(""); + row("latin2"); + row("latin42"); + row(" latin1"); // spaces are significant + row("\tlatin1"); // HTs are significant } void tst_QStringConverter::encodingForName() { - QFETCH(QByteArray, name); - QFETCH(std::optional<QStringConverter::Encoding>, encoding); + QFETCH(const QByteArray, name); + QFETCH(const std::optional<QStringConverter::Encoding>, encoding); + + const auto *ptr = name.isNull() ? nullptr : name.data(); - auto e = QStringConverter::encodingForName(name); + const auto e = QStringConverter::encodingForName(ptr); QCOMPARE(e, encoding); } @@ -2397,6 +2484,13 @@ void tst_QStringConverter::encodingForHtml() } } +void tst_QStringConverter::availableCodesAreAvailable() +{ + auto codecs = QStringConverter::availableCodecs(); + for (const auto &codecName: codecs) + QVERIFY(QStringEncoder(codecName.toLatin1()).isValid()); +} + class LoadAndConvert: public QRunnable { public: @@ -2427,6 +2521,10 @@ void tst_QStringConverter::initTestCase() void tst_QStringConverter::threadSafety() { +#if defined(Q_OS_WASM) + QSKIP("This test misbehaves on WASM. Investigation needed (QTBUG-110067)"); +#endif + QThreadPool::globalInstance()->setMaxThreadCount(12); QList<QString> res; @@ -2442,6 +2540,292 @@ void tst_QStringConverter::threadSafety() QCOMPARE(b, QString::fromLatin1("abcdefghijklmonpqrstufvxyz")); } +#ifdef Q_OS_WIN +void tst_QStringConverter::fromLocal8Bit_data() +{ + QTest::addColumn<QByteArray>("eightBit"); + QTest::addColumn<QString>("utf16"); + QTest::addColumn<quint32>("codePage"); + + constexpr uint WINDOWS_1252 = 1252u; + QTest::newRow("windows-1252") << "Hello, world!"_ba << u"Hello, world!"_s << WINDOWS_1252; + constexpr uint SHIFT_JIS = 932u; + // Mostly two byte characters, but the comma is a single byte character (0xa4) + QTest::newRow("shiftJIS") + << "\x82\xb1\x82\xf1\x82\xc9\x82\xbf\x82\xcd\xa4\x90\xa2\x8a\x45\x81\x49"_ba + << u"こんにちは、世界!"_s << SHIFT_JIS; + + constexpr uint GB_18030 = 54936u; + QTest::newRow("GB-18030") << "\xc4\xe3\xba\xc3\xca\xc0\xbd\xe7\xa3\xa1"_ba << u"你好世界!"_s + << GB_18030; +} + +void tst_QStringConverter::fromLocal8Bit() +{ + QFETCH(const QByteArray, eightBit); + QFETCH(const QString, utf16); + QFETCH(const quint32, codePage); + + QStringConverter::State state; + + QString result = QLocal8Bit::convertToUnicode_sys(eightBit, codePage, &state); + QCOMPARE(result, utf16); + QCOMPARE(state.remainingChars, 0); + + result.clear(); + state.clear(); + for (char c : eightBit) + result += QLocal8Bit::convertToUnicode_sys({&c, 1}, codePage, &state); + QCOMPARE(result, utf16); + QCOMPARE(state.remainingChars, 0); + + result.clear(); + state.clear(); + // Decode the full string again, this time without state + state.flags |= QStringConverter::Flag::Stateless; + result = QLocal8Bit::convertToUnicode_sys(eightBit, codePage, &state); + QCOMPARE(result, utf16); + QCOMPARE(state.remainingChars, 0); +} + +void tst_QStringConverter::fromLocal8Bit_special_cases() +{ + QStringConverter::State state; + constexpr uint SHIFT_JIS = 932u; + // Decode a 2-octet character, but only provide 1 octet at first: + QString result = QLocal8Bit::convertToUnicode_sys("\x82", SHIFT_JIS, &state); + QCOMPARE(result, QString()); + QVERIFY(result.isNull()); + QCOMPARE_GT(state.remainingChars, 0); + // Then provide the second octet: + result = QLocal8Bit::convertToUnicode_sys("\xb1", SHIFT_JIS, &state); + QCOMPARE(result, u"こ"); + QCOMPARE(state.remainingChars, 0); + + // And without state: + result.clear(); + QStringConverter::State statelessState; + statelessState.flags |= QStringConverter::Flag::Stateless; + result = QLocal8Bit::convertToUnicode_sys("\x82", SHIFT_JIS, &statelessState); + result += QLocal8Bit::convertToUnicode_sys("\xb1", SHIFT_JIS, &statelessState); + // 0xb1 is a valid single-octet character in Shift-JIS, so the output + // isn't really what you would expect: + QCOMPARE(result, QString(QChar::ReplacementCharacter) + u'ア'); + QCOMPARE(statelessState.remainingChars, 0); + + // Now try a 3-octet UTF-8 sequence: + result.clear(); + state.clear(); + constexpr uint UTF8 = 65001u; + // First the first 2 octets: + result = QLocal8Bit::convertToUnicode_sys("\xe4\xbd", UTF8, &state); + QCOMPARE(result, QString()); + QVERIFY(result.isNull()); + QCOMPARE_GT(state.remainingChars, 0); + // Then provide the remaining octet: + result = QLocal8Bit::convertToUnicode_sys("\xa0", UTF8, &state); + QCOMPARE(result, u"你"); + QCOMPARE(state.remainingChars, 0); + + // Now the same, but there is an incomplete sequence at the start + result.clear(); + state.clear(); + result = QLocal8Bit::convertToUnicode_sys("\xe4\xe4\xbd", UTF8, &state); + QCOMPARE(result, QString()); + QVERIFY(result.isNull()); + // Remaining octet (and a '.' to force it to discard something from the + // internal state which is currently limited to 4 octets): + result += QLocal8Bit::convertToUnicode_sys("\xa0.", UTF8, &state); + QCOMPARE(result, QChar::ReplacementCharacter + u"你."_s); + QCOMPARE(state.remainingChars, 0); + + // Test QTBUG-118834, which is failing + result.clear(); + state.clear(); + result = QLocal8Bit::convertToUnicode_sys("\xe4\xe4\xbd", UTF8, &state); + QCOMPARE(result, QString()); + QVERIFY(result.isNull()); + // Remaining octet: + result += QLocal8Bit::convertToUnicode_sys("\xa0", UTF8, &state); + QEXPECT_FAIL("", "QTBUG-118834: We don't output anything because it's " + "within the size of our internal state, and we cannot " + "signal that it needs to be drained.", Continue); + QCOMPARE(result, QChar::ReplacementCharacter + u"你"_s); + QEXPECT_FAIL("", "QTBUG-118834: As above", Continue); + QCOMPARE(state.remainingChars, 0); + + // Now try a 4-octet GB 18030 sequence: + result.clear(); + state.clear(); + constexpr uint GB_18030 = 54936u; + const char sequence[] = "\x95\x32\x90\x31"; + // Repeat the sequence multiple times to test handling of exhaustion of + // internal buffer + QByteArray repeated = QByteArray(sequence).repeated(2049); + QByteArrayView octets = QByteArrayView(repeated); + result = QLocal8Bit::convertToUnicode_sys(octets.first(2), GB_18030, &state); + QCOMPARE(result, QString()); + QVERIFY(result.isNull()); + QCOMPARE_GT(state.remainingChars, 0); + // Then provide one more octet: + result = QLocal8Bit::convertToUnicode_sys(octets.sliced(2, 1), GB_18030, &state); + QCOMPARE(result, QString()); + QVERIFY(result.isNull()); + QCOMPARE_GT(state.remainingChars, 0); + // Then provide the last octet + the rest of the string + result = QLocal8Bit::convertToUnicode_sys(octets.sliced(3), GB_18030, &state); + QCOMPARE(result.first(2), u"𠂇"); + QCOMPARE(state.remainingChars, 0); +} + +void tst_QStringConverter::fromLocal8Bit_2GiB() +{ +#if QT_POINTER_SIZE == 4 + QSKIP("This test is only relevant for 64-bit builds"); +#else + qsizetype size = qsizetype(std::numeric_limits<int>::max()) + 3; + QByteArray input; + QT_TRY { + input.reserve(size); + } QT_CATCH (const std::bad_alloc &) { + QSKIP("Out of memory"); + } + // fill with '、' - a single octet character in Shift-JIS + input.fill('\xa4', std::numeric_limits<int>::max() - 1); + // then append 'こ' - a two octet character in Shift-JIS + // which is now straddling the 2 GiB boundary + input += "\x82\xb1"; + // then append another two '、', so that our output is also crossing the + // 2 GiB boundary + input += "\xa4\xa4"; + QCOMPARE(input.size(), input.capacity()); + constexpr uint SHIFT_JIS = 932u; + QStringConverter::State state; + QString result; + QT_TRY { + result = QLocal8Bit::convertToUnicode_sys(input, SHIFT_JIS, &state); + } QT_CATCH (const std::bad_alloc &) { + QSKIP("Out of memory"); + } + QCOMPARE(result.size(), size - 1); // The 2-octet character is only 1 code unit in UTF-16 + QCOMPARE(result.last(4), u"、こ、、"); // Check we correctly decoded it + QCOMPARE(state.remainingChars, 0); // and there is nothing left in the state +#endif +} + +void tst_QStringConverter::toLocal8Bit_data() +{ + fromLocal8Bit_data(); +} + +void tst_QStringConverter::toLocal8Bit() +{ + QFETCH(const QByteArray, eightBit); + QFETCH(const QString, utf16); + QFETCH(const quint32, codePage); + + QStringConverter::State state; + + QByteArray result = QLocal8Bit::convertFromUnicode_sys(utf16, codePage, &state); + QCOMPARE(result, eightBit); + QCOMPARE(state.remainingChars, 0); + + result.clear(); + state.clear(); + for (QChar c : utf16) + result += QLocal8Bit::convertFromUnicode_sys(QStringView(&c, 1), codePage, &state); + QCOMPARE(result, eightBit); + QCOMPARE(state.remainingChars, 0); + + result.clear(); + state.clear(); + // Decode the full string again, this time without state + state.flags |= QStringConverter::Flag::Stateless; + result = QLocal8Bit::convertFromUnicode_sys(utf16, codePage, &state); + QCOMPARE(result, eightBit); + QCOMPARE(state.remainingChars, 0); +} + +void tst_QStringConverter::toLocal8Bit_special_cases() +{ + QStringConverter::State state; + // Normally utf8 goes through a different code path, but we can force it here + constexpr uint UTF8 = 65001u; + // Decode a 2-code unit character, but only provide 1 code unit at first: + const char16_t a[] = u"𬽦"; + QStringView codeUnits = a; + QByteArray result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &state); + QCOMPARE(result, QString()); + QVERIFY(result.isNull()); + QCOMPARE_GT(state.remainingChars, 0); + // Then provide the second code unit: + result = QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &state); + QCOMPARE(result, "\xf0\xac\xbd\xa6"_ba); + QCOMPARE(state.remainingChars, 0); + + // Retain compat with the behavior for toLocal8Bit: + QCOMPARE(codeUnits.first(1).toLocal8Bit(), "?"); + + // QString::toLocal8Bit is already stateless, but test stateless handling + // explicitly anyway: + result.clear(); + QStringConverter::State statelessState; + statelessState.flags |= QStringConverter::Flag::Stateless; + result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &statelessState); + result += QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &statelessState); + // Windows uses the replacement character for invalid characters: + QCOMPARE(result, "\ufffd\ufffd"); + + // Now do the same, but the second time we feed in a character, we also + // provide many more so the internal stack buffer is not large enough. + result.clear(); + state.clear(); + QString str = QStringView(a).toString().repeated(2048); + codeUnits = str; + result = QLocal8Bit::convertFromUnicode_sys(codeUnits.first(1), UTF8, &state); + QCOMPARE(result, QString()); + QVERIFY(result.isNull()); + QCOMPARE_GT(state.remainingChars, 0); + // Then we provide the rest of the string: + result = QLocal8Bit::convertFromUnicode_sys(codeUnits.sliced(1), UTF8, &state); + QCOMPARE(result.first(4), "\xf0\xac\xbd\xa6"_ba); + QCOMPARE(state.remainingChars, 0); +} + +void tst_QStringConverter::toLocal8Bit_2GiB() +{ +#if QT_POINTER_SIZE == 4 + QSKIP("This test is only relevant for 64-bit builds"); +#else + constexpr qsizetype TwoGiB = qsizetype(std::numeric_limits<int>::max()); + QString input; + QT_TRY { + input.reserve(TwoGiB + 1); + } QT_CATCH (const std::bad_alloc &) { + QSKIP("Out of memory"); + } + // Fill with a single code unit character + input.fill(u'.', TwoGiB - 1); + // Then append a 2 code unit character, so that the input straddles the 2 GiB + // boundary + input += u"🙂"; + QCOMPARE(input.size(), input.capacity()); + constexpr uint UTF8 = 65001u; + QStringConverter::State state; + QByteArray result; + QT_TRY { + result = QLocal8Bit::convertFromUnicode_sys(input, UTF8, &state); + } QT_CATCH (const std::bad_alloc &) { + QSKIP("Out of memory"); + } + QUtf8StringView rView = result; + QCOMPARE(rView.size(), TwoGiB + 3); // The 2 code unit smiley is 4 code units in UTF-8 + QCOMPARE(rView.last(7), u8"...🙂"); // Check we correctly decoded it + QCOMPARE(state.remainingChars, 0); // and there is nothing left in the state +#endif +} +#endif // Q_OS_WIN + struct DontCrashAtExit { ~DontCrashAtExit() { QStringDecoder decoder(QStringDecoder::Utf8); |