diff options
author | Ivan Solovev <ivan.solovev@qt.io> | 2021-08-30 10:38:28 +0200 |
---|---|---|
committer | Ivan Solovev <ivan.solovev@qt.io> | 2021-09-08 14:31:29 +0200 |
commit | 6ab89e118251a836a32bfd7e6b8db5cb033b0b4b (patch) | |
tree | cdd4d439a2599805e245f24c18637fea8816c5f7 /tests | |
parent | 57c86998fd1e891a032b6cfe5a874d17a238e178 (diff) |
Fix QTextCodec::canEncode() for ICU codec
QTextCodec::canEncode() relies on the number of invalid characters
to determine if the encoding is possible or not.
By default the ICU fromUnicode converter method does not provide any
way to track the number of failures. However, it uses callbacks to
report errors or replace unrecognized characters with a substitute string.
This patch introduces a custom callback for fromUnicode conversion.
The callback just increments the invalid-character counter and then
calls the default callback, which does its usual job.
Task-number: QTBUG-83081
Change-Id: Ie07fd183c728c7c77e8285f55238b1d57f5c9eb2
(adapted from commit 421de71a521ab07e942ae46a8f0a8f36147d86c8)
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/auto/corelib/codecs/qtextcodec/test.pro | 2 | ||||
-rw-r--r-- | tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp | 69 |
2 files changed, 70 insertions, 1 deletions
diff --git a/tests/auto/corelib/codecs/qtextcodec/test.pro b/tests/auto/corelib/codecs/qtextcodec/test.pro index 7505c5ad51..07c1e4e2bd 100644 --- a/tests/auto/corelib/codecs/qtextcodec/test.pro +++ b/tests/auto/corelib/codecs/qtextcodec/test.pro @@ -1,5 +1,5 @@ CONFIG += testcase -QT = core testlib +QT = core-private testlib SOURCES = tst_qtextcodec.cpp TARGET = tst_qtextcodec diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp index 78b6449a69..62a8321844 100644 --- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp +++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp @@ -38,6 +38,11 @@ #endif #include <QThreadPool> +#include <private/qglobal_p.h> // for the icu feature test +#if QT_CONFIG(icu) +# include <unicode/uvernum.h> +#endif + class tst_QTextCodec : public QObject { Q_OBJECT @@ -96,6 +101,9 @@ private slots: void shiftJis(); void userCodec(); + + void canEncode(); + void canEncode_data(); }; void tst_QTextCodec::toUnicode_data() @@ -2455,6 +2463,67 @@ void tst_QTextCodec::userCodec() QCOMPARE(pcodec, nullptr); } +void tst_QTextCodec::canEncode() +{ + QFETCH(QString, codecName); + QFETCH(QString, inputString); + QFETCH(QByteArray, expectedData); + QFETCH(bool, canEncode); + + QTextCodec *codec = QTextCodec::codecForName(codecName.toLatin1()); + QVERIFY(codec != nullptr); + + QCOMPARE(codec->canEncode(inputString), canEncode); + QByteArray encoded = codec->fromUnicode(inputString); + QCOMPARE(encoded, expectedData); +} + +void tst_QTextCodec::canEncode_data() +{ + QTest::addColumn<QString>("codecName"); + QTest::addColumn<QString>("inputString"); + QTest::addColumn<QByteArray>("expectedData"); + QTest::addColumn<bool>("canEncode"); + + QTest::newRow("English ISO-8859-1") << "ISO-8859-1" << "Hello World" + << QByteArray("Hello World") << true; + QTest::newRow("English big5") << "Big5" << "Hello World" << QByteArray("Hello World") << true; + + QTest::newRow("Greek 
win1252") + << "Windows-1252" + << QString("\u03c0\u03bf\u03bb\u03cd\u03c4\u03c1\u03bf\u03c0\u03bf\u03bd") + << QByteArray("??????????") << false; + QTest::newRow("Greek win1253") + << "Windows-1253" + << QString("\u03c0\u03bf\u03bb\u03cd\u03c4\u03c1\u03bf\u03c0\u03bf\u03bd") + << QByteArray("\xF0\xEF\xEB\xFD\xF4\xF1\xEF\xF0\xEF\xED") << true; + + QTest::newRow("Russian win1252") + << "Windows-1252" << QString("\u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440") + << QByteArray("?????? ???") << false; + QTest::newRow("Russian win1251") + << "Windows-1251" << QString("\u041f\u0440\u0438\u0432\u0435\u0442 \u043c\u0438\u0440") + << QByteArray("\xCF\xF0\xE8\xE2\xE5\xF2 \xEC\xE8\xF0") << true; + + QTest::newRow("English from ucs4") + << "ISO-8859-1" << QString("\u0048\u0065\u006c\u006c\u006f\u0021") + << QByteArray("Hello!") << true; + + // ICU on Linux RHEL 7.6 seems to be old, and does not handle NULL + // characters properly. It returns 0x01 instead of 0x00 for it, so + // we just skip the test. +#if !QT_CONFIG(icu) || (U_ICU_VERSION_MAJOR_NUM > 56) + QTest::newRow("With null") << "ISO-8859-1" << QString::fromUcs4(U"Hello\u0000World", 11) + << QByteArray("Hello\x00World", 11) << true; +#endif + + QTest::newRow("With special chars") + << "ISO-8859-1" << QString("\u0001\u0002\u0003\u0008\u0009\u000a\u000b\u000d") + << QByteArray("\x01\x02\x03\b\t\n\x0B\r") << true; + + QTest::newRow("Pencil icon") << "ISO-8859-1" << QString("\u270f") << QByteArray("?") << false; +} + struct DontCrashAtExit { ~DontCrashAtExit() { QTextCodec *c = QTextCodec::codecForName("utf8"); |