summaryrefslogtreecommitdiffstats
path: root/tests
diff options
context:
space:
mode:
authorLars Knoll <lars.knoll@qt.io>2020-04-30 10:50:44 +0200
committerLars Knoll <lars.knoll@qt.io>2020-05-14 07:46:14 +0200
commitf64a6bd638d399403845fe52e6f8e52889f1f52b (patch)
treef3eda565c343b1b24f8711bc87e973910ec788f4 /tests
parentf437c8c5f9772a0d73c8772b64fea6133d43bcc7 (diff)
Start work on a new API to replace QTextCodec
The new QStringEncoder and QStringDecoder classes (with a common QStringConverter base class) are there to replace QTextCodec in Qt 6. They currently use a trivial wrapper around the UTF encoding functionality. Added some autotests, mostly copied from the text codec tests.

Change-Id: Ib6eeee55fba918b9424be244cbda9dfd5096f7eb
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'tests')
-rw-r--r--tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp1
-rw-r--r--tests/auto/corelib/text/CMakeLists.txt1
-rw-r--r--tests/auto/corelib/text/qstringconverter/CMakeLists.txt10
-rw-r--r--tests/auto/corelib/text/qstringconverter/qstringconverter.pro5
-rw-r--r--tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp1591
-rw-r--r--tests/auto/corelib/text/qstringconverter/utf8.txt1
-rw-r--r--tests/auto/corelib/text/text.pro1
7 files changed, 1609 insertions, 1 deletions
diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
index 799c0bfc77..90edac3ed0 100644
--- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
+++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
@@ -1686,7 +1686,6 @@ void tst_QTextCodec::utf8stateful()
QVERIFY(utf8codec);
QTextCodec::ConverterState state;
- memset(&state, 0, sizeof state);
QString decoded1 = utf8codec->toUnicode(buffer1, buffer1.size(), &state);
if (result.isNull()) {
diff --git a/tests/auto/corelib/text/CMakeLists.txt b/tests/auto/corelib/text/CMakeLists.txt
index e23de92c8c..19cd71a987 100644
--- a/tests/auto/corelib/text/CMakeLists.txt
+++ b/tests/auto/corelib/text/CMakeLists.txt
@@ -14,6 +14,7 @@ add_subdirectory(qstring)
add_subdirectory(qstring_no_cast_from_bytearray)
add_subdirectory(qstringapisymmetry)
add_subdirectory(qstringbuilder)
+add_subdirectory(qstringconverter)
add_subdirectory(qstringiterator)
add_subdirectory(qstringlist)
add_subdirectory(qstringmatcher)
diff --git a/tests/auto/corelib/text/qstringconverter/CMakeLists.txt b/tests/auto/corelib/text/qstringconverter/CMakeLists.txt
new file mode 100644
index 0000000000..582e4fcf93
--- /dev/null
+++ b/tests/auto/corelib/text/qstringconverter/CMakeLists.txt
@@ -0,0 +1,10 @@
+# Generated from qstringconverter.pro.
+
+#####################################################################
+## tst_qstringconverter Test:
+#####################################################################
+
+# utf8.txt is opened through QFINDTESTDATA() in tst_qstringconverter.cpp,
+# so it is declared as test data to be shipped together with the test.
+qt_add_test(tst_qstringconverter
+    SOURCES
+        tst_qstringconverter.cpp
+    TESTDATA
+        utf8.txt
+)
diff --git a/tests/auto/corelib/text/qstringconverter/qstringconverter.pro b/tests/auto/corelib/text/qstringconverter/qstringconverter.pro
new file mode 100644
index 0000000000..6cd797e805
--- /dev/null
+++ b/tests/auto/corelib/text/qstringconverter/qstringconverter.pro
@@ -0,0 +1,5 @@
+CONFIG += testcase
+QT = core testlib
+SOURCES = tst_qstringconverter.cpp
+
+# utf8.txt is opened through QFINDTESTDATA() in tst_qstringconverter.cpp,
+# so it is declared as test data to be shipped together with the test.
+TESTDATA += utf8.txt
+
+TARGET = tst_qstringconverter
diff --git a/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
new file mode 100644
index 0000000000..c94f8146ff
--- /dev/null
+++ b/tests/auto/corelib/text/qstringconverter/tst_qstringconverter.cpp
@@ -0,0 +1,1591 @@
+/****************************************************************************
+**
+** Copyright (C) 2020 The Qt Company Ltd.
+** Copyright (C) 2016 Intel Corporation.
+** Contact: https://www.qt.io/licensing/
+**
+** This file is part of the test suite of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:GPL-EXCEPT$
+** Commercial License Usage
+** Licensees holding valid commercial Qt licenses may use this file in
+** accordance with the commercial license agreement provided with the
+** Software or, alternatively, in accordance with the terms contained in
+** a written agreement between you and The Qt Company. For licensing terms
+** and conditions see https://www.qt.io/terms-conditions. For further
+** information use the contact form at https://www.qt.io/contact-us.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3 as published by the Free Software
+** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
+** included in the packaging of this file. Please review the following
+** information to ensure the GNU General Public License requirements will
+** be met: https://www.gnu.org/licenses/gpl-3.0.html.
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QtTest/QtTest>
+
+#include <qstringconverter.h>
+#include <qthreadpool.h>
+
+class tst_QStringConverter : public QObject // Qt Test class exercising QStringEncoder / QStringDecoder
+{
+ Q_OBJECT
+
+private slots: // every slot below is a test function or the *_data() provider of the test with the same base name
+ void threadSafety(); // definition is outside the visible part of this patch
+
+ void convertUtf8(); // round-trips the contents of utf8.txt through the UTF-8 decoder and encoder
+
+ void nonFlaggedCodepointFFFF() const; // U+FFFF (EF BF BF) must encode and decode unchanged
+ void flagF7808080() const; // F7 80 80 80 must decode to null characters with ConvertInvalidToNull
+ void nonFlaggedEFBFBF() const; // regression test for QTBUG-33229
+ void decode0D() const; // "A\rB" must survive a fromUtf8()/toUtf8() round trip
+
+ void utf8Codec_data(); // rows of: utf8 bytes, expected QString, length
+ void utf8Codec();
+
+ void utf8bom_data();
+ void utf8bom();
+
+ void utf8stateful_data();
+ void utf8stateful();
+
+ void utfHeaders_data();
+ void utfHeaders();
+};
+
+// Round-trips the contents of utf8.txt through QStringDecoder and
+// QStringEncoder: first with one call per direction, then feeding the
+// converters a single byte / a single UTF-16 code unit per call.
+void tst_QStringConverter::convertUtf8()
+{
+    QFile file(QFINDTESTDATA("utf8.txt"));
+
+    if (!file.open(QIODevice::ReadOnly))
+        QFAIL(qPrintable("File could not be opened: " + file.errorString()));
+
+    QByteArray ba = file.readAll();
+    QVERIFY(!ba.isEmpty());
+
+    {
+        // Whole-buffer conversion, checked against QString's own UTF-8 code.
+        QStringDecoder decoder(QStringDecoder::Utf8);
+        QVERIFY(decoder.isValid());
+        QString uniString = decoder(ba);
+        QCOMPARE(uniString, QString::fromUtf8(ba));
+        QCOMPARE(ba, uniString.toUtf8());
+
+        QStringEncoder encoder(QStringEncoder::Utf8);
+        QVERIFY(encoder.isValid());
+        QCOMPARE(ba, encoder(uniString));
+    }
+
+    {
+        // once again converting char by char; any multi-byte sequence in the
+        // file is therefore split across several calls of the same converter
+        QStringDecoder decoder(QStringDecoder::Utf8);
+        QVERIFY(decoder.isValid());
+        QString uniString;
+        for (int i = 0; i < ba.size(); ++i)
+            uniString += decoder(ba.constData() + i, 1);
+        QCOMPARE(uniString, QString::fromUtf8(ba));
+
+        QStringEncoder encoder(QStringEncoder::Utf8);
+        QVERIFY(encoder.isValid());
+        QByteArray reencoded;
+        for (int i = 0; i < uniString.size(); ++i)
+            reencoded += encoder(uniString.constData() + i, 1);
+        // Check the bytes produced incrementally, not a fresh whole-string
+        // encode, so that the loop above is actually verified.
+        QCOMPARE(ba, reencoded);
+    }
+}
+
+// Checks that the code point 0xFFFF (UTF-8 bytes EF BF BF, a non-character
+// code) is neither rejected by the UTF-8 encoder nor flagged by the decoder.
+void tst_QStringConverter::nonFlaggedCodepointFFFF() const
+{
+    const QChar ch(0xFFFF);
+    const QByteArray utf8Bytes("\357\277\277");
+
+    QStringEncoder encoder(QStringEncoder::Utf8);
+    QVERIFY(encoder.isValid());
+
+    const QByteArray encoded = encoder(QStringView(&ch, 1));
+    QCOMPARE(encoded, utf8Bytes);
+
+    // ConvertInvalidToNull would turn the character into a null QChar if the
+    // decoder considered the sequence invalid.
+    QStringDecoder decoder(QStringDecoder::Utf8, QStringDecoder::ConvertInvalidToNull);
+    QVERIFY(decoder.isValid());
+    // QCOMPARE (rather than QVERIFY on operator==) reports both strings on failure.
+    QCOMPARE(decoder(utf8Bytes), QString(1, ch));
+}
+
+void tst_QStringConverter::flagF7808080() const
+{
+    /* Derived from test not-wf-sa-170 of the XML test suite, referenced as
+     * tests/qxmlstream/XML-Test-Suite/xmlconf/xmltest/not-wf/sa/166.xml
+     * (NOTE(review): the test id and the file number disagree; confirm which
+     * one is meant). The description of that test reads:
+     *
+     * "Four byte UTF-8 encodings can encode UCS-4 characters
+     * which are beyond the range of legal XML characters
+     * (and can't be expressed in Unicode surrogate pairs).
+     * This document holds such a character."
+     *
+     * The four input bytes F7 80 80 80 are, in binary:
+     *   11110111 10000000 10000000 10000000
+     * laid out as
+     *   11110www 10xxxxxx 10yyyyyy 10zzzzzz
+     *
+     * Dropping the UTF-8 marker bits leaves the codepoint 0x1C0000.
+     */
+    const QByteArray input = QByteArray::fromHex("F7808080");
+
+    // With ConvertInvalidToNull one null character is expected per input byte.
+    const QString allNulls(input.size(), QChar(0));
+
+    QStringDecoder decoder(QStringDecoder::Utf8, QStringDecoder::ConvertInvalidToNull);
+    QVERIFY(decoder.isValid());
+
+    QCOMPARE(decoder(input), allNulls);
+}
+
+void tst_QStringConverter::nonFlaggedEFBFBF() const
+{
+    /* Regression test for QTBUG-33229: the byte sequence EF BF BF must be
+     * decoded normally and must NOT be flagged by the codec.
+     */
+    const QByteArray validInput = QByteArray::fromHex("EFBFBF");
+
+    {
+        const QString expected = QString::fromUtf8(QByteArray::fromHex("EFBFBF"));
+
+        QStringDecoder decoder(QStringDecoder::Utf8, QStringDecoder::ConvertInvalidToNull);
+        QVERIFY(decoder.isValid());
+        QVERIFY(decoder(validInput) == expected);
+    }
+
+    // The same sequence must also decode correctly when it follows an
+    // arbitrary character.
+    {
+        QByteArray prefixed("B");
+        prefixed += validInput;
+
+        const QString expected =
+            QString::fromUtf8(QByteArray("B").append(QByteArray::fromHex("EFBFBF")));
+
+        QStringDecoder decoder(QStringDecoder::Utf8, QStringDecoder::ConvertInvalidToNull);
+        QVERIFY(decoder.isValid());
+        QVERIFY(decoder(prefixed) == expected);
+    }
+}
+
+// A carriage return between two letters must survive a
+// QString::fromUtf8() / QString::toUtf8() round trip.
+void tst_QStringConverter::decode0D() const
+{
+    const QByteArray input("A\rB");
+
+    const QString decoded = QString::fromUtf8(input.constData());
+    QCOMPARE(decoded.toUtf8(), input);
+}
+
+// Returns a string holding one QChar::ReplacementCharacter per byte of ba.
+static QString fromInvalidUtf8Sequence(const QByteArray &ba)
+{
+    QString replacements;
+    replacements.fill(QChar::ReplacementCharacter, ba.size());
+    return replacements;
+}
+
+// copied from tst_QString::fromUtf8_data()
+void tst_QStringConverter::utf8Codec_data()
+{
+ QTest::addColumn<QByteArray>("utf8");
+ QTest::addColumn<QString>("res");
+ QTest::addColumn<int>("len");
+ QString str;
+
+ QTest::newRow("str0") << QByteArray("abcdefgh") << QString("abcdefgh") << -1;
+ QTest::newRow("str0-len") << QByteArray("abcdefgh") << QString("abc") << 3;
+ QTest::newRow("str1") << QByteArray("\303\266\303\244\303\274\303\226\303\204\303\234\303\270\303\246\303\245\303\230\303\206\303\205")
+ << QString::fromLatin1("\366\344\374\326\304\334\370\346\345\330\306\305") << -1;
+ QTest::newRow("str1-len") << QByteArray("\303\266\303\244\303\274\303\226\303\204\303\234\303\270\303\246\303\245\303\230\303\206\303\205")
+ << QString::fromLatin1("\366\344\374\326\304") << 10;
+
+ str += QChar(0x05e9);
+ str += QChar(0x05d3);
+ str += QChar(0x05d2);
+ QTest::newRow("str2") << QByteArray("\327\251\327\223\327\222") << str << -1;
+
+ str = QChar(0x05e9);
+ QTest::newRow("str2-len") << QByteArray("\327\251\327\223\327\222") << str << 2;
+
+ str = QChar(0x20ac);
+ str += " some text";
+ QTest::newRow("str3") << QByteArray("\342\202\254 some text") << str << -1;
+
+ str = QChar(0x20ac);
+ str += " some ";
+ QTest::newRow("str3-len") << QByteArray("\342\202\254 some text") << str << 9;
+
+ str = "hello";
+ str += QChar::ReplacementCharacter;
+ str += QChar(0x68);
+ str += QChar::ReplacementCharacter;
+ str += QChar::ReplacementCharacter;
+ str += QChar::ReplacementCharacter;
+ str += QChar::ReplacementCharacter;
+ str += QChar(0x61);
+ str += QChar::ReplacementCharacter;
+ QTest::newRow("invalid utf8") << QByteArray("hello\344h\344\344\366\344a\304") << str << -1;
+ QTest::newRow("invalid utf8-len") << QByteArray("hello\344h\344\344\366\344a\304") << QString("hello") << 5;
+
+ str = "Prohl";
+ str += QChar::ReplacementCharacter;
+ str += QChar::ReplacementCharacter;
+ str += QLatin1Char('e');
+ str += QChar::ReplacementCharacter;
+ str += " plugin";
+ str += QChar::ReplacementCharacter;
+ str += " Netscape";
+
+ QTest::newRow("task28417") << QByteArray("Prohl\355\276e\350 plugin\371 Netscape") << str << -1;
+ QTest::newRow("task28417-len") << QByteArray("Prohl\355\276e\350 plugin\371 Netscape") << QString("") << 0;
+
+ QTest::newRow("null-1") << QByteArray() << QString() << -1;
+ QTest::newRow("null0") << QByteArray() << QString() << 0;
+ // QTest::newRow("null5") << QByteArray() << QString() << 5;
+ QTest::newRow("empty-1") << QByteArray("\0abcd", 5) << QString() << -1;
+ QTest::newRow("empty0") << QByteArray() << QString() << 0;
+ QTest::newRow("empty5") << QByteArray("\0abcd", 5) << QString::fromLatin1("\0abcd", 5) << 5;
+ QTest::newRow("other-1") << QByteArray("ab\0cd", 5) << QString::fromLatin1("ab") << -1;
+ QTest::newRow("other5") << QByteArray("ab\0cd", 5) << QString::fromLatin1("ab\0cd", 5) << 5;
+
+ str = "Old Italic: ";
+ str += QChar(0xd800);
+ str += QChar(0xdf00);
+ str += QChar(0xd800);
+ str += QChar(0xdf01);
+ str += QChar(0xd800);
+ str += QChar(0xdf02);
+ str += QChar(0xd800);
+ str += QChar(0xdf03);
+ str += QChar(0xd800);
+ str += QChar(0xdf04);
+ QTest::newRow("surrogate") << QByteArray("Old Italic: \360\220\214\200\360\220\214\201\360\220\214\202\360\220\214\203\360\220\214\204") << str << -1;
+
+ QTest::newRow("surrogate-len") << QByteArray("Old Italic: \360\220\214\200\360\220\214\201\360\220\214\202\360\220\214\203\360\220\214\204") << str.left(16) << 20;
+
+ // from http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html
+
+ // 2.1.1 U+00000000
+ QByteArray utf8;
+ utf8 += char(0x00);
+ str = QChar(QChar::Null);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.1") << utf8 << str << 1;
+
+ // 2.1.2 U+00000080
+ utf8.clear();
+ utf8 += char(0xc2);
+ utf8 += char(0x80);
+ str = QChar(0x80);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.2") << utf8 << str << -1;
+
+ // 2.1.3 U+00000800
+ utf8.clear();
+ utf8 += char(0xe0);
+ utf8 += char(0xa0);
+ utf8 += char(0x80);
+ str = QChar(0x800);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.3") << utf8 << str << -1;
+
+ // 2.1.4 U+00010000
+ utf8.clear();
+ utf8 += char(0xf0);
+ utf8 += char(0x90);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str.clear();
+ str += QChar(0xd800);
+ str += QChar(0xdc00);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.4") << utf8 << str << -1;
+
+ // 2.1.5 U+00200000 (not a valid Unicode character)
+ utf8.clear();
+ utf8 += char(0xf8);
+ utf8 += char(0x88);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.5") << utf8 << str << -1;
+
+ // 2.1.6 U+04000000 (not a valid Unicode character)
+ utf8.clear();
+ utf8 += char(0xfc);
+ utf8 += char(0x84);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.1.6") << utf8 << str << -1;
+
+ // 2.2.1 U+0000007F
+ utf8.clear();
+ utf8 += char(0x7f);
+ str = QChar(0x7f);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.1") << utf8 << str << -1;
+
+ // 2.2.2 U+000007FF
+ utf8.clear();
+ utf8 += char(0xdf);
+ utf8 += char(0xbf);
+ str = QChar(0x7ff);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.2") << utf8 << str << -1;
+
+ // 2.2.3 U+000FFFF - non-character code
+ utf8.clear();
+ utf8 += char(0xef);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = QString::fromUtf8(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.3") << utf8 << str << -1;
+
+ // 2.2.4 U+001FFFFF
+ utf8.clear();
+ utf8 += char(0xf7);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.4") << utf8 << str << -1;
+
+ // 2.2.5 U+03FFFFFF (not a valid Unicode character)
+ utf8.clear();
+ utf8 += char(0xfb);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.5") << utf8 << str << -1;
+
+ // 2.2.6 U+7FFFFFFF
+ utf8.clear();
+ utf8 += char(0xfd);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.6") << utf8 << str << -1;
+
+ // 2.3.1 U+0000D7FF
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0x9f);
+ utf8 += char(0xbf);
+ str = QChar(0xd7ff);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.1") << utf8 << str << -1;
+
+ // 2.3.2 U+0000E000
+ utf8.clear();
+ utf8 += char(0xee);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = QChar(0xe000);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.2") << utf8 << str << -1;
+
+ // 2.3.3 U+0000FFFD
+ utf8.clear();
+ utf8 += char(0xef);
+ utf8 += char(0xbf);
+ utf8 += char(0xbd);
+ str = QChar(QChar::ReplacementCharacter);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.3") << utf8 << str << -1;
+
+ // 2.3.4 U+0010FFFD
+ utf8.clear();
+ utf8 += char(0xf4);
+ utf8 += char(0x8f);
+ utf8 += char(0xbf);
+ utf8 += char(0xbd);
+ str.clear();
+ str += QChar(0xdbff);
+ str += QChar(0xdffd);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.4") << utf8 << str << -1;
+
+ // 2.3.5 U+00110000
+ utf8.clear();
+ utf8 += char(0xf4);
+ utf8 += char(0x90);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.5") << utf8 << str << -1;
+
+ // 3.1.1
+ utf8.clear();
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.1") << utf8 << str << -1;
+
+ // 3.1.2
+ utf8.clear();
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.2") << utf8 << str << -1;
+
+ // 3.1.3
+ utf8.clear();
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.3") << utf8 << str << -1;
+
+ // 3.1.4
+ utf8.clear();
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.4") << utf8 << str << -1;
+
+ // 3.1.5
+ utf8.clear();
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.5") << utf8 << str << -1;
+
+ // 3.1.6
+ utf8.clear();
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.6") << utf8 << str << -1;
+
+ // 3.1.7
+ utf8.clear();
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.7") << utf8 << str << -1;
+
+ // 3.1.8
+ utf8.clear();
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ utf8 += char(0xbf);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.8") << utf8 << str << -1;
+
+ // 3.1.9
+ utf8.clear();
+ for (uint i = 0x80; i<= 0xbf; ++i)
+ utf8 += i;
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.1.9") << utf8 << str << -1;
+
+ // 3.2.1
+ utf8.clear();
+ str.clear();
+ for (uint i = 0xc8; i <= 0xdf; ++i) {
+ utf8 += i;
+ utf8 += char(0x20);
+
+ str += QChar::ReplacementCharacter;
+ str += QChar(0x0020);
+ }
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.1") << utf8 << str << -1;
+
+ // 3.2.2
+ utf8.clear();
+ str.clear();
+ for (uint i = 0xe0; i <= 0xef; ++i) {
+ utf8 += i;
+ utf8 += char(0x20);
+
+ str += QChar::ReplacementCharacter;
+ str += QChar(0x0020);
+ }
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.2") << utf8 << str << -1;
+
+ // 3.2.3
+ utf8.clear();
+ str.clear();
+ for (uint i = 0xf0; i <= 0xf7; ++i) {
+ utf8 += i;
+ utf8 += 0x20;
+
+ str += QChar::ReplacementCharacter;
+ str += QChar(0x0020);
+ }
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.3") << utf8 << str << -1;
+
+ // 3.2.4
+ utf8.clear();
+ str.clear();
+ for (uint i = 0xf8; i <= 0xfb; ++i) {
+ utf8 += i;
+ utf8 += 0x20;
+
+ str += QChar::ReplacementCharacter;
+ str += QChar(0x0020);
+ }
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.4") << utf8 << str << -1;
+
+ // 3.2.5
+ utf8.clear();
+ str.clear();
+ for (uint i = 0xfc; i <= 0xfd; ++i) {
+ utf8 += i;
+ utf8 += 0x20;
+
+ str += QChar::ReplacementCharacter;
+ str += QChar(0x0020);
+ }
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.2.5") << utf8 << str << -1;
+
+ // 3.3.1
+ utf8.clear();
+ utf8 += char(0xc0);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.1") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.1-1") << utf8 << str << -1;
+
+ // 3.3.2
+ utf8.clear();
+ utf8 += char(0xe0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-1") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xe0);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-2") << utf8 << str << -1;
+ utf8 += 0x30;
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.2-3") << utf8 << str << -1;
+
+ // 3.3.3
+ utf8.clear();
+ utf8 += char(0xf0);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-1") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xf0);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-2") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-3") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xf0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-4") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.3-5") << utf8 << str << -1;
+
+ // 3.3.4
+ utf8.clear();
+ utf8 += char(0xf8);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-1") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xf8);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-2") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-3") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xf8);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-4") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-5") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xf8);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-6") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.4-7") << utf8 << str << -1;
+
+ // 3.3.5
+ utf8.clear();
+ utf8 += char(0xfc);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-1") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfc);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-2") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-3") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfc);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-4") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-5") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfc);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-6") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-7") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfc);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-8") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.5-9") << utf8 << str << -1;
+
+ // 3.3.6
+ utf8.clear();
+ utf8 += char(0xdf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.6") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.6-1") << utf8 << str << -1;
+
+ // 3.3.7
+ utf8.clear();
+ utf8 += char(0xef);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-1") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xef);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-2") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.7-3") << utf8 << str << -1;
+
+ // 3.3.8
+ utf8.clear();
+ utf8 += char(0xf7);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-1") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xf7);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-2") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-3") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xf7);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-4") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.8-5") << utf8 << str << -1;
+
+ // 3.3.9
+ utf8.clear();
+ utf8 += char(0xfb);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-1") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfb);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-2") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-3") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfb);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-4") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-5") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfb);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-6") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.9-7") << utf8 << str << -1;
+
+ // 3.3.10
+ utf8.clear();
+ utf8 += char(0xfd);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-1") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfd);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-2") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-3") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfd);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-4") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-5") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfd);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-6") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-7") << utf8 << str << -1;
+
+ utf8.clear();
+ utf8 += char(0xfd);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-8") << utf8 << str << -1;
+ utf8 += char(0x30);
+ str += 0x30;
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.3.10-9") << utf8 << str << -1;
+
+ // 3.4
+ utf8.clear();
+ utf8 += char(0xc0);
+ utf8 += char(0xe0);
+ utf8 += char(0x80);
+ utf8 += char(0xf0);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0xf8);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0xfc);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0xdf);
+ utf8 += char(0xef);
+ utf8 += char(0xbf);
+ utf8 += char(0xf7);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xfb);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xfd);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.4") << utf8 << str << -1;
+
+ // 3.5.1
+ utf8.clear();
+ utf8 += char(0xfe);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.1") << utf8 << str << -1;
+
+ // 3.5.2
+ utf8.clear();
+ utf8 += char(0xff);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.2") << utf8 << str << -1;
+
+ // 3.5.2
+ utf8.clear();
+ utf8 += char(0xfe);
+ utf8 += char(0xfe);
+ utf8 += char(0xff);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 3.5.2-1") << utf8 << str << -1;
+
+ // 4.1.1
+ utf8.clear();
+ utf8 += char(0xc0);
+ utf8 += char(0xaf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.1") << utf8 << str << -1;
+
+ // 4.1.2
+ utf8.clear();
+ utf8 += char(0xe0);
+ utf8 += char(0x80);
+ utf8 += char(0xaf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.2") << utf8 << str << -1;
+
+ // 4.1.3
+ utf8.clear();
+ utf8 += char(0xf0);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0xaf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.3") << utf8 << str << -1;
+
+ // 4.1.4
+ utf8.clear();
+ utf8 += char(0xf8);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0xaf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.4") << utf8 << str << -1;
+
+ // 4.1.5
+ utf8.clear();
+ utf8 += char(0xfc);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0xaf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.5") << utf8 << str << -1;
+
+ // 4.2.1
+ utf8.clear();
+ utf8 += char(0xc1);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.1") << utf8 << str << -1;
+
+ // 4.2.2
+ utf8.clear();
+ utf8 += char(0xe0);
+ utf8 += char(0x9f);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.2") << utf8 << str << -1;
+
+ // 4.2.3
+ utf8.clear();
+ utf8 += char(0xf0);
+ utf8 += char(0x8f);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.3") << utf8 << str << -1;
+
+ // 4.2.4
+ utf8.clear();
+ utf8 += char(0xf8);
+ utf8 += char(0x87);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.4") << utf8 << str << -1;
+
+ // 4.2.5
+ utf8.clear();
+ utf8 += char(0xfc);
+ utf8 += char(0x83);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.5") << utf8 << str << -1;
+
+ // 4.3.1
+ utf8.clear();
+ utf8 += char(0xc0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.1") << utf8 << str << -1;
+
+ // 4.3.2
+ utf8.clear();
+ utf8 += char(0xe0);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.2") << utf8 << str << -1;
+
+ // 4.3.3
+ utf8.clear();
+ utf8 += char(0xf0);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.3") << utf8 << str << -1;
+
+ // 4.3.4
+ utf8.clear();
+ utf8 += char(0xf8);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.4") << utf8 << str << -1;
+
+ // 4.3.5
+ utf8.clear();
+ utf8 += char(0xfc);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.5") << utf8 << str << -1;
+
+ // 5.1.1
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xa0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.1") << utf8 << str << -1;
+
+ // 5.1.2
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xad);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.2") << utf8 << str << -1;
+
+ // 5.1.3
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xae);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.3") << utf8 << str << -1;
+
+ // 5.1.4
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xaf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.4") << utf8 << str << -1;
+
+ // 5.1.5
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xb0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.5") << utf8 << str << -1;
+
+ // 5.1.6
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xbe);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.6") << utf8 << str << -1;
+
+ // 5.1.7
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.7") << utf8 << str << -1;
+
+ // 5.2.1
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xa0);
+ utf8 += char(0x80);
+ utf8 += char(0xed);
+ utf8 += char(0xb0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.1") << utf8 << str << -1;
+
+ // 5.2.2
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xa0);
+ utf8 += char(0x80);
+ utf8 += char(0xed);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.2") << utf8 << str << -1;
+
+ // 5.2.3
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xad);
+ utf8 += char(0xbf);
+ utf8 += char(0xed);
+ utf8 += char(0xb0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.3") << utf8 << str << -1;
+
+ // 5.2.4
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xad);
+ utf8 += char(0xbf);
+ utf8 += char(0xed);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.4") << utf8 << str << -1;
+
+ // 5.2.5
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xae);
+ utf8 += char(0x80);
+ utf8 += char(0xed);
+ utf8 += char(0xb0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.5") << utf8 << str << -1;
+
+ // 5.2.6
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xae);
+ utf8 += char(0x80);
+ utf8 += char(0xed);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.6") << utf8 << str << -1;
+
+ // 5.2.7
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xaf);
+ utf8 += char(0xbf);
+ utf8 += char(0xed);
+ utf8 += char(0xb0);
+ utf8 += char(0x80);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.7") << utf8 << str << -1;
+
+ // 5.2.8
+ utf8.clear();
+ utf8 += char(0xed);
+ utf8 += char(0xaf);
+ utf8 += char(0xbf);
+ utf8 += char(0xed);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ str = fromInvalidUtf8Sequence(utf8);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.8") << utf8 << str << -1;
+
+ // 5.3.1 - non-character code
+ utf8.clear();
+ utf8 += char(0xef);
+ utf8 += char(0xbf);
+ utf8 += char(0xbe);
+ //str = QChar(QChar::ReplacementCharacter);
+ str = QChar(0xfffe);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.1") << utf8 << str << -1;
+
+ // 5.3.2 - non-character code
+ utf8.clear();
+ utf8 += char(0xef);
+ utf8 += char(0xbf);
+ utf8 += char(0xbf);
+ //str = QChar(QChar::ReplacementCharacter);
+ str = QChar(0xffff);
+ QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.2") << utf8 << str << -1;
+}
+
+// Decodes each data row twice -- once through a stateless QStringDecoder and
+// once through QString::fromUtf8() -- and checks both against the expected
+// string from the "res" column.
+void tst_QStringConverter::utf8Codec()
+{
+    QFETCH(QByteArray, utf8);
+    QFETCH(QString, res);
+    QFETCH(int, len);
+
+    // A null byte array is handed on as a null pointer (spelled nullptr rather
+    // than the literal 0), so both decoders see the same input.
+    const char *bytes = utf8.isNull() ? nullptr : utf8.constData();
+
+    // A negative len means the row gives no explicit length; the decoder call
+    // then uses the qstrlen() of the data.
+    QStringDecoder decoder(QStringDecoder::Utf8, QStringDecoder::Stateless);
+    QString str = decoder(bytes, len < 0 ? qstrlen(utf8.constData()) : len);
+    QCOMPARE(str, res);
+
+    // fromUtf8() receives len unmodified, negative values included.
+    str = QString::fromUtf8(bytes, len);
+    QCOMPARE(str, res);
+}
+
+// Data for utf8bom(): raw UTF-8 input bytes and the string they must decode to.
+void tst_QStringConverter::utf8bom_data()
+{
+    QTest::addColumn<QByteArray>("data");
+    QTest::addColumn<QString>("result");
+
+    // Input that does not start with the BOM bytes (EF BB BF).
+    QTest::newRow("nobom")
+        << QByteArray("\302\240", 2)
+        << QString::fromLatin1("\240");
+    QTest::newRow("nobom 2")
+        << QByteArray("\342\200\235", 3)
+        << QString(QChar(0x201d));
+
+    // Sequences sharing one or two leading bytes with the BOM without being one.
+    QTest::newRow("bom1")
+        << QByteArray("\357\200\200", 3)
+        << QString(QChar(0xf000));
+    QTest::newRow("bom2")
+        << QByteArray("\357\273\200", 3)
+        << QString(QChar(0xfec0));
+
+    // A BOM at the very start: the expected result holds only the payload.
+    QTest::newRow("normal-bom")
+        << QByteArray("\357\273\277a", 4)
+        << QString("a");
+
+    // A BOM in the middle of the input: U+FEFF is expected in the result.
+    { // test the non-SIMD code-path
+        static const ushort shortText[] = { 0x61, 0xfeff, 0x62 };
+        QTest::newRow("middle-bom (non SIMD)")
+            << QByteArray("a\357\273\277b")
+            << QString::fromUtf16(shortText, sizeof(shortText)/sizeof(shortText[0]));
+    }
+
+    { // test the SIMD code-path
+        static const ushort longText[] = { 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0xfeff, 0x6d };
+        QTest::newRow("middle-bom (SIMD)")
+            << QByteArray("abcdefghijkl\357\273\277m")
+            << QString::fromUtf16(longText, sizeof(longText)/sizeof(longText[0]));
+    }
+}
+
+// Decodes the row's bytes with a default-flag UTF-8 decoder and compares the
+// outcome with the expected string.
+void tst_QStringConverter::utf8bom()
+{
+    QFETCH(QByteArray, data);
+    QFETCH(QString, result);
+
+    QStringDecoder utf8Decoder(QStringDecoder::Utf8);
+    const QString decoded = utf8Decoder(data.constData(), data.length());
+
+    QCOMPARE(decoded, result);
+}
+
+// Data for utf8stateful(): a UTF-8 sequence split over two buffers, plus the
+// string both buffers together must decode to.
+void tst_QStringConverter::utf8stateful_data()
+{
+    QTest::addColumn<QByteArray>("buffer1");
+    QTest::addColumn<QByteArray>("buffer2");
+    QTest::addColumn<QString>("result"); // null QString indicates decoder error
+
+    // Expected "result" for every row that must end in a decoder error.
+    const QString decoderError;
+
+    // valid buffer continuations
+    QTest::newRow("1of2+valid") << QByteArray("\xc2") << QByteArray("\xa0") << "\xc2\xa0";
+    QTest::newRow("1of3+valid") << QByteArray("\xe0") << QByteArray("\xa0\x80") << "\xe0\xa0\x80";
+    QTest::newRow("2of3+valid") << QByteArray("\xe0\xa0") << QByteArray("\x80") << "\xe0\xa0\x80";
+    QTest::newRow("1of4+valid") << QByteArray("\360") << QByteArray("\220\210\203") << "\360\220\210\203";
+    QTest::newRow("2of4+valid") << QByteArray("\360\220") << QByteArray("\210\203") << "\360\220\210\203";
+    QTest::newRow("3of4+valid") << QByteArray("\360\220\210") << QByteArray("\203") << "\360\220\210\203";
+    QTest::newRow("1ofBom+valid") << QByteArray("\xef") << QByteArray("\xbb\xbf") << "";
+    QTest::newRow("2ofBom+valid") << QByteArray("\xef\xbb") << QByteArray("\xbf") << "";
+
+    // invalid continuation
+    QTest::newRow("1of2+invalid") << QByteArray("\xc2") << QByteArray("a") << decoderError;
+    QTest::newRow("1of3+invalid") << QByteArray("\xe0") << QByteArray("a") << decoderError;
+    QTest::newRow("2of3+invalid") << QByteArray("\xe0\xa0") << QByteArray("a") << decoderError;
+    QTest::newRow("1of4+invalid") << QByteArray("\360") << QByteArray("a") << decoderError;
+    QTest::newRow("2of4+invalid") << QByteArray("\360\220") << QByteArray("a") << decoderError;
+    QTest::newRow("3of4+invalid") << QByteArray("\360\220\210") << QByteArray("a") << decoderError;
+
+    // invalid: sequence too short (the empty second buffer causes a state reset)
+    QTest::newRow("1of2+empty") << QByteArray("\xc2") << QByteArray() << decoderError;
+    QTest::newRow("1of3+empty") << QByteArray("\xe0") << QByteArray() << decoderError;
+    QTest::newRow("2of3+empty") << QByteArray("\xe0\xa0") << QByteArray() << decoderError;
+    QTest::newRow("1of4+empty") << QByteArray("\360") << QByteArray() << decoderError;
+    QTest::newRow("2of4+empty") << QByteArray("\360\220") << QByteArray() << decoderError;
+    QTest::newRow("3of4+empty") << QByteArray("\360\220\210") << QByteArray() << decoderError;
+
+    // overlong sequence:
+    QTest::newRow("overlong-1of2") << QByteArray("\xc1") << QByteArray("\x81") << decoderError;
+    QTest::newRow("overlong-1of3") << QByteArray("\xe0") << QByteArray("\x81\x81") << decoderError;
+    QTest::newRow("overlong-2of3") << QByteArray("\xe0\x81") << QByteArray("\x81") << decoderError;
+    QTest::newRow("overlong-1of4") << QByteArray("\xf0") << QByteArray("\x80\x81\x81") << decoderError;
+    QTest::newRow("overlong-2of4") << QByteArray("\xf0\x80") << QByteArray("\x81\x81") << decoderError;
+    QTest::newRow("overlong-3of4") << QByteArray("\xf0\x80\x81") << QByteArray("\x81") << decoderError;
+
+    // out of range:
+    // leading byte 0xF4 can produce codepoints above U+10FFFF, which aren't valid
+    QTest::newRow("outofrange1-1of4") << QByteArray("\xf4") << QByteArray("\x90\x80\x80") << decoderError;
+    QTest::newRow("outofrange1-2of4") << QByteArray("\xf4\x90") << QByteArray("\x80\x80") << decoderError;
+    QTest::newRow("outofrange1-3of4") << QByteArray("\xf4\x90\x80") << QByteArray("\x80") << decoderError;
+    QTest::newRow("outofrange2-1of4") << QByteArray("\xf5") << QByteArray("\x90\x80\x80") << decoderError;
+    QTest::newRow("outofrange2-2of4") << QByteArray("\xf5\x90") << QByteArray("\x80\x80") << decoderError;
+    QTest::newRow("outofrange2-3of4") << QByteArray("\xf5\x90\x80") << QByteArray("\x80") << decoderError;
+    QTest::newRow("outofrange-1of5") << QByteArray("\xf8") << QByteArray("\x88\x80\x80\x80") << decoderError;
+    QTest::newRow("outofrange-2of5") << QByteArray("\xf8\x88") << QByteArray("\x80\x80\x80") << decoderError;
+    QTest::newRow("outofrange-3of5") << QByteArray("\xf8\x88\x80") << QByteArray("\x80\x80") << decoderError;
+    QTest::newRow("outofrange-4of5") << QByteArray("\xf8\x88\x80\x80") << QByteArray("\x80") << decoderError;
+    QTest::newRow("outofrange-1of6") << QByteArray("\xfc") << QByteArray("\x84\x80\x80\x80\x80") << decoderError;
+    QTest::newRow("outofrange-2of6") << QByteArray("\xfc\x84") << QByteArray("\x80\x80\x80\x80") << decoderError;
+    QTest::newRow("outofrange-3of6") << QByteArray("\xfc\x84\x80") << QByteArray("\x80\x80\x80") << decoderError;
+    QTest::newRow("outofrange-4of6") << QByteArray("\xfc\x84\x80\x80") << QByteArray("\x80\x80") << decoderError;
+    QTest::newRow("outofrange-5of6") << QByteArray("\xfc\x84\x80\x80\x80") << QByteArray("\x80") << decoderError;
+}
+
+// Feeds the two buffers of a row to one decoder, one after the other. Rows
+// with a null expected result must leave the decoder in the error state; all
+// other rows must decode without error to the expected string.
+void tst_QStringConverter::utf8stateful()
+{
+    QFETCH(QByteArray, buffer1);
+    QFETCH(QByteArray, buffer2);
+    QFETCH(QString, result);
+
+    QStringDecoder decoder(QStringDecoder::Utf8);
+    QVERIFY(decoder.isValid());
+
+    const bool errorExpected = result.isNull();
+    QString decoded = decoder(buffer1);
+
+    if (!errorExpected) {
+        QVERIFY(!decoder.hasError());
+        decoded += decoder(buffer2);
+        QVERIFY(!decoder.hasError());
+        QCOMPARE(decoded, result);
+        return;
+    }
+
+    // The first buffer alone may already have triggered the error.
+    if (decoder.hasError())
+        return;
+
+    // incomplete data
+    decoded += decoder(buffer2);
+    QVERIFY(decoder.hasError());
+}
+
+// Data for utfHeaders(): for each UTF encoding, input with and without a byte
+// order mark, decoded with default flags and with IgnoreHeader.
+void tst_QStringConverter::utfHeaders_data()
+{
+    QTest::addColumn<QStringConverter::Encoding>("encoding");
+    QTest::addColumn<QStringConverter::Flag>("flags");
+    QTest::addColumn<QByteArray>("encoded");
+    QTest::addColumn<QString>("unicode");
+
+    // Expected strings; the "bom" variants start with U+FEFF.
+    const QString hello = QString::fromLatin1("hello");
+    const QString hel = QString::fromLatin1("hel");
+    const QString bomHello = QString(QChar(0xfeff)) + hello;
+    const QString bomHel = QString(QChar(0xfeff)) + hel;
+
+    // Encoded payloads, each used by several rows below.
+    const QByteArray utf8Hello("hello");
+    const QByteArray utf8BomHello("\xef\xbb\xbfhello");
+    const QByteArray utf16BeHel("\0h\0e\0l", 6);
+    const QByteArray utf16BeBomHel("\xfe\xff\0h\0e\0l", 8);
+    const QByteArray utf16LeHel("h\0e\0l\0", 6);
+    const QByteArray utf16LeBomHel("\xff\xfeh\0e\0l\0", 8);
+    const QByteArray utf32BeHel("\0\0\0h\0\0\0e\0\0\0l", 12);
+    const QByteArray utf32BeBomHel("\0\0\xfe\xff\0\0\0h\0\0\0e\0\0\0l", 16);
+    const QByteArray utf32LeHel("h\0\0\0e\0\0\0l\0\0\0", 12);
+    const QByteArray utf32LeBomHel("\xff\xfe\0\0h\0\0\0e\0\0\0l\0\0\0", 16);
+
+    const bool hostIsBigEndian = (QSysInfo::ByteOrder == QSysInfo::BigEndian);
+
+    const auto addRow = [](const char *tag, QStringConverter::Encoding encoding,
+                           QStringConverter::Flag flag, const QByteArray &encoded,
+                           const QString &unicode) {
+        QTest::newRow(tag) << encoding << flag << encoded << unicode;
+    };
+
+    addRow("utf8 bom", QStringConverter::Utf8, QStringConverter::DefaultConversion, utf8BomHello, hello);
+    addRow("utf8 nobom", QStringConverter::Utf8, QStringConverter::DefaultConversion, utf8Hello, hello);
+    addRow("utf8 bom ignore header", QStringConverter::Utf8, QStringConverter::IgnoreHeader, utf8BomHello, bomHello);
+    addRow("utf8 nobom ignore header", QStringConverter::Utf8, QStringConverter::IgnoreHeader, utf8Hello, hello);
+
+    addRow("utf16 bom be", QStringConverter::Utf16, QStringConverter::DefaultConversion, utf16BeBomHel, hel);
+    addRow("utf16 bom le", QStringConverter::Utf16, QStringConverter::DefaultConversion, utf16LeBomHel, hel);
+    // The BOM-less and IgnoreHeader rows use the host byte order.
+    if (hostIsBigEndian) {
+        addRow("utf16 nobom", QStringConverter::Utf16, QStringConverter::DefaultConversion, utf16BeHel, hel);
+        addRow("utf16 bom be ignore header", QStringConverter::Utf16, QStringConverter::IgnoreHeader, utf16BeBomHel, bomHel);
+    } else {
+        addRow("utf16 nobom", QStringConverter::Utf16, QStringConverter::DefaultConversion, utf16LeHel, hel);
+        addRow("utf16 bom le ignore header", QStringConverter::Utf16, QStringConverter::IgnoreHeader, utf16LeBomHel, bomHel);
+    }
+
+    addRow("utf16-be bom be", QStringConverter::Utf16BE, QStringConverter::DefaultConversion, utf16BeBomHel, hel);
+    addRow("utf16-be nobom", QStringConverter::Utf16BE, QStringConverter::DefaultConversion, utf16BeHel, hel);
+    addRow("utf16-be bom be ignore header", QStringConverter::Utf16BE, QStringConverter::IgnoreHeader, utf16BeBomHel, bomHel);
+
+    addRow("utf16-le bom le", QStringConverter::Utf16LE, QStringConverter::DefaultConversion, utf16LeBomHel, hel);
+    addRow("utf16-le nobom", QStringConverter::Utf16LE, QStringConverter::DefaultConversion, utf16LeHel, hel);
+    addRow("utf16-le bom le ignore header", QStringConverter::Utf16LE, QStringConverter::IgnoreHeader, utf16LeBomHel, bomHel);
+
+    addRow("utf32 bom be", QStringConverter::Utf32, QStringConverter::DefaultConversion, utf32BeBomHel, hel);
+    addRow("utf32 bom le", QStringConverter::Utf32, QStringConverter::DefaultConversion, utf32LeBomHel, hel);
+    if (hostIsBigEndian) {
+        addRow("utf32 nobom", QStringConverter::Utf32, QStringConverter::DefaultConversion, utf32BeHel, hel);
+        addRow("utf32 bom be ignore header", QStringConverter::Utf32, QStringConverter::IgnoreHeader, utf32BeBomHel, bomHel);
+    } else {
+        addRow("utf32 nobom", QStringConverter::Utf32, QStringConverter::DefaultConversion, utf32LeHel, hel);
+        addRow("utf32 bom le ignore header", QStringConverter::Utf32, QStringConverter::IgnoreHeader, utf32LeBomHel, bomHel);
+    }
+
+    addRow("utf32-be bom be", QStringConverter::Utf32BE, QStringConverter::DefaultConversion, utf32BeBomHel, hel);
+    addRow("utf32-be nobom", QStringConverter::Utf32BE, QStringConverter::DefaultConversion, utf32BeHel, hel);
+    addRow("utf32-be bom be ignore header", QStringConverter::Utf32BE, QStringConverter::IgnoreHeader, utf32BeBomHel, bomHel);
+
+    addRow("utf32-le bom le", QStringConverter::Utf32LE, QStringConverter::DefaultConversion, utf32LeBomHel, hel);
+    addRow("utf32-le nobom", QStringConverter::Utf32LE, QStringConverter::DefaultConversion, utf32LeHel, hel);
+    addRow("utf32-le bom le ignore header", QStringConverter::Utf32LE, QStringConverter::IgnoreHeader, utf32LeBomHel, bomHel);
+}
+
+// Decodes the row's bytes and compares with the expected string. Unless the
+// row is BOM-less or is tagged with the opposite of the host byte order, the
+// string is then encoded again and must reproduce the original bytes.
+void tst_QStringConverter::utfHeaders()
+{
+    QFETCH(QStringConverter::Encoding, encoding);
+    QFETCH(QStringConverter::Flag, flags);
+    QFETCH(QByteArray, encoded);
+    QFETCH(QString, unicode);
+
+    QStringDecoder decode(encoding, flags);
+    QVERIFY(decode.isValid());
+
+    const QString decoded = decode(encoded);
+    QCOMPARE(decoded.length(), unicode.length());
+    QCOMPARE(decoded, unicode);
+
+    // The reverse direction is decided from the row's tag.
+    const QString tag(QTest::currentDataTag());
+    if (tag.endsWith("nobom"))
+        return;
+
+    const bool hostIsBigEndian = (QSysInfo::ByteOrder == QSysInfo::BigEndian);
+    const QLatin1String foreignOrderMarker = hostIsBigEndian ? QLatin1String(" le") : QLatin1String(" be");
+    if (tag.contains(foreignOrderMarker))
+        return;
+
+    QStringEncoder encode(encoding, flags);
+    QVERIFY(encode.isValid());
+    const QByteArray reencoded = encode(unicode);
+    QCOMPARE(reencoded, encoded);
+}
+
+// Runnable used by threadSafety(): repeatedly encodes a fixed string with one
+// encoding, decodes it again, and stores the decoded text in *target.
+class LoadAndConvert: public QRunnable
+{
+public:
+    // encoding:    the encoding used for both the encoder and the decoder.
+    // destination: where run() writes each decoded string; not owned.
+    LoadAndConvert(QStringConverter::Encoding encoding, QString *destination)
+        : encode(encoding), decode(encoding), target(destination)
+    {}
+    QStringEncoder encode;
+    QStringDecoder decode;
+    QString *target;
+    // Marked override so the compiler checks that this really reimplements
+    // QRunnable::run().
+    void run() override
+    {
+        const QString str = QString::fromLatin1("abcdefghijklmonpqrstufvxyz");
+        for (int i = 0; i < 10000; ++i) {
+            const QByteArray b = encode(str);
+            *target = decode(b);
+            QCOMPARE(*target, str);
+        }
+    }
+};
+
+// Starts one LoadAndConvert runnable per encoding on the global thread pool
+// and, once all have finished, checks the string each of them stored.
+void tst_QStringConverter::threadSafety()
+{
+    QThreadPool *pool = QThreadPool::globalInstance();
+    pool->setMaxThreadCount(12);
+
+    // One result slot per encoding. The vector is sized up front and not
+    // resized afterwards, so the element addresses given to the runnables
+    // stay valid while they run.
+    QVector<QString> res;
+    res.resize(QStringConverter::LastEncoding + 1);
+    for (int i = 0; i < QStringConverter::LastEncoding + 1; ++i) {
+        // NOTE(review): the runnable is never deleted here; presumably the
+        // pool takes ownership (QRunnable auto-delete) -- confirm.
+        pool->start(new LoadAndConvert(QStringConverter::Encoding(i), &res[i]));
+    }
+
+    // wait for all threads to finish working
+    pool->waitForDone();
+
+    // Build the expected value once and iterate by const reference instead of
+    // copying each QString.
+    const QString expected = QString::fromLatin1("abcdefghijklmonpqrstufvxyz");
+    for (const QString &converted : res)
+        QCOMPARE(converted, expected);
+}
+
+// A global instance of this struct runs a UTF-8 decode from its destructor,
+// i.e. while global objects are being destroyed.
+struct DontCrashAtExit {
+    ~DontCrashAtExit() {
+        QStringDecoder utf8Decoder(QStringDecoder::Utf8);
+        QVERIFY(utf8Decoder.isValid());
+        // Only the call matters; the decoded text is discarded.
+        static_cast<void>(utf8Decoder("azerty"));
+    }
+} dontCrashAtExit;
+
+
+QTEST_MAIN(tst_QStringConverter) // Qt Test entry-point macro for the tst_QStringConverter test class
+#include "tst_qstringconverter.moc" // presumably the moc output for classes declared in this .cpp -- confirm
diff --git a/tests/auto/corelib/text/qstringconverter/utf8.txt b/tests/auto/corelib/text/qstringconverter/utf8.txt
new file mode 100644
index 0000000000..f5ab44c8f4
--- /dev/null
+++ b/tests/auto/corelib/text/qstringconverter/utf8.txt
@@ -0,0 +1 @@
+<doc>𐀀􏿽</doc>
diff --git a/tests/auto/corelib/text/text.pro b/tests/auto/corelib/text/text.pro
index 09d58bd74c..cb7de443bd 100644
--- a/tests/auto/corelib/text/text.pro
+++ b/tests/auto/corelib/text/text.pro
@@ -15,6 +15,7 @@ SUBDIRS = \
qstring_no_cast_from_bytearray \
qstringapisymmetry \
qstringbuilder \
+ qstringconverter \
qstringiterator \
qstringlist \
qstringmatcher \