diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2013-10-20 17:43:46 +0100 |
---|---|---|
committer | The Qt Project <gerrit-noreply@qt-project.org> | 2014-01-09 22:34:54 +0100 |
commit | 8dd47e34b9b96ac27a99cdcf10b8aec506882fc2 (patch) | |
tree | be92b77f4006e2b96683e5bfd4810db09a5b15ab /tests/auto/corelib/codecs/qtextcodec | |
parent | d51130cc3a00df8147e2eb0799e06865c901c6e0 (diff) |
Add a new UTF-8 decoder, similar to the encoder we've just added
Like before, this is taken from the existing QUrl code and is optimized for
ASCII handling (for the same reasons). And like previously, make
QString::fromUtf8 use a stateless version of the codec, which is faster.
There's a small change in behavior in the decoding: we insert a U+FFFD for
each byte that cannot be decoded properly. Previously, it would "eat" all bad
high-bit bytes and replace them all with one single U+FFFD. Either behavior is
allowed by the UTF-8 specifications, even though this new behavior will cause
misalignment in the Markus Kuhn sample UTF-8 text.
Change-Id: Ib1b1f0b4291293bab345acaf376e00204ed87565
Reviewed-by: Olivier Goffart <ogoffart@woboq.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'tests/auto/corelib/codecs/qtextcodec')
-rw-r--r-- | tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp | 76 |
1 files changed, 29 insertions, 47 deletions
diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp index 8e1b3cf3b2..12b81ee7d4 100644 --- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp +++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp @@ -456,7 +456,7 @@ void tst_QTextCodec::flagF7808080() const //QVERIFY(!codec->canEncode(QChar(0x1C0000))); QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull); - QVERIFY(codec->toUnicode(input.constData(), input.length(), &state) == QChar(0)); + QCOMPARE(codec->toUnicode(input.constData(), input.length(), &state), QString(input.size(), QChar(0))); } void tst_QTextCodec::nonFlaggedEFBFBF() const @@ -689,8 +689,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xbf); utf8 += char(0xbf); utf8 += char(0xbf); - str.clear(); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.4") << utf8 << str << -1; // 2.2.5 U+03FFFFFF (not a valid Unicode character) @@ -755,8 +754,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0x90); utf8 += char(0x80); utf8 += char(0x80); - str.clear(); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.3.5") << utf8 << str << -1; // 3.1.1 @@ -1244,7 +1242,7 @@ void tst_QTextCodec::utf8Codec_data() utf8.clear(); utf8 += char(0xc0); utf8 += char(0xaf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.1") << utf8 << str << -1; // 4.1.2 @@ -1252,7 +1250,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xe0); utf8 += char(0x80); utf8 += char(0xaf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.2") << utf8 << str << 
-1; // 4.1.3 @@ -1261,7 +1259,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0x80); utf8 += char(0x80); utf8 += char(0xaf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.1.3") << utf8 << str << -1; // 4.1.4 @@ -1289,7 +1287,7 @@ void tst_QTextCodec::utf8Codec_data() utf8.clear(); utf8 += char(0xc1); utf8 += char(0xbf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.1") << utf8 << str << -1; // 4.2.2 @@ -1297,7 +1295,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xe0); utf8 += char(0x9f); utf8 += char(0xbf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.2") << utf8 << str << -1; // 4.2.3 @@ -1306,7 +1304,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0x8f); utf8 += char(0xbf); utf8 += char(0xbf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.2.3") << utf8 << str << -1; // 4.2.4 @@ -1334,7 +1332,7 @@ void tst_QTextCodec::utf8Codec_data() utf8.clear(); utf8 += char(0xc0); utf8 += char(0x80); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.1") << utf8 << str << -1; // 4.3.2 @@ -1342,7 +1340,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xe0); utf8 += char(0x80); utf8 += char(0x80); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.2") << utf8 << str << -1; // 4.3.3 @@ -1351,7 +1349,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0x80); utf8 += char(0x80); utf8 += char(0x80); - str = 
QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 4.3.3") << utf8 << str << -1; // 4.3.4 @@ -1380,7 +1378,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xa0); utf8 += char(0x80); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.1") << utf8 << str << -1; // 5.1.2 @@ -1388,7 +1386,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xad); utf8 += char(0xbf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.2") << utf8 << str << -1; // 5.1.3 @@ -1396,7 +1394,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xae); utf8 += char(0x80); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.3") << utf8 << str << -1; // 5.1.4 @@ -1404,7 +1402,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xaf); utf8 += char(0xbf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.4") << utf8 << str << -1; // 5.1.5 @@ -1412,7 +1410,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xb0); utf8 += char(0x80); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.5") << utf8 << str << -1; // 5.1.6 @@ -1420,7 +1418,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xbe); utf8 += char(0x80); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); 
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.6") << utf8 << str << -1; // 5.1.7 @@ -1428,7 +1426,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xbf); utf8 += char(0xbf); - str = QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.1.7") << utf8 << str << -1; // 5.2.1 @@ -1439,9 +1437,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xb0); utf8 += char(0x80); - str.clear(); - str += QChar(QChar::ReplacementCharacter); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.1") << utf8 << str << -1; // 5.2.2 @@ -1452,9 +1448,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xbf); utf8 += char(0xbf); - str.clear(); - str += QChar(QChar::ReplacementCharacter); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.2") << utf8 << str << -1; // 5.2.3 @@ -1465,9 +1459,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xb0); utf8 += char(0x80); - str.clear(); - str += QChar(QChar::ReplacementCharacter); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.3") << utf8 << str << -1; // 5.2.4 @@ -1478,9 +1470,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xbf); utf8 += char(0xbf); - str.clear(); - str += QChar(QChar::ReplacementCharacter); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.4") << utf8 << str << -1; // 5.2.5 @@ -1491,9 +1481,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += 
char(0xb0); utf8 += char(0x80); - str.clear(); - str += QChar(QChar::ReplacementCharacter); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.5") << utf8 << str << -1; // 5.2.6 @@ -1504,9 +1492,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xbf); utf8 += char(0xbf); - str.clear(); - str += QChar(QChar::ReplacementCharacter); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.6") << utf8 << str << -1; // 5.2.7 @@ -1517,9 +1503,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xb0); utf8 += char(0x80); - str.clear(); - str += QChar(QChar::ReplacementCharacter); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.7") << utf8 << str << -1; // 5.2.8 @@ -1530,9 +1514,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xed); utf8 += char(0xbf); utf8 += char(0xbf); - str.clear(); - str += QChar(QChar::ReplacementCharacter); - str += QChar(QChar::ReplacementCharacter); + str = fromInvalidUtf8Sequence(utf8); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.8") << utf8 << str << -1; // 5.3.1 - non-character code @@ -1541,7 +1523,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xbf); utf8 += char(0xbe); //str = QChar(QChar::ReplacementCharacter); - str = QString::fromUtf8(utf8); + str = QChar(0xfffe); QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.1") << utf8 << str << -1; // 5.3.2 - non-character code @@ -1550,7 +1532,7 @@ void tst_QTextCodec::utf8Codec_data() utf8 += char(0xbf); utf8 += char(0xbf); //str = QChar(QChar::ReplacementCharacter); - str = QString::fromUtf8(utf8); + str = QChar(0xffff); 
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.2") << utf8 << str << -1; } |