summary refs log tree commit diff stats
path: root/tests/auto
diff options
context:
space:
mode:
author Kurt Pattyn <pattyn.kurt@gmail.com> 2013-10-06 11:40:47 +0200
committer The Qt Project <gerrit-noreply@qt-project.org> 2013-10-17 09:50:58 +0200
commit add2bf739ae96603cb919b908cbb53c00d0628cc (patch)
tree 9702a95d145fc9f429aa6f2ec104cfab75cae753 /tests/auto
parent e8853506bf82e569009e68a23437d6a134176f63 (diff)
Allow non-character codes in utf8 strings
Changed the processing of non-character code handling in the UTF8 codec.
Non-character codes are now accepted in QStrings, QUrls and QJson strings.
Unit tests were adapted accordingly.
For more info about non-character codes, see:
http://www.unicode.org/versions/corrigendum9.html

[ChangeLog][QtCore][QUtf8] UTF-8 now accepts non-character unicode points; these are not replaced by the replacement character anymore

[ChangeLog][QtCore][QUrl] QUrl now fully accepts non-character unicode points; they are encoded as percent characters; they can also be pretty decoded

[ChangeLog][QtCore][QJson] The Writer and the Parser now fully accept non-character unicode points.

Change-Id: I77cf4f0e6210741eac8082912a0b6118eced4f77
Task-number: QTBUG-33229
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'tests/auto')
-rw-r--r-- tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp 54
-rw-r--r-- tests/auto/corelib/codecs/utf8/tst_utf8.cpp 20
-rw-r--r-- tests/auto/corelib/codecs/utf8/utf8data.cpp 4
-rw-r--r-- tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp 7
-rw-r--r-- tests/auto/corelib/json/tst_qtjson.cpp 24
-rw-r--r-- tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp 21
-rw-r--r-- tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref 2
7 files changed, 66 insertions, 66 deletions
diff --git a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
index dd557b8d21..8e1b3cf3b2 100644
--- a/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
+++ b/tests/auto/corelib/codecs/qtextcodec/tst_qtextcodec.cpp
@@ -66,9 +66,9 @@ private slots:
void codecForLocale();
void asciiToIscii() const;
- void flagCodepointFFFF() const;
+ void nonFlaggedCodepointFFFF() const;
void flagF7808080() const;
- void flagEFBFBF() const;
+ void nonFlaggedEFBFBF() const;
void decode0D() const;
void aliasForUTF16() const;
void mibForTSCII() const;
@@ -409,9 +409,9 @@ void tst_QTextCodec::asciiToIscii() const
}
}
-void tst_QTextCodec::flagCodepointFFFF() const
+void tst_QTextCodec::nonFlaggedCodepointFFFF() const
{
- // This is an invalid Unicode codepoint.
+ //Check that the code point 0xFFFF (=non-character code 0xEFBFBF) is not flagged
const QChar ch(0xFFFF);
QString input(ch);
@@ -419,12 +419,11 @@ void tst_QTextCodec::flagCodepointFFFF() const
QVERIFY(codec);
const QByteArray asDecoded(codec->fromUnicode(input));
- QCOMPARE(asDecoded, QByteArray("?"));
+ QCOMPARE(asDecoded, QByteArray("\357\277\277"));
QByteArray ffff("\357\277\277");
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
- QVERIFY(codec->toUnicode(ffff.constData(), ffff.length(), &state) == QChar(0));
- QVERIFY(codec->toUnicode(ffff) == QChar(0xfffd));
+ QVERIFY(codec->toUnicode(ffff.constData(), ffff.length(), &state) == QByteArray::fromHex("EFBFBF"));
}
void tst_QTextCodec::flagF7808080() const
@@ -460,13 +459,16 @@ void tst_QTextCodec::flagF7808080() const
QVERIFY(codec->toUnicode(input.constData(), input.length(), &state) == QChar(0));
}
-void tst_QTextCodec::flagEFBFBF() const
+void tst_QTextCodec::nonFlaggedEFBFBF() const
{
- QByteArray invalidInput;
- invalidInput.resize(3);
- invalidInput[0] = char(0xEF);
- invalidInput[1] = char(0xBF);
- invalidInput[2] = char(0xBF);
+ /* Check that the codec does NOT flag EFBFBF.
+ * This is a regression test; see QTBUG-33229
+ */
+ QByteArray validInput;
+ validInput.resize(3);
+ validInput[0] = char(0xEF);
+ validInput[1] = char(0xBF);
+ validInput[2] = char(0xBF);
const QTextCodec *const codec = QTextCodec::codecForMib(106); // UTF-8
QVERIFY(codec);
@@ -474,21 +476,20 @@ void tst_QTextCodec::flagEFBFBF() const
{
//QVERIFY(!codec->canEncode(QChar(0xFFFF)));
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
- QVERIFY(codec->toUnicode(invalidInput.constData(), invalidInput.length(), &state) == QChar(0));
+ QVERIFY(codec->toUnicode(validInput.constData(), validInput.length(), &state) == QByteArray::fromHex("EFBFBF"));
QByteArray start("<?pi ");
- start.append(invalidInput);
+ start.append(validInput);
start.append("?>");
}
- /* When 0xEFBFBF is preceded by what seems to be an arbitrary character,
- * QTextCodec fails to flag it. */
+ // Check that 0xEFBFBF is correctly decoded when preceded by an arbitrary character
{
QByteArray start("B");
- start.append(invalidInput);
+ start.append(validInput);
QTextCodec::ConverterState state(QTextCodec::ConvertInvalidToNull);
- QVERIFY(codec->toUnicode(start.constData(), start.length(), &state) == QString::fromLatin1("B\0", 2));
+ QVERIFY(codec->toUnicode(start.constData(), start.length(), &state) == QByteArray("B").append(QByteArray::fromHex("EFBFBF")));
}
}
@@ -674,13 +675,12 @@ void tst_QTextCodec::utf8Codec_data()
str = QChar(0x7ff);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.2") << utf8 << str << -1;
- // 2.2.3 U+000FFFF
+ // 2.2.3 U+000FFFF - non-character code
utf8.clear();
utf8 += char(0xef);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str.clear();
- str += QChar::ReplacementCharacter;
+ str = QString::fromUtf8(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 2.2.3") << utf8 << str << -1;
// 2.2.4 U+001FFFFF
@@ -1535,20 +1535,22 @@ void tst_QTextCodec::utf8Codec_data()
str += QChar(QChar::ReplacementCharacter);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.2.8") << utf8 << str << -1;
- // 5.3.1
+ // 5.3.1 - non-character code
utf8.clear();
utf8 += char(0xef);
utf8 += char(0xbf);
utf8 += char(0xbe);
- str = QChar(QChar::ReplacementCharacter);
+ //str = QChar(QChar::ReplacementCharacter);
+ str = QString::fromUtf8(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.1") << utf8 << str << -1;
- // 5.3.2
+ // 5.3.2 - non-character code
utf8.clear();
utf8 += char(0xef);
utf8 += char(0xbf);
utf8 += char(0xbf);
- str = QChar(QChar::ReplacementCharacter);
+ //str = QChar(QChar::ReplacementCharacter);
+ str = QString::fromUtf8(utf8);
QTest::newRow("http://www.w3.org/2001/06/utf-8-wrong/UTF-8-test.html 5.3.2") << utf8 << str << -1;
}
diff --git a/tests/auto/corelib/codecs/utf8/tst_utf8.cpp b/tests/auto/corelib/codecs/utf8/tst_utf8.cpp
index 99147f3aff..e18f6f73b9 100644
--- a/tests/auto/corelib/codecs/utf8/tst_utf8.cpp
+++ b/tests/auto/corelib/codecs/utf8/tst_utf8.cpp
@@ -233,8 +233,9 @@ void tst_Utf8::nonCharacters_data()
QTest::addColumn<QByteArray>("utf8");
QTest::addColumn<QString>("utf16");
- // Unicode has a couple of "non-characters" that one can use internally,
- // but are not allowed to be used for text interchange.
+ // Unicode has a couple of "non-characters" that one can use internally
+ // These characters may be used for interchange;
+ // see: http://www.unicode.org/versions/corrigendum9.html
//
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
@@ -279,20 +280,17 @@ void tst_Utf8::nonCharacters()
decoder->toUnicode(utf8);
// Only enforce correctness on our UTF-8 decoder
- // The system's UTF-8 codec is sometimes buggy
- // GNU libc's iconv is known to accept U+FFFF and U+FFFE encoded as UTF-8
- // OS X's iconv is known to accept those, plus surrogates and codepoints above U+10FFFF
if (!useLocale)
- QVERIFY(decoder->hasFailure());
- else if (!decoder->hasFailure())
- qWarning("System codec does not report failure when it should. Should report bug upstream.");
+ QVERIFY(!decoder->hasFailure());
+ else if (decoder->hasFailure())
+ qWarning("System codec reports failure when it shouldn't. Should report bug upstream.");
QSharedPointer<QTextEncoder> encoder(codec->makeEncoder());
encoder->fromUnicode(utf16);
if (!useLocale)
- QVERIFY(encoder->hasFailure());
- else if (!encoder->hasFailure())
- qWarning("System codec does not report failure when it should. Should report bug upstream.");
+ QVERIFY(!encoder->hasFailure());
+ else if (encoder->hasFailure())
+ qWarning("System codec reports failure when it shouldn't. Should report bug upstream.");
}
QTEST_MAIN(tst_Utf8)
diff --git a/tests/auto/corelib/codecs/utf8/utf8data.cpp b/tests/auto/corelib/codecs/utf8/utf8data.cpp
index 2516cc9734..a41b0772e6 100644
--- a/tests/auto/corelib/codecs/utf8/utf8data.cpp
+++ b/tests/auto/corelib/codecs/utf8/utf8data.cpp
@@ -129,8 +129,8 @@ void loadInvalidUtf8Rows()
void loadNonCharactersRows()
{
- // Unicode has a couple of "non-characters" that one can use internally,
- // but are not allowed to be used for text interchange.
+ // Unicode has a couple of "non-characters" that one can use internally
+ // These characters are allowed for text-interchange (see http://www.unicode.org/versions/corrigendum9.html)
//
// Those are the last two entries each Unicode Plane (U+FFFE, U+FFFF,
// U+1FFFE, U+1FFFF, etc.) as well as the entries between U+FDD0 and
diff --git a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
index 75b17df759..d3a8bcfd13 100644
--- a/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
+++ b/tests/auto/corelib/io/qurlinternal/tst_qurlinternal.cpp
@@ -964,8 +964,10 @@ void tst_QUrlInternal::encodingRecode_data()
addUtf8Data("utf8-string-2", "\xDF\xBF\xE0\xA0\x80""A");
addUtf8Data("utf8-string-3", "\xE0\xA0\x80\xDF\xBF...");
+ QTest::newRow("encode-unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::FullyEncoded) << "%EF%BF%BF";
+ QTest::newRow("decode-unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::PrettyDecoded) << QString::fromUtf8("\xEF\xBF\xBF");
+
// special cases: stuff we can encode, but not decode
- QTest::newRow("unicode-noncharacter") << QString(QChar(0xffff)) << F(QUrl::FullyEncoded) << "%EF%BF%BF";
QTest::newRow("unicode-lo-surrogate") << QString(QChar(0xD800)) << F(QUrl::FullyEncoded) << "%ED%A0%80";
QTest::newRow("unicode-hi-surrogate") << QString(QChar(0xDC00)) << F(QUrl::FullyEncoded) << "%ED%B0%80";
@@ -1011,9 +1013,6 @@ void tst_QUrlInternal::encodingRecodeInvalidUtf8_data()
extern void loadInvalidUtf8Rows();
loadInvalidUtf8Rows();
- extern void loadNonCharactersRows();
- loadNonCharactersRows();
-
QTest::newRow("utf8-mix-4") << QByteArray("\xE0.A2\x80");
QTest::newRow("utf8-mix-5") << QByteArray("\xE0\xA2.80");
QTest::newRow("utf8-mix-6") << QByteArray("\xE0\xA2\x33");
diff --git a/tests/auto/corelib/json/tst_qtjson.cpp b/tests/auto/corelib/json/tst_qtjson.cpp
index 9dbd6414ad..c79e7273c0 100644
--- a/tests/auto/corelib/json/tst_qtjson.cpp
+++ b/tests/auto/corelib/json/tst_qtjson.cpp
@@ -47,7 +47,8 @@
#include "qjsondocument.h"
#include <limits>
-#define INVALID_UNICODE "\357\277\277" // "\uffff"
+#define INVALID_UNICODE "\xCE\xBA\xE1"
+#define UNICODE_NON_CHARACTER "\xEF\xBF\xBF"
#define UNICODE_DJE "\320\202" // Character from the Serbian Cyrillic alphabet
class tst_QtJson: public QObject
@@ -1306,6 +1307,19 @@ void tst_QtJson::fromJson()
QCOMPARE(doc.toJson(), json);
}
{
+ //regression test: test if unicode_control_characters are correctly decoded
+ QByteArray json = "[\n \"" UNICODE_NON_CHARACTER "\"\n]\n";
+ QJsonDocument doc = QJsonDocument::fromJson(json);
+ QVERIFY(!doc.isEmpty());
+ QCOMPARE(doc.isArray(), true);
+ QCOMPARE(doc.isObject(), false);
+ QJsonArray array = doc.array();
+ QCOMPARE(array.size(), 1);
+ QCOMPARE(array.at(0).type(), QJsonValue::String);
+ QCOMPARE(array.at(0).toString(), QString::fromUtf8(UNICODE_NON_CHARACTER));
+ QCOMPARE(doc.toJson(), json);
+ }
+ {
QByteArray json = "[]";
QJsonDocument doc = QJsonDocument::fromJson(json);
QVERIFY(!doc.isEmpty());
@@ -1532,7 +1546,7 @@ void tst_QtJson::fromJsonErrors()
QJsonDocument doc = QJsonDocument::fromJson(json, &error);
QVERIFY(doc.isEmpty());
QCOMPARE(error.error, QJsonParseError::IllegalUTF8String);
- QCOMPARE(error.offset, 13);
+ QCOMPARE(error.offset, 14);
}
{
QJsonParseError error;
@@ -1556,7 +1570,7 @@ void tst_QtJson::fromJsonErrors()
QJsonDocument doc = QJsonDocument::fromJson(json, &error);
QVERIFY(doc.isEmpty());
QCOMPARE(error.error, QJsonParseError::IllegalUTF8String);
- QCOMPARE(error.offset, 14);
+ QCOMPARE(error.offset, 15);
}
{
QJsonParseError error;
@@ -1702,6 +1716,7 @@ void tst_QtJson::parseStrings()
"abc\\tabc",
"abc\\u0019abc",
"abc" UNICODE_DJE "abc",
+ UNICODE_NON_CHARACTER
};
int size = sizeof(strings)/sizeof(const char *);
@@ -1728,7 +1743,8 @@ void tst_QtJson::parseStrings()
Pairs pairs [] = {
{ "abc\\/abc", "abc/abc" },
{ "abc\\u0402abc", "abc" UNICODE_DJE "abc" },
- { "abc\\u0065abc", "abceabc" }
+ { "abc\\u0065abc", "abceabc" },
+ { "abc\\uFFFFabc", "abc" UNICODE_NON_CHARACTER "abc" }
};
size = sizeof(pairs)/sizeof(Pairs);
diff --git a/tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp b/tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp
index d4c0ff44ca..5be43e2c8f 100644
--- a/tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp
+++ b/tests/auto/xml/sax/qxmlsimplereader/tst_qxmlsimplereader.cpp
@@ -315,8 +315,6 @@ void tst_QXmlSimpleReader::testGoodXmlFile()
QVERIFY(file.open(QIODevice::ReadOnly));
Parser parser;
-// static int i = 0;
-// qWarning("Test nr: " + QString::number(i)); ++i;
QEXPECT_FAIL("xmldocs/valid/sa/089.xml", "", Continue);
QVERIFY(parser.parseFile(&file));
@@ -326,7 +324,6 @@ void tst_QXmlSimpleReader::testGoodXmlFile()
ref_stream.setCodec("UTF-8");
QString ref_file_contents = ref_stream.readAll();
- QEXPECT_FAIL("xmldocs/valid/sa/089.xml", "", Continue);
QCOMPARE(parser.result(), ref_file_contents);
}
@@ -355,8 +352,6 @@ void tst_QXmlSimpleReader::testBadXmlFile()
QVERIFY(file.open(QIODevice::ReadOnly));
Parser parser;
-// static int i = 0;
-// qWarning("Test nr: " + QString::number(++i));
QEXPECT_FAIL("xmldocs/not-wf/sa/030.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/031.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/032.xml", "", Continue);
@@ -381,22 +376,17 @@ void tst_QXmlSimpleReader::testBadXmlFile()
QEXPECT_FAIL("xmldocs/not-wf/sa/132.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/142.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/143.xml", "", Continue);
+
QEXPECT_FAIL("xmldocs/not-wf/sa/144.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/145.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/146.xml", "", Abort);
QEXPECT_FAIL("xmldocs/not-wf/sa/160.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/162.xml", "", Continue);
- QEXPECT_FAIL("xmldocs/not-wf/sa/166.xml", "", Continue);
- QEXPECT_FAIL("xmldocs/not-wf/sa/167.xml", "", Continue);
+
QEXPECT_FAIL("xmldocs/not-wf/sa/168.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/169.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/170.xml", "", Continue);
- QEXPECT_FAIL("xmldocs/not-wf/sa/171.xml", "", Abort);
- QEXPECT_FAIL("xmldocs/not-wf/sa/172.xml", "", Abort);
- QEXPECT_FAIL("xmldocs/not-wf/sa/173.xml", "", Abort);
- QEXPECT_FAIL("xmldocs/not-wf/sa/174.xml", "", Abort);
- QEXPECT_FAIL("xmldocs/not-wf/sa/175.xml", "", Abort);
- QEXPECT_FAIL("xmldocs/not-wf/sa/177.xml", "", Abort);
+
QEXPECT_FAIL("xmldocs/not-wf/sa/180.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/181.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/182.xml", "", Continue);
@@ -411,12 +401,7 @@ void tst_QXmlSimpleReader::testBadXmlFile()
ref_stream.setCodec("UTF-8");
QString ref_file_contents = ref_stream.readAll();
- QEXPECT_FAIL("xmldocs/not-wf/sa/144.xml", "", Continue);
QEXPECT_FAIL("xmldocs/not-wf/sa/145.xml", "", Continue);
- QEXPECT_FAIL("xmldocs/not-wf/sa/146.xml", "", Continue);
- QEXPECT_FAIL("xmldocs/not-wf/sa/167.xml", "", Continue);
- QEXPECT_FAIL("xmldocs/not-wf/sa/166.xml", "", Continue);
- QEXPECT_FAIL("xmldocs/not-wf/sa/170.xml", "", Continue);
QCOMPARE(parser.result(), ref_file_contents);
}
diff --git a/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref b/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref
index 0508ee88c7..eca786f688 100644
--- a/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref
+++ b/tests/auto/xml/sax/qxmlsimplereader/xmldocs/not-wf/sa/170.xml.ref
@@ -1,6 +1,6 @@
setDocumentLocator(locator={columnNumber=1, lineNumber=1})
startDocument()
startElement(namespaceURI="", localName="doc", qName="doc", atts=[])
- characters(ch="")
+ characters(ch="�")
endElement(namespaceURI="", localName="doc", qName="doc")
endDocument()