From d4302ec6936b8b3799a266b640b5d116b3296b29 Mon Sep 17 00:00:00 2001 From: Luca Bellonda Date: Wed, 13 Jul 2016 21:44:16 +0200 Subject: QtCore: Fix QXmlStreamReader for invalid characters in XML 1.0 The XML parser uses fastScanLiteralContent() to read a block of text. The routine was not checking the range of valid characters as defined in the XML standard: https://www.w3.org/TR/2008/REC-xml-20081126/#NT-Char A check has been added to stop reading at the bad character. Note that the characters are legal in XML 1.1, but QXmlStreamReader is a well-formed XML 1.0 parser. [ChangeLog][QtCore][QXmlStreamReader] Fixed a bug in the XML parser that prevented loading XML that contained invalid characters for XML 1.0. Change-Id: I10aaf84fbf95ccdaf9f6d683ea7c31925efff36d Reviewed-by: Thiago Macieira --- .../auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp | 59 ++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'tests/auto/corelib') diff --git a/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp index b0fd1187f5..686b8f22fa 100644 --- a/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp +++ b/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp @@ -562,6 +562,8 @@ private slots: void checkCommentIndentation_data() const; void crashInXmlStreamReader() const; void write8bitCodec() const; + void invalidStringCharacters_data() const; + void invalidStringCharacters() const; void hasError() const; private: @@ -1616,6 +1618,63 @@ void tst_QXmlStream::write8bitCodec() const QVERIFY(decodedText.startsWith(expected)); } +void tst_QXmlStream::invalidStringCharacters() const +{ + // test scan in attributes + QFETCH(QString, testString); + QFETCH(bool, expectedResultNoError); + + QByteArray values = testString.toUtf8(); + QBuffer inBuffer; + inBuffer.setData(values); + QVERIFY(inBuffer.open(QIODevice::ReadOnly)); + QXmlStreamReader reader(&inBuffer); + do { + reader.readNext(); + } while (!reader.atEnd()); + 
QCOMPARE((reader.error() == QXmlStreamReader::NoError), expectedResultNoError); +} + +void tst_QXmlStream::invalidStringCharacters_data() const +{ + // test scan in attributes + QTest::addColumn("expectedResultNoError"); + QTest::addColumn("testString"); + QChar ctrl(0x1A); + QTest::newRow("utf8, attributes, legal") << true << QString(""); + QTest::newRow("utf8, attributes, only char, control") << false << QString(""); + QTest::newRow("utf8, attributes, 1st char, control") << false << QString(""); + QTest::newRow("utf8, attributes, middle char, control") << false << QString(""); + QTest::newRow("utf8, attributes, last char, control") << false << QString(""); + // + QTest::newRow("utf8, text, legal") << true << QString("abcx1A"); + QTest::newRow("utf8, text, only, control") << false << QString("")+ctrl+QString(""); + QTest::newRow("utf8, text, 1st char, control") << false << QString("abc")+ctrl+QString("def"); + QTest::newRow("utf8, text, middle char, control") << false << QString("abc")+ctrl+QString("efg"); + QTest::newRow("utf8, text, last char, control") << false << QString("abc")+ctrl+QString(""); + // + QTest::newRow("utf8, cdata text, legal") << true << QString(""); + QTest::newRow("utf8, cdata text, only, control") << false << QString(""); + QTest::newRow("utf8, cdata text, 1st char, control") << false << QString(""); + QTest::newRow("utf8, cdata text, middle char, control") << false << QString(""); + QTest::newRow("utf8, cdata text, last char, control") << false << QString(""); + // + QTest::newRow("utf8, mixed, control") << false << QString(""); + QTest::newRow("utf8, tag") << false << QString(""); + // + QTest::newRow("utf8, attributes, 1st char, legal escaping hex") << true << QString(""); + QTest::newRow("utf8, attributes, 1st char, control escaping hex") << false << QString(""); + QTest::newRow("utf8, attributes, middle char, legal escaping hex") << false << QString(""); + QTest::newRow("utf8, attributes, last char, control escaping hex") << false << 
QString(""); + QTest::newRow("utf8, attributes, 1st char, legal escaping dec") << true << QString(""); + QTest::newRow("utf8, attributes, 1st char, control escaping dec") << false << QString(""); + QTest::newRow("utf8, attributes, middle char, legal escaping dec") << false << QString(""); + QTest::newRow("utf8, attributes, last char, control escaping dec") << false << QString(""); + QTest::newRow("utf8, tag escaping") << false << QString(""); + // + QTest::newRow("utf8, mix of illegal control") << false << QString(""); + // +} #include "tst_qxmlstream.moc" // vim: et:ts=4:sw=4:sts=4 -- cgit v1.2.3 From c14c149b51a1c7bf01e4e039f6e8cf1819e37ca6 Mon Sep 17 00:00:00 2001 From: Friedemann Kleint Date: Tue, 19 Jul 2016 10:18:39 +0200 Subject: Fix QTemporaryDir to handle Unicode characters on Windows For platforms not providing mkdtemp(), QTemporaryDir relied on an implementation of q_mkdtemp() operating on char *, converting back and forth using QFile::encodeName()/decodeName() when passing the name to QFileSystemEngine. This caused failures on Windows (which uses "System"/Latin1 encoding) for names containing characters outside the Latin1 space. Reimplement q_mkdtemp() to operate on QString, which avoids the conversions altogether and also enables the use of larger character spaces for the pattern. Add tests. 
Task-number: QTBUG-54810 Change-Id: Ie4323ad73b5beb8a1b8ab81425f73d03c626d58a Reviewed-by: Thiago Macieira --- .../corelib/io/qtemporarydir/tst_qtemporarydir.cpp | 41 ++++++++++++++++++++++ .../io/qtemporaryfile/tst_qtemporaryfile.cpp | 41 ++++++++++++++++++++++ 2 files changed, 82 insertions(+) (limited to 'tests/auto/corelib') diff --git a/tests/auto/corelib/io/qtemporarydir/tst_qtemporarydir.cpp b/tests/auto/corelib/io/qtemporarydir/tst_qtemporarydir.cpp index 6e03d8360e..67a39f21ca 100644 --- a/tests/auto/corelib/io/qtemporarydir/tst_qtemporarydir.cpp +++ b/tests/auto/corelib/io/qtemporarydir/tst_qtemporarydir.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #ifdef Q_OS_WIN # include #endif @@ -113,6 +114,38 @@ void tst_QTemporaryDir::getSetCheck() QCOMPARE(true, obj1.autoRemove()); } +static inline bool canHandleUnicodeFileNames() +{ +#if defined(Q_OS_WIN) && !defined(Q_OS_WINCE) + return true; +#else + // Check for UTF-8 by converting the Euro symbol (see tst_utf8) + return QFile::encodeName(QString(QChar(0x20AC))) == QByteArrayLiteral("\342\202\254"); +#endif +} + +static QString hanTestText() +{ + QString text; + text += QChar(0x65B0); + text += QChar(0x5E10); + text += QChar(0x6237); + return text; +} + +static QString umlautTestText() +{ + QString text; + text += QChar(0xc4); + text += QChar(0xe4); + text += QChar(0xd6); + text += QChar(0xf6); + text += QChar(0xdc); + text += QChar(0xfc); + text += QChar(0xdf); + return text; +} + void tst_QTemporaryDir::fileTemplate_data() { QTest::addColumn("constructorTemplate"); @@ -129,6 +162,14 @@ void tst_QTemporaryDir::fileTemplate_data() QTest::newRow("constructor with XXXX suffix") << "qt_XXXXXX_XXXX" << "qt_"; QTest::newRow("constructor with XXXX prefix") << "qt_XXXX" << "qt_"; QTest::newRow("constructor with XXXXX prefix") << "qt_XXXXX" << "qt_"; + if (canHandleUnicodeFileNames()) { + // Test Umlauts (contained in Latin1) + QString prefix = "qt_" + umlautTestText(); + QTest::newRow("Umlauts") << 
(prefix + "XXXXXX") << prefix; + // Test Chinese + prefix = "qt_" + hanTestText(); + QTest::newRow("Chinese characters") << (prefix + "XXXXXX") << prefix; + } } void tst_QTemporaryDir::fileTemplate() diff --git a/tests/auto/corelib/io/qtemporaryfile/tst_qtemporaryfile.cpp b/tests/auto/corelib/io/qtemporaryfile/tst_qtemporaryfile.cpp index 7b06355990..7fdc8fd44c 100644 --- a/tests/auto/corelib/io/qtemporaryfile/tst_qtemporaryfile.cpp +++ b/tests/auto/corelib/io/qtemporaryfile/tst_qtemporaryfile.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #if defined(Q_OS_WIN) # include @@ -145,6 +146,38 @@ void tst_QTemporaryFile::getSetCheck() QCOMPARE(true, obj1.autoRemove()); } +static inline bool canHandleUnicodeFileNames() +{ +#if defined(Q_OS_WIN) && !defined(Q_OS_WINCE) + return true; +#else + // Check for UTF-8 by converting the Euro symbol (see tst_utf8) + return QFile::encodeName(QString(QChar(0x20AC))) == QByteArrayLiteral("\342\202\254"); +#endif +} + +static QString hanTestText() +{ + QString text; + text += QChar(0x65B0); + text += QChar(0x5E10); + text += QChar(0x6237); + return text; +} + +static QString umlautTestText() +{ + QString text; + text += QChar(0xc4); + text += QChar(0xe4); + text += QChar(0xd6); + text += QChar(0xf6); + text += QChar(0xdc); + text += QChar(0xfc); + text += QChar(0xdf); + return text; +} + void tst_QTemporaryFile::fileTemplate_data() { QTest::addColumn("constructorTemplate"); @@ -171,6 +204,14 @@ void tst_QTemporaryFile::fileTemplate_data() QTest::newRow("set template, with xxx") << "" << "qt_" << ".xxx" << "qt_XXXXXX.xxx"; QTest::newRow("set template, with >6 X's") << "" << "qt_" << ".xxx" << "qt_XXXXXXXXXXXXXX.xxx"; QTest::newRow("set template, with >6 X's, no suffix") << "" << "qt_" << "" << "qt_XXXXXXXXXXXXXX"; + if (canHandleUnicodeFileNames()) { + // Test Umlauts (contained in Latin1) + QString prefix = "qt_" + umlautTestText(); + QTest::newRow("Umlauts") << (prefix + "XXXXXX") << prefix << QString() << QString(); + // 
Test Chinese + prefix = "qt_" + hanTestText(); + QTest::newRow("Chinese characters") << (prefix + "XXXXXX") << prefix << QString() << QString(); + } } void tst_QTemporaryFile::fileTemplate() -- cgit v1.2.3