diff options
author | Axel Spoerl <axel.spoerl@qt.io> | 2023-06-30 12:43:59 +0200 |
---|---|---|
committer | Axel Spoerl <axel.spoerl@qt.io> | 2023-07-10 22:44:06 +0200 |
commit | c4301be7d5f94852e1b17f2c2989d5ca807855d4 (patch) | |
tree | 1237a23fdad6520e17980cc80ec4bafc13d65705 /tests/auto/corelib/serialization/qxmlstream | |
parent | b08ddd2c4ecedccd0bc08e9f2390a7b86ed861f4 (diff) |
QXmlStreamReader: Raise error on unexpected tokens
QXmlStreamReader accepted multiple DOCTYPE elements, containing DTD
fragments in the XML prolog, and in the XML body.
Well-formed but invalid XML files - with multiple DTD fragments in
prolog and body, combined with recursive entity expansions - have
caused infinite loops in QXmlStreamReader.
This patch implements a token check in QXmlStreamReader.
A stream is allowed to start with an XML prolog. StartDocument
and DOCTYPE elements are only allowed in this prolog, which
may also contain ProcessingInstruction and Comment elements.
As soon as anything else is seen, the prolog ends.
After that, the prolog-specific elements are treated as unexpected.
Furthermore, the prolog can contain at most one DOCTYPE element.
Update the documentation to reflect the new behavior.
Add an autotest that checks the new error cases are correctly detected,
and no error is raised for legitimate input.
The original OSS-Fuzz files (see bug reports) are not included in this
patch for file size reasons. They have been tested manually. Each of
them has more than one DOCTYPE element, causing infinite loops in
recursive entity expansions. The newly implemented functionality
detects those invalid DTD fragments. By raising an error, it aborts
stream reading before an infinite loop occurs.
Thanks to OSS-Fuzz for finding this.
Fixes: QTBUG-92113
Fixes: QTBUG-95188
Pick-to: 6.6 6.5 6.2 5.15
Change-Id: I0a082b9188b2eee50b396c4d5b1c9e1fd237bbdd
Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
Diffstat (limited to 'tests/auto/corelib/serialization/qxmlstream')
4 files changed, 94 insertions, 0 deletions
diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml new file mode 100644 index 0000000000..1c3ca4e271 --- /dev/null +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/dtdInBody.xml @@ -0,0 +1,20 @@ +<!DOCTYPE TEST [ + <!ELEMENT TESTATTRIBUTE (CASE+)> + <!ELEMENT CASE (CLASS, FUNCTION)> + <!ELEMENT CLASS (#PCDATA)> + + <!-- adding random ENTITY statement, as this is typical DTD content --> + <!ENTITY unite "∪"> + + <!ATTLIST CASE CLASS CDATA #REQUIRED> +]> +<TEST> + <CASE> + <CLASS>tst_QXmlStream</CLASS> + </CASE> + <!-- invalid DTD in XML body follows --> + <!DOCTYPE DTDTEST [ + <!ELEMENT RESULT (CASE+)> + <!ATTLIST RESULT OUTPUT CDATA #REQUIRED> + ]> +</TEST> diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml new file mode 100644 index 0000000000..cd398c0f9f --- /dev/null +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/multipleDtd.xml @@ -0,0 +1,20 @@ +<!DOCTYPE TEST [ + <!ELEMENT TESTATTRIBUTE (CASE+)> + <!ELEMENT CASE (CLASS, FUNCTION, DATASET, COMMENTS)> + <!ELEMENT CLASS (#PCDATA)> + + <!-- adding random ENTITY statements, as this is typical DTD content --> + <!ENTITY iff "⇔"> + + <!ATTLIST CASE CLASS CDATA #REQUIRED> +]> +<!-- invalid second DTD follows --> +<!DOCTYPE SECOND [ + <!ELEMENT SECONDATTRIBUTE (#PCDATA)> + <!ENTITY on "∘"> +]> +<TEST> + <CASE> + <CLASS>tst_QXmlStream</CLASS> + </CASE> +</TEST> diff --git a/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml new file mode 100644 index 0000000000..1b61a3f062 --- /dev/null +++ b/tests/auto/corelib/serialization/qxmlstream/tokenError/wellFormed.xml @@ -0,0 +1,15 @@ +<!DOCTYPE TEST [ + <!ELEMENT TESTATTRIBUTE (CASE+)> + <!ELEMENT CASE (CLASS, FUNCTION, DATASET, COMMENTS)> + <!ELEMENT CLASS (#PCDATA)> + + <!-- adding random ENTITY statements, as this is typical DTD content --> + <!ENTITY unite "∪"> + + <!ATTLIST CASE CLASS CDATA #REQUIRED> +]> +<TEST> + <CASE> + <CLASS>tst_QXmlStream</CLASS> + </CASE> +</TEST> diff --git a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp index 3a1b4511e8..75edda97e0 100644 --- a/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp +++ b/tests/auto/corelib/serialization/qxmlstream/tst_qxmlstream.cpp @@ -591,6 +591,9 @@ private slots: void entityExpansionLimit() const; + void tokenErrorHandling_data() const; + void tokenErrorHandling() const; + private: static QByteArray readFile(const QString &filename); @@ -1867,5 +1870,41 @@ void tst_QXmlStream::test_fastScanName() const QCOMPARE(reader.error(), errorType); } +void tst_QXmlStream::tokenErrorHandling_data() const +{ + QTest::addColumn<QString>("fileName"); + QTest::addColumn<QXmlStreamReader::Error>("expectedError"); + QTest::addColumn<QString>("errorKeyWord"); + + constexpr auto invalid = QXmlStreamReader::Error::UnexpectedElementError; + constexpr auto valid = QXmlStreamReader::Error::NoError; + QTest::newRow("DtdInBody") << "dtdInBody.xml" << invalid << "DTD"; + QTest::newRow("multipleDTD") << "multipleDtd.xml" << invalid << "second DTD"; + QTest::newRow("wellFormed") << "wellFormed.xml" << valid << ""; +} + +void tst_QXmlStream::tokenErrorHandling() const +{ + QFETCH(const QString, fileName); + QFETCH(const QXmlStreamReader::Error, expectedError); + QFETCH(const QString, errorKeyWord); + + const QDir dir(QFINDTESTDATA("tokenError")); + QFile file(dir.absoluteFilePath(fileName)); + + // Cross-compiling: File will be on host only + if (!file.exists()) + QSKIP("Testfile not found."); + + file.open(QIODevice::ReadOnly); + QXmlStreamReader reader(&file); + while (!reader.atEnd()) + reader.readNext(); + + QCOMPARE(reader.error(), expectedError); + if (expectedError != QXmlStreamReader::Error::NoError) + QVERIFY(reader.errorString().contains(errorKeyWord)); +} + #include "tst_qxmlstream.moc" // vim: et:ts=4:sw=4:sts=4 |