From 3b5b8f1d4ab8092e5dd337b7b4e32d85fda2e0b7 Mon Sep 17 00:00:00 2001 From: Ulf Hermann Date: Tue, 12 Sep 2017 17:31:31 +0200 Subject: QXmlStreamWriter: Avoid writing invalid characters Some valid UTF-16 characters cannot be expressed in XML 1.0 and a QString may contain invalid unicode. In both cases we should not write the respective data to the output stream, as that generates invalid XML, which then cannot be read back by QXmlStreamReader. In addition we should report an error if we encounter them. The change filters the incorrect strings from the output and introduces an "encodingError" flag which is reported from hasError(). [ChangeLog][Important Behavior Changes] Characters invalid in XML, such as 0x0 or 0xfffe, as well as strings containing unmatched UTF-16 surrogates are now suppressed from the output of QXmlStreamWriter and cause the error flag to be set. Task-number: QTBUG-63150 Change-Id: Ia29bab768fed9681dd68e8934da2a7e3fcdfc3cd Reviewed-by: Thiago Macieira --- src/corelib/xml/qxmlstream.cpp | 79 ++++++++++++++++------ .../auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp | 46 +++++++++++++ 2 files changed, 103 insertions(+), 22 deletions(-) diff --git a/src/corelib/xml/qxmlstream.cpp b/src/corelib/xml/qxmlstream.cpp index f6ae3571ab..c1ed410de7 100644 --- a/src/corelib/xml/qxmlstream.cpp +++ b/src/corelib/xml/qxmlstream.cpp @@ -2980,7 +2980,8 @@ public: uint inEmptyElement :1; uint lastWasStartElement :1; uint wroteSomething :1; - uint hasError :1; + uint hasIoError :1; + uint hasEncodingError :1; uint autoFormatting :1; uint isCodecASCIICompatible :1; QByteArray autoFormattingIndent; @@ -3016,7 +3017,8 @@ QXmlStreamWriterPrivate::QXmlStreamWriterPrivate(QXmlStreamWriter *q) checkIfASCIICompatibleCodec(); inStartElement = inEmptyElement = false; wroteSomething = false; - hasError = false; + hasIoError = false; + hasEncodingError = false; lastWasStartElement = false; lastNamespaceDeclaration = 1; autoFormatting = false; @@ -3043,15 +3045,19 @@ void QXmlStreamWriterPrivate::checkIfASCIICompatibleCodec() void QXmlStreamWriterPrivate::write(const QStringRef &s) { if (device) { - if (hasError) + if (hasIoError) return; #ifdef QT_NO_TEXTCODEC QByteArray bytes = s.toLatin1(); #else QByteArray bytes = encoder->fromUnicode(s.constData(), s.size()); + if (encoder->hasFailure()) { + hasEncodingError = true; + return; + } #endif if (device->write(bytes) != bytes.size()) - hasError = true; + hasIoError = true; } else if (stringDevice) s.appendTo(stringDevice); @@ -3062,15 +3068,19 @@ void QXmlStreamWriterPrivate::write(const QStringRef &s) void QXmlStreamWriterPrivate::write(const QString &s) { if (device) { - if (hasError) + if (hasIoError) return; #ifdef QT_NO_TEXTCODEC QByteArray bytes = s.toLatin1(); #else QByteArray bytes = encoder->fromUnicode(s); + if (encoder->hasFailure()) { + hasEncodingError = true; + return; + } #endif if (device->write(bytes) != bytes.size()) - hasError = true; + hasIoError = true; } else if (stringDevice) stringDevice->append(s); @@ -3084,25 +3094,47 @@ void QXmlStreamWriterPrivate::writeEscaped(const QString &s, bool escapeWhitespa escaped.reserve(s.size()); for ( int i = 0; i < s.size(); ++i ) { QChar c = s.at(i); - if (c.unicode() == '<' ) + switch (c.unicode()) { + case '<': escaped.append(QLatin1String("<")); - else if (c.unicode() == '>' ) + break; + case '>': escaped.append(QLatin1String(">")); - else if (c.unicode() == '&' ) + break; + case '&': escaped.append(QLatin1String("&")); - else if (c.unicode() == '\"' ) + break; + case '\"': escaped.append(QLatin1String(""")); - else if (escapeWhitespace && c.isSpace()) { - if (c.unicode() == '\n') + break; + case '\t': + if (escapeWhitespace) + escaped.append(QLatin1String(" ")); + else + escaped += c; + break; + case '\n': + if (escapeWhitespace) escaped.append(QLatin1String(" ")); - else if (c.unicode() == '\r') + else + escaped += c; + break; + case '\v': + case '\f': + hasEncodingError = true; + break; + case '\r': + if (escapeWhitespace) escaped.append(QLatin1String(" ")); - else if (c.unicode() == '\t') - escaped.append(QLatin1String(" ")); else escaped += c; - } else { - escaped += QChar(c); + break; + default: + if (c.unicode() > 0x1f && c.unicode() < 0xfffe) + escaped += c; + else + hasEncodingError = true; + break; } } write(escaped); @@ -3112,11 +3144,11 @@ void QXmlStreamWriterPrivate::writeEscaped(const QString &s, bool escapeWhitespa void QXmlStreamWriterPrivate::write(const char *s, int len) { if (device) { - if (hasError) + if (hasIoError) return; if (isCodecASCIICompatible) { if (device->write(s, len) != len) - hasError = true; + hasIoError = true; return; } } @@ -3400,15 +3432,18 @@ int QXmlStreamWriter::autoFormattingIndent() const } /*! - Returns \c true if the stream failed to write to the underlying device. + Returns \c true if writing failed. + + This can happen if the stream failed to write to the underlying + device or if the data to be written contained invalid characters. The error status is never reset. Writes happening after the error - occurred are ignored, even if the error condition is cleared. + occurred may be ignored, even if the error condition is cleared. */ bool QXmlStreamWriter::hasError() const { Q_D(const QXmlStreamWriter); - return d->hasError; + return d->hasIoError || d->hasEncodingError; } /*! diff --git a/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp index 79cd17b5b3..16a4200b5d 100644 --- a/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp +++ b/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp @@ -576,6 +576,7 @@ private slots: void invalidStringCharacters_data() const; void invalidStringCharacters() const; void hasError() const; + void readBack() const; private: static QByteArray readFile(const QString &filename); @@ -1695,5 +1696,50 @@ void tst_QXmlStream::invalidStringCharacters_data() const // } +static bool isValidSingleTextChar(const ushort c) +{ + // Conforms to https://www.w3.org/TR/REC-xml/#NT-Char - except for the high range, which is done + // with surrogates. + // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] + static const QPair validRanges[] = { + QPair(0x9, 0xb), + QPair(0xd, 0xe), + QPair(0x20, 0xd800), + QPair(0xe000, 0xfffe) + }; + + for (const QPair &range : validRanges) { + if (c >= range.first && c < range.second) + return true; + } + return false; +} + +void tst_QXmlStream::readBack() const +{ + for (ushort c = 0; c < std::numeric_limits::max(); ++c) { + QBuffer buffer; + + QVERIFY(buffer.open(QIODevice::WriteOnly)); + QXmlStreamWriter writer(&buffer); + writer.writeStartDocument(); + writer.writeTextElement("a", QString(QChar(c))); + writer.writeEndDocument(); + buffer.close(); + + if (writer.hasError()) { + QVERIFY2(!isValidSingleTextChar(c), QByteArray::number(c)); + } else { + QVERIFY2(isValidSingleTextChar(c), QByteArray::number(c)); + QVERIFY(buffer.open(QIODevice::ReadOnly)); + QXmlStreamReader reader(&buffer); + do { + reader.readNext(); + } while (!reader.atEnd()); + QVERIFY2(!reader.hasError(), QByteArray::number(c)); + } + } +} + #include "tst_qxmlstream.moc" // vim: et:ts=4:sw=4:sts=4 -- cgit v1.2.3