diff options
author | Ulf Hermann <ulf.hermann@qt.io> | 2017-09-12 17:31:31 +0200 |
---|---|---|
committer | Ulf Hermann <ulf.hermann@qt.io> | 2017-09-13 07:29:00 +0000 |
commit | 3b5b8f1d4ab8092e5dd337b7b4e32d85fda2e0b7 (patch) | |
tree | 0c3d8e427eeeb440e4e20a836f9e1721d72e10a7 /src/corelib/xml | |
parent | 71a2dd955cdc15317c83ad35b4845ef864cba0ef (diff) |
QXmlStreamWriter: Avoid writing invalid characters
Some valid UTF-16 characters cannot be expressed in XML 1.0 and a
QString may contain invalid unicode. In both cases we should not write
the respective data to the output stream, as that generates invalid XML,
which then cannot be read back by QXmlStreamReader. In addition we
should report an error if we encounter them.
The change filters the incorrect strings from the output and introduces
an "encodingError" flag which is reported from hasError().
[ChangeLog][Important Behavior Changes] Characters invalid in XML, such
as 0x0 or 0xfffe, as well as strings containing unmatched UTF-16
surrogates are now suppressed from the output of QXmlStreamWriter and
cause the error flag to be set.
Task-number: QTBUG-63150
Change-Id: Ia29bab768fed9681dd68e8934da2a7e3fcdfc3cd
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib/xml')
-rw-r--r-- | src/corelib/xml/qxmlstream.cpp | 79 |
1 files changed, 57 insertions, 22 deletions
diff --git a/src/corelib/xml/qxmlstream.cpp b/src/corelib/xml/qxmlstream.cpp index f6ae3571ab..c1ed410de7 100644 --- a/src/corelib/xml/qxmlstream.cpp +++ b/src/corelib/xml/qxmlstream.cpp @@ -2980,7 +2980,8 @@ public: uint inEmptyElement :1; uint lastWasStartElement :1; uint wroteSomething :1; - uint hasError :1; + uint hasIoError :1; + uint hasEncodingError :1; uint autoFormatting :1; uint isCodecASCIICompatible :1; QByteArray autoFormattingIndent; @@ -3016,7 +3017,8 @@ QXmlStreamWriterPrivate::QXmlStreamWriterPrivate(QXmlStreamWriter *q) checkIfASCIICompatibleCodec(); inStartElement = inEmptyElement = false; wroteSomething = false; - hasError = false; + hasIoError = false; + hasEncodingError = false; lastWasStartElement = false; lastNamespaceDeclaration = 1; autoFormatting = false; @@ -3043,15 +3045,19 @@ void QXmlStreamWriterPrivate::checkIfASCIICompatibleCodec() void QXmlStreamWriterPrivate::write(const QStringRef &s) { if (device) { - if (hasError) + if (hasIoError) return; #ifdef QT_NO_TEXTCODEC QByteArray bytes = s.toLatin1(); #else QByteArray bytes = encoder->fromUnicode(s.constData(), s.size()); + if (encoder->hasFailure()) { + hasEncodingError = true; + return; + } #endif if (device->write(bytes) != bytes.size()) - hasError = true; + hasIoError = true; } else if (stringDevice) s.appendTo(stringDevice); @@ -3062,15 +3068,19 @@ void QXmlStreamWriterPrivate::write(const QStringRef &s) void QXmlStreamWriterPrivate::write(const QString &s) { if (device) { - if (hasError) + if (hasIoError) return; #ifdef QT_NO_TEXTCODEC QByteArray bytes = s.toLatin1(); #else QByteArray bytes = encoder->fromUnicode(s); + if (encoder->hasFailure()) { + hasEncodingError = true; + return; + } #endif if (device->write(bytes) != bytes.size()) - hasError = true; + hasIoError = true; } else if (stringDevice) stringDevice->append(s); @@ -3084,25 +3094,47 @@ void QXmlStreamWriterPrivate::writeEscaped(const QString &s, bool escapeWhitespa escaped.reserve(s.size()); for ( int i = 0; i < s.size(); ++i ) { QChar c = s.at(i); - if (c.unicode() == '<' ) + switch (c.unicode()) { + case '<': escaped.append(QLatin1String("<")); - else if (c.unicode() == '>' ) + break; + case '>': escaped.append(QLatin1String(">")); - else if (c.unicode() == '&' ) + break; + case '&': escaped.append(QLatin1String("&")); - else if (c.unicode() == '\"' ) + break; + case '\"': escaped.append(QLatin1String(""")); - else if (escapeWhitespace && c.isSpace()) { - if (c.unicode() == '\n') + break; + case '\t': + if (escapeWhitespace) + escaped.append(QLatin1String("	")); + else + escaped += c; + break; + case '\n': + if (escapeWhitespace) escaped.append(QLatin1String(" ")); - else if (c.unicode() == '\r') + else + escaped += c; + break; + case '\v': + case '\f': + hasEncodingError = true; + break; + case '\r': + if (escapeWhitespace) escaped.append(QLatin1String(" ")); - else if (c.unicode() == '\t') - escaped.append(QLatin1String("	")); else escaped += c; - } else { - escaped += QChar(c); + break; + default: + if (c.unicode() > 0x1f && c.unicode() < 0xfffe) + escaped += c; + else + hasEncodingError = true; + break; } } write(escaped); @@ -3112,11 +3144,11 @@ void QXmlStreamWriterPrivate::writeEscaped(const QString &s, bool escapeWhitespa void QXmlStreamWriterPrivate::write(const char *s, int len) { if (device) { - if (hasError) + if (hasIoError) return; if (isCodecASCIICompatible) { if (device->write(s, len) != len) - hasError = true; + hasIoError = true; return; } } @@ -3400,15 +3432,18 @@ int QXmlStreamWriter::autoFormattingIndent() const } /*! - Returns \c true if the stream failed to write to the underlying device. + Returns \c true if writing failed. + + This can happen if the stream failed to write to the underlying + device or if the data to be written contained invalid characters. The error status is never reset. Writes happening after the error - occurred are ignored, even if the error condition is cleared. + occurred may be ignored, even if the error condition is cleared. */ bool QXmlStreamWriter::hasError() const { Q_D(const QXmlStreamWriter); - return d->hasError; + return d->hasIoError || d->hasEncodingError; } /*! |