summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorUlf Hermann <ulf.hermann@qt.io>2017-09-12 17:31:31 +0200
committerUlf Hermann <ulf.hermann@qt.io>2017-09-13 07:29:00 +0000
commit3b5b8f1d4ab8092e5dd337b7b4e32d85fda2e0b7 (patch)
tree0c3d8e427eeeb440e4e20a836f9e1721d72e10a7
parent71a2dd955cdc15317c83ad35b4845ef864cba0ef (diff)
QXmlStreamWriter: Avoid writing invalid characters
Some valid UTF-16 characters cannot be expressed in XML 1.0 and a QString may contain invalid unicode. In both cases we should not write the respective data to the output stream, as that generates invalid XML, which then cannot be read back by QXmlStreamReader. In addition we should report an error if we encounter them. The change filters the incorrect strings from the output and introduces an "encodingError" flag which is reported from hasError(). [ChangeLog][Important Behavior Changes] Characters invalid in XML, such as 0x0 or 0xfffe, as well as strings containing unmatched UTF-16 surrogates are now suppressed from the output of QXmlStreamWriter and cause the error flag to be set. Task-number: QTBUG-63150 Change-Id: Ia29bab768fed9681dd68e8934da2a7e3fcdfc3cd Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
-rw-r--r--src/corelib/xml/qxmlstream.cpp79
-rw-r--r--tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp46
2 files changed, 103 insertions, 22 deletions
diff --git a/src/corelib/xml/qxmlstream.cpp b/src/corelib/xml/qxmlstream.cpp
index f6ae3571ab..c1ed410de7 100644
--- a/src/corelib/xml/qxmlstream.cpp
+++ b/src/corelib/xml/qxmlstream.cpp
@@ -2980,7 +2980,8 @@ public:
uint inEmptyElement :1;
uint lastWasStartElement :1;
uint wroteSomething :1;
- uint hasError :1;
+ uint hasIoError :1;
+ uint hasEncodingError :1;
uint autoFormatting :1;
uint isCodecASCIICompatible :1;
QByteArray autoFormattingIndent;
@@ -3016,7 +3017,8 @@ QXmlStreamWriterPrivate::QXmlStreamWriterPrivate(QXmlStreamWriter *q)
checkIfASCIICompatibleCodec();
inStartElement = inEmptyElement = false;
wroteSomething = false;
- hasError = false;
+ hasIoError = false;
+ hasEncodingError = false;
lastWasStartElement = false;
lastNamespaceDeclaration = 1;
autoFormatting = false;
@@ -3043,15 +3045,19 @@ void QXmlStreamWriterPrivate::checkIfASCIICompatibleCodec()
void QXmlStreamWriterPrivate::write(const QStringRef &s)
{
if (device) {
- if (hasError)
+ if (hasIoError)
return;
#ifdef QT_NO_TEXTCODEC
QByteArray bytes = s.toLatin1();
#else
QByteArray bytes = encoder->fromUnicode(s.constData(), s.size());
+ if (encoder->hasFailure()) {
+ hasEncodingError = true;
+ return;
+ }
#endif
if (device->write(bytes) != bytes.size())
- hasError = true;
+ hasIoError = true;
}
else if (stringDevice)
s.appendTo(stringDevice);
@@ -3062,15 +3068,19 @@ void QXmlStreamWriterPrivate::write(const QStringRef &s)
void QXmlStreamWriterPrivate::write(const QString &s)
{
if (device) {
- if (hasError)
+ if (hasIoError)
return;
#ifdef QT_NO_TEXTCODEC
QByteArray bytes = s.toLatin1();
#else
QByteArray bytes = encoder->fromUnicode(s);
+ if (encoder->hasFailure()) {
+ hasEncodingError = true;
+ return;
+ }
#endif
if (device->write(bytes) != bytes.size())
- hasError = true;
+ hasIoError = true;
}
else if (stringDevice)
stringDevice->append(s);
@@ -3084,25 +3094,47 @@ void QXmlStreamWriterPrivate::writeEscaped(const QString &s, bool escapeWhitespa
escaped.reserve(s.size());
for ( int i = 0; i < s.size(); ++i ) {
QChar c = s.at(i);
- if (c.unicode() == '<' )
+ switch (c.unicode()) {
+ case '<':
escaped.append(QLatin1String("&lt;"));
- else if (c.unicode() == '>' )
+ break;
+ case '>':
escaped.append(QLatin1String("&gt;"));
- else if (c.unicode() == '&' )
+ break;
+ case '&':
escaped.append(QLatin1String("&amp;"));
- else if (c.unicode() == '\"' )
+ break;
+ case '\"':
escaped.append(QLatin1String("&quot;"));
- else if (escapeWhitespace && c.isSpace()) {
- if (c.unicode() == '\n')
+ break;
+ case '\t':
+ if (escapeWhitespace)
+ escaped.append(QLatin1String("&#9;"));
+ else
+ escaped += c;
+ break;
+ case '\n':
+ if (escapeWhitespace)
escaped.append(QLatin1String("&#10;"));
- else if (c.unicode() == '\r')
+ else
+ escaped += c;
+ break;
+ case '\v':
+ case '\f':
+ hasEncodingError = true;
+ break;
+ case '\r':
+ if (escapeWhitespace)
escaped.append(QLatin1String("&#13;"));
- else if (c.unicode() == '\t')
- escaped.append(QLatin1String("&#9;"));
else
escaped += c;
- } else {
- escaped += QChar(c);
+ break;
+ default:
+ if (c.unicode() > 0x1f && c.unicode() < 0xfffe)
+ escaped += c;
+ else
+ hasEncodingError = true;
+ break;
}
}
write(escaped);
@@ -3112,11 +3144,11 @@ void QXmlStreamWriterPrivate::writeEscaped(const QString &s, bool escapeWhitespa
void QXmlStreamWriterPrivate::write(const char *s, int len)
{
if (device) {
- if (hasError)
+ if (hasIoError)
return;
if (isCodecASCIICompatible) {
if (device->write(s, len) != len)
- hasError = true;
+ hasIoError = true;
return;
}
}
@@ -3400,15 +3432,18 @@ int QXmlStreamWriter::autoFormattingIndent() const
}
/*!
- Returns \c true if the stream failed to write to the underlying device.
+ Returns \c true if writing failed.
+
+ This can happen if the stream failed to write to the underlying
+ device or if the data to be written contained invalid characters.
The error status is never reset. Writes happening after the error
- occurred are ignored, even if the error condition is cleared.
+ occurred may be ignored, even if the error condition is cleared.
*/
bool QXmlStreamWriter::hasError() const
{
Q_D(const QXmlStreamWriter);
- return d->hasError;
+ return d->hasIoError || d->hasEncodingError;
}
/*!
diff --git a/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp b/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp
index 79cd17b5b3..16a4200b5d 100644
--- a/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp
+++ b/tests/auto/corelib/xml/qxmlstream/tst_qxmlstream.cpp
@@ -576,6 +576,7 @@ private slots:
void invalidStringCharacters_data() const;
void invalidStringCharacters() const;
void hasError() const;
+ void readBack() const;
private:
static QByteArray readFile(const QString &filename);
@@ -1695,5 +1696,50 @@ void tst_QXmlStream::invalidStringCharacters_data() const
//
}
+static bool isValidSingleTextChar(const ushort c)
+{
+ // Conforms to https://www.w3.org/TR/REC-xml/#NT-Char - except for the high range, which is done
+ // with surrogates.
+ // Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
+ static const QPair<ushort, ushort> validRanges[] = {
+ QPair<ushort, ushort>(0x9, 0xb),
+ QPair<ushort, ushort>(0xd, 0xe),
+ QPair<ushort, ushort>(0x20, 0xd800),
+ QPair<ushort, ushort>(0xe000, 0xfffe)
+ };
+
+ for (const QPair<ushort, ushort> &range : validRanges) {
+ if (c >= range.first && c < range.second)
+ return true;
+ }
+ return false;
+}
+
+void tst_QXmlStream::readBack() const
+{
+ for (ushort c = 0; c < std::numeric_limits<ushort>::max(); ++c) {
+ QBuffer buffer;
+
+ QVERIFY(buffer.open(QIODevice::WriteOnly));
+ QXmlStreamWriter writer(&buffer);
+ writer.writeStartDocument();
+ writer.writeTextElement("a", QString(QChar(c)));
+ writer.writeEndDocument();
+ buffer.close();
+
+ if (writer.hasError()) {
+ QVERIFY2(!isValidSingleTextChar(c), QByteArray::number(c));
+ } else {
+ QVERIFY2(isValidSingleTextChar(c), QByteArray::number(c));
+ QVERIFY(buffer.open(QIODevice::ReadOnly));
+ QXmlStreamReader reader(&buffer);
+ do {
+ reader.readNext();
+ } while (!reader.atEnd());
+ QVERIFY2(!reader.hasError(), QByteArray::number(c));
+ }
+ }
+}
+
#include "tst_qxmlstream.moc"
// vim: et:ts=4:sw=4:sts=4