diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2014-12-22 21:19:27 -0200 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2015-01-24 18:35:22 +0100 |
commit | 817800ad39df10ca78e2c965a61d4d2025df622b (patch) | |
tree | c47b9d29de7ac3e3b75113cc3cd613301059fb32 | |
parent | 508b1fa173e135c839f07e0e4cd6009ac63a577c (diff) |
Fix QXmlStreamReader parsing of files containing NULs
Due to a flaw in the internal API, QXmlStreamReader's internal buffering
would mistake a NUL byte in the input stream for EOF during parsing, but
wouldn't set atEnd == true because it hadn't yet processed all bytes.
This resulted in an infinite loop in QXmlStreamReaderPrivate::parse.
So, instead of returning zero (false) to indicate EOF, return -1 (but in
unsigned form, ~0, to avoid ICC warnings of change of sign). In turn,
this required enlarging a few variables to avoid ~0U becoming 0xffff,
which is a valid QChar (could happen if the input is a QString, not a
QIODevice).
Task-number: QTBUG-43513
Change-Id: If5badcfd3e4176b79517da1fd108e0abb93a3fd1
Reviewed-by: Lars Knoll <lars.knoll@digia.com>
Reviewed-by: Oswald Buddenhagen <oswald.buddenhagen@theqtcompany.com>
20 files changed, 49 insertions, 25 deletions
diff --git a/src/corelib/xml/qxmlstream.cpp b/src/corelib/xml/qxmlstream.cpp index 94f6a8bcde..d1698b812f 100644 --- a/src/corelib/xml/qxmlstream.cpp +++ b/src/corelib/xml/qxmlstream.cpp @@ -64,6 +64,8 @@ QT_BEGIN_NAMESPACE #include "qxmlstream_p.h" +enum { StreamEOF = ~0U }; + /*! \enum QXmlStreamReader::TokenType @@ -903,7 +905,7 @@ inline uint QXmlStreamReaderPrivate::filterCarriageReturn() ++readBufferPos; return peekc; } - if (peekc == 0) { + if (peekc == StreamEOF) { putChar('\r'); return 0; } @@ -912,13 +914,13 @@ inline uint QXmlStreamReaderPrivate::filterCarriageReturn() /*! \internal - If the end of the file is encountered, 0 is returned. + If the end of the file is encountered, ~0 is returned. */ inline uint QXmlStreamReaderPrivate::getChar() { uint c; if (putStack.size()) { - c = atEnd ? 0 : putStack.pop(); + c = atEnd ? StreamEOF : putStack.pop(); } else { if (readBufferPos < readBuffer.size()) c = readBuffer.at(readBufferPos++).unicode(); @@ -937,7 +939,7 @@ inline uint QXmlStreamReaderPrivate::peekChar() } else if (readBufferPos < readBuffer.size()) { c = readBuffer.at(readBufferPos).unicode(); } else { - if ((c = getChar_helper())) + if ((c = getChar_helper()) != StreamEOF) --readBufferPos; } @@ -961,7 +963,8 @@ bool QXmlStreamReaderPrivate::scanUntil(const char *str, short tokenToInject) int pos = textBuffer.size(); int oldLineNumber = lineNumber; - while (uint c = getChar()) { + uint c; + while ((c = getChar()) != StreamEOF) { /* First, we do the validation & normalization. */ switch (c) { case '\r': @@ -1007,9 +1010,9 @@ bool QXmlStreamReaderPrivate::scanString(const char *str, short tokenToInject, b { int n = 0; while (str[n]) { - ushort c = getChar(); + uint c = getChar(); if (c != ushort(str[n])) { - if (c) + if (c != StreamEOF) putChar(c); while (n--) { putChar(ushort(str[n])); @@ -1137,7 +1140,7 @@ inline int QXmlStreamReaderPrivate::fastScanLiteralContent() { int n = 0; uint c; - while ((c = getChar())) { + while ((c = getChar()) != StreamEOF) { switch (ushort(c)) { case 0xfffe: case 0xffff: @@ -1182,8 +1185,8 @@ inline int QXmlStreamReaderPrivate::fastScanLiteralContent() inline int QXmlStreamReaderPrivate::fastScanSpace() { int n = 0; - ushort c; - while ((c = getChar())) { + uint c; + while ((c = getChar()) != StreamEOF) { switch (c) { case '\r': if ((c = filterCarriageReturn()) == 0) @@ -1216,7 +1219,7 @@ inline int QXmlStreamReaderPrivate::fastScanContentCharList() { int n = 0; uint c; - while ((c = getChar())) { + while ((c = getChar()) != StreamEOF) { switch (ushort(c)) { case 0xfffe: case 0xffff: @@ -1279,8 +1282,8 @@ inline int QXmlStreamReaderPrivate::fastScanContentCharList() inline int QXmlStreamReaderPrivate::fastScanName(int *prefix) { int n = 0; - ushort c; - while ((c = getChar())) { + uint c; + while ((c = getChar()) != StreamEOF) { switch (c) { case '\n': case ' ': @@ -1396,7 +1399,7 @@ inline int QXmlStreamReaderPrivate::fastScanNMTOKEN() { int n = 0; uint c; - while ((c = getChar())) { + while ((c = getChar()) != StreamEOF) { if (fastDetermineNameChar(c) == NotName) { putChar(c); return n; @@ -1452,7 +1455,7 @@ void QXmlStreamReaderPrivate::putReplacementInAttributeValue(const QString &s) } } -ushort QXmlStreamReaderPrivate::getChar_helper() +uint QXmlStreamReaderPrivate::getChar_helper() { const int BUFFER_SIZE = 8192; characterOffset += readBufferPos; @@ -1476,7 +1479,7 @@ ushort QXmlStreamReaderPrivate::getChar_helper() } if (!nbytesread) { atEnd = true; - return 0; + return StreamEOF; } #ifndef QT_NO_TEXTCODEC @@ -1484,7 +1487,7 @@ ushort QXmlStreamReaderPrivate::getChar_helper() if (nbytesread < 4) { // the 4 is to cover 0xef 0xbb 0xbf plus // one extra for the utf8 codec atEnd = true; - return 0; + return StreamEOF; } int mib = 106; // UTF-8 @@ -1517,7 +1520,7 @@ ushort QXmlStreamReaderPrivate::getChar_helper() if(lockEncoding && decoder->hasFailure()) { raiseWellFormedError(QXmlStream::tr("Encountered incorrectly encoded content.")); readBuffer.clear(); - return 0; + return StreamEOF; } #else readBuffer = QString::fromLatin1(rawReadBuffer.data(), nbytesread); @@ -1531,7 +1534,7 @@ ushort QXmlStreamReaderPrivate::getChar_helper() } atEnd = true; - return 0; + return StreamEOF; } QStringRef QXmlStreamReaderPrivate::namespaceForPrefix(const QStringRef &prefix) diff --git a/src/corelib/xml/qxmlstream_p.h b/src/corelib/xml/qxmlstream_p.h index 7ff65e1718..087d64fce8 100644 --- a/src/corelib/xml/qxmlstream_p.h +++ b/src/corelib/xml/qxmlstream_p.h @@ -944,7 +944,7 @@ public: short token; - ushort token_char; + uint token_char; uint filterCarriageReturn(); inline uint getChar(); @@ -955,7 +955,7 @@ public: void putStringLiteral(const QString &s); void putReplacement(const QString &s); void putReplacementInAttributeValue(const QString &s); - ushort getChar_helper(); + uint getChar_helper(); bool scanUntil(const char *str, short tokenToInject = -1); bool scanString(const char *str, short tokenToInject, bool requireSpace = true); @@ -1068,7 +1068,7 @@ bool QXmlStreamReaderPrivate::parse() documentVersion.clear(); documentEncoding.clear(); #ifndef QT_NO_TEXTCODEC - if (decoder->hasFailure()) { + if (decoder && decoder->hasFailure()) { raiseWellFormedError(QXmlStream::tr("Encountered incorrectly encoded content.")); readBuffer.clear(); return false; @@ -1099,8 +1099,8 @@ bool QXmlStreamReaderPrivate::parse() if (token == -1 && - TERMINAL_COUNT != action_index[act]) { uint cu = getChar(); token = NOTOKEN; - token_char = cu; - if (cu & 0xff0000) { + token_char = cu == ~0U ? cu : ushort(cu); + if ((cu != ~0U) && (cu & 0xff0000)) { token = cu >> 16; } else switch (token_char) { case 0xfffe: @@ -1119,7 +1119,7 @@ bool QXmlStreamReaderPrivate::parse() break; } // fall through - case '\0': { + case ~0U: { token = EOF_SYMBOL; if (!tagsDone && !inParseEntity) { int a = t_action(act, token); diff --git a/tests/auto/corelib/xml/qxmlstream/data/carriagereturn+nul.ref b/tests/auto/corelib/xml/qxmlstream/data/carriagereturn+nul.ref new file mode 100644 index 0000000000..b636d80294 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/carriagereturn+nul.ref @@ -0,0 +1,3 @@ +StartDocument( ) +Invalid( processingInstructionTarget="xml_" ) +ERROR: Invalid XML character. diff --git a/tests/auto/corelib/xml/qxmlstream/data/carriagereturn+nul.xml b/tests/auto/corelib/xml/qxmlstream/data/carriagereturn+nul.xml Binary files differnew file mode 100644 index 0000000000..e87bf56453 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/carriagereturn+nul.xml diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul0.ref b/tests/auto/corelib/xml/qxmlstream/data/nul0.ref new file mode 100644 index 0000000000..a76455ee07 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul0.ref @@ -0,0 +1,2 @@ +Invalid( ) +ERROR: Premature end of document. diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul0.xml b/tests/auto/corelib/xml/qxmlstream/data/nul0.xml Binary files differnew file mode 100644 index 0000000000..f76dd238ad --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul0.xml diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul1.ref b/tests/auto/corelib/xml/qxmlstream/data/nul1.ref new file mode 100644 index 0000000000..a76455ee07 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul1.ref @@ -0,0 +1,2 @@ +Invalid( ) +ERROR: Premature end of document. diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul1.xml b/tests/auto/corelib/xml/qxmlstream/data/nul1.xml Binary files differnew file mode 100644 index 0000000000..aab3ad6ca9 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul1.xml diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul2.ref b/tests/auto/corelib/xml/qxmlstream/data/nul2.ref new file mode 100644 index 0000000000..08bbcc6aa4 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul2.ref @@ -0,0 +1,3 @@ +StartDocument( ) +Invalid( ) +ERROR: Expected '[a-zA-Z]', but got ' '. diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul2.xml b/tests/auto/corelib/xml/qxmlstream/data/nul2.xml Binary files differnew file mode 100644 index 0000000000..e14d1b8ae5 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul2.xml diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul3.ref b/tests/auto/corelib/xml/qxmlstream/data/nul3.ref new file mode 100644 index 0000000000..cf4dd3848b --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul3.ref @@ -0,0 +1,2 @@ +Invalid( ) +ERROR: Expected 'version', but got ''. diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul3.xml b/tests/auto/corelib/xml/qxmlstream/data/nul3.xml Binary files differnew file mode 100644 index 0000000000..d8260b908e --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul3.xml diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul3bis.ref b/tests/auto/corelib/xml/qxmlstream/data/nul3bis.ref new file mode 100644 index 0000000000..cf4dd3848b --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul3bis.ref @@ -0,0 +1,2 @@ +Invalid( ) +ERROR: Expected 'version', but got ''. diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul3bis.xml b/tests/auto/corelib/xml/qxmlstream/data/nul3bis.xml Binary files differnew file mode 100644 index 0000000000..61e011014e --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul3bis.xml diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul4.ref b/tests/auto/corelib/xml/qxmlstream/data/nul4.ref new file mode 100644 index 0000000000..cf4dd3848b --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul4.ref @@ -0,0 +1,2 @@ +Invalid( ) +ERROR: Expected 'version', but got ''. diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul4.xml b/tests/auto/corelib/xml/qxmlstream/data/nul4.xml Binary files differnew file mode 100644 index 0000000000..90f20eebf0 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul4.xml diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul4bis.ref b/tests/auto/corelib/xml/qxmlstream/data/nul4bis.ref new file mode 100644 index 0000000000..cf4dd3848b --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul4bis.ref @@ -0,0 +1,2 @@ +Invalid( ) +ERROR: Expected 'version', but got ''. diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul4bis.xml b/tests/auto/corelib/xml/qxmlstream/data/nul4bis.xml Binary files differnew file mode 100644 index 0000000000..15d2d10685 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul4bis.xml diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul5.ref b/tests/auto/corelib/xml/qxmlstream/data/nul5.ref new file mode 100644 index 0000000000..9432b74a99 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul5.ref @@ -0,0 +1,3 @@ +StartDocument( documentVersion="1.0" ) +Invalid( ) +ERROR: Start tag expected. diff --git a/tests/auto/corelib/xml/qxmlstream/data/nul5.xml b/tests/auto/corelib/xml/qxmlstream/data/nul5.xml Binary files differnew file mode 100644 index 0000000000..6a79cbdc75 --- /dev/null +++ b/tests/auto/corelib/xml/qxmlstream/data/nul5.xml |