From 817800ad39df10ca78e2c965a61d4d2025df622b Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Mon, 22 Dec 2014 21:19:27 -0200 Subject: Fix QXmlStreamReader parsing of files containing NULs Due to a flaw in the internal API, QXmlStreamReader's internal buffering would mistake a NUL byte in the input stream for EOF during parsing, but wouldn't set atEnd == true because it hadn't yet processed all bytes. This resulted in an infinite loop in QXmlStreamReaderPrivate::parse. So, instead of returning zero (false) to indicate EOF, return -1 (but in unsigned form, ~0, to avoid ICC warnings of change of sign). In turn, this required enlarging a few variables to avoid ~0U becoming 0xffff, which is a valid QChar (could happen if the input is a QString, not a QIODevice). Task-number: QTBUG-43513 Change-Id: If5badcfd3e4176b79517da1fd108e0abb93a3fd1 Reviewed-by: Lars Knoll Reviewed-by: Oswald Buddenhagen --- src/corelib/xml/qxmlstream.cpp | 41 ++++++++++++++++++++++------------------- src/corelib/xml/qxmlstream_p.h | 12 ++++++------ 2 files changed, 28 insertions(+), 25 deletions(-) (limited to 'src/corelib/xml') diff --git a/src/corelib/xml/qxmlstream.cpp b/src/corelib/xml/qxmlstream.cpp index 94f6a8bcde..d1698b812f 100644 --- a/src/corelib/xml/qxmlstream.cpp +++ b/src/corelib/xml/qxmlstream.cpp @@ -64,6 +64,8 @@ QT_BEGIN_NAMESPACE #include "qxmlstream_p.h" +enum { StreamEOF = ~0U }; + /*! \enum QXmlStreamReader::TokenType @@ -903,7 +905,7 @@ inline uint QXmlStreamReaderPrivate::filterCarriageReturn() ++readBufferPos; return peekc; } - if (peekc == 0) { + if (peekc == StreamEOF) { putChar('\r'); return 0; } @@ -912,13 +914,13 @@ inline uint QXmlStreamReaderPrivate::filterCarriageReturn() /*! \internal - If the end of the file is encountered, 0 is returned. + If the end of the file is encountered, ~0 is returned. */ inline uint QXmlStreamReaderPrivate::getChar() { uint c; if (putStack.size()) { - c = atEnd ? 0 : putStack.pop(); + c = atEnd ? StreamEOF : putStack.pop(); } else { if (readBufferPos < readBuffer.size()) c = readBuffer.at(readBufferPos++).unicode(); @@ -937,7 +939,7 @@ inline uint QXmlStreamReaderPrivate::peekChar() } else if (readBufferPos < readBuffer.size()) { c = readBuffer.at(readBufferPos).unicode(); } else { - if ((c = getChar_helper())) + if ((c = getChar_helper()) != StreamEOF) --readBufferPos; } @@ -961,7 +963,8 @@ bool QXmlStreamReaderPrivate::scanUntil(const char *str, short tokenToInject) int pos = textBuffer.size(); int oldLineNumber = lineNumber; - while (uint c = getChar()) { + uint c; + while ((c = getChar()) != StreamEOF) { /* First, we do the validation & normalization. */ switch (c) { case '\r': @@ -1007,9 +1010,9 @@ bool QXmlStreamReaderPrivate::scanString(const char *str, short tokenToInject, b { int n = 0; while (str[n]) { - ushort c = getChar(); + uint c = getChar(); if (c != ushort(str[n])) { - if (c) + if (c != StreamEOF) putChar(c); while (n--) { putChar(ushort(str[n])); @@ -1137,7 +1140,7 @@ inline int QXmlStreamReaderPrivate::fastScanLiteralContent() { int n = 0; uint c; - while ((c = getChar())) { + while ((c = getChar()) != StreamEOF) { switch (ushort(c)) { case 0xfffe: case 0xffff: @@ -1182,8 +1185,8 @@ inline int QXmlStreamReaderPrivate::fastScanLiteralContent() inline int QXmlStreamReaderPrivate::fastScanSpace() { int n = 0; - ushort c; - while ((c = getChar())) { + uint c; + while ((c = getChar()) != StreamEOF) { switch (c) { case '\r': if ((c = filterCarriageReturn()) == 0) @@ -1216,7 +1219,7 @@ inline int QXmlStreamReaderPrivate::fastScanContentCharList() { int n = 0; uint c; - while ((c = getChar())) { + while ((c = getChar()) != StreamEOF) { switch (ushort(c)) { case 0xfffe: case 0xffff: @@ -1279,8 +1282,8 @@ inline int QXmlStreamReaderPrivate::fastScanContentCharList() inline int QXmlStreamReaderPrivate::fastScanName(int *prefix) { int n = 0; - ushort c; - while ((c = getChar())) { + uint c; + while ((c = getChar()) != StreamEOF) { switch (c) { case '\n': case ' ': @@ -1396,7 +1399,7 @@ inline int QXmlStreamReaderPrivate::fastScanNMTOKEN() { int n = 0; uint c; - while ((c = getChar())) { + while ((c = getChar()) != StreamEOF) { if (fastDetermineNameChar(c) == NotName) { putChar(c); return n; @@ -1452,7 +1455,7 @@ void QXmlStreamReaderPrivate::putReplacementInAttributeValue(const QString &s) } } -ushort QXmlStreamReaderPrivate::getChar_helper() +uint QXmlStreamReaderPrivate::getChar_helper() { const int BUFFER_SIZE = 8192; characterOffset += readBufferPos; @@ -1476,7 +1479,7 @@ ushort QXmlStreamReaderPrivate::getChar_helper() } if (!nbytesread) { atEnd = true; - return 0; + return StreamEOF; } #ifndef QT_NO_TEXTCODEC @@ -1484,7 +1487,7 @@ ushort QXmlStreamReaderPrivate::getChar_helper() if (nbytesread < 4) { // the 4 is to cover 0xef 0xbb 0xbf plus // one extra for the utf8 codec atEnd = true; - return 0; + return StreamEOF; } int mib = 106; // UTF-8 @@ -1517,7 +1520,7 @@ ushort QXmlStreamReaderPrivate::getChar_helper() if(lockEncoding && decoder->hasFailure()) { raiseWellFormedError(QXmlStream::tr("Encountered incorrectly encoded content.")); readBuffer.clear(); - return 0; + return StreamEOF; } #else readBuffer = QString::fromLatin1(rawReadBuffer.data(), nbytesread); @@ -1531,7 +1534,7 @@ ushort QXmlStreamReaderPrivate::getChar_helper() } atEnd = true; - return 0; + return StreamEOF; } QStringRef QXmlStreamReaderPrivate::namespaceForPrefix(const QStringRef &prefix) diff --git a/src/corelib/xml/qxmlstream_p.h b/src/corelib/xml/qxmlstream_p.h index 7ff65e1718..087d64fce8 100644 --- a/src/corelib/xml/qxmlstream_p.h +++ b/src/corelib/xml/qxmlstream_p.h @@ -944,7 +944,7 @@ public: short token; - ushort token_char; + uint token_char; uint filterCarriageReturn(); inline uint getChar(); @@ -955,7 +955,7 @@ public: void putStringLiteral(const QString &s); void putReplacement(const QString &s); void putReplacementInAttributeValue(const QString &s); - ushort getChar_helper(); + uint getChar_helper(); bool scanUntil(const char *str, short tokenToInject = -1); bool scanString(const char *str, short tokenToInject, bool requireSpace = true); @@ -1068,7 +1068,7 @@ bool QXmlStreamReaderPrivate::parse() documentVersion.clear(); documentEncoding.clear(); #ifndef QT_NO_TEXTCODEC - if (decoder->hasFailure()) { + if (decoder && decoder->hasFailure()) { raiseWellFormedError(QXmlStream::tr("Encountered incorrectly encoded content.")); readBuffer.clear(); return false; @@ -1099,8 +1099,8 @@ bool QXmlStreamReaderPrivate::parse() if (token == -1 && - TERMINAL_COUNT != action_index[act]) { uint cu = getChar(); token = NOTOKEN; - token_char = cu; - if (cu & 0xff0000) { + token_char = cu == ~0U ? cu : ushort(cu); + if ((cu != ~0U) && (cu & 0xff0000)) { token = cu >> 16; } else switch (token_char) { case 0xfffe: @@ -1119,7 +1119,7 @@ bool QXmlStreamReaderPrivate::parse() break; } // fall through - case '\0': { + case ~0U: { token = EOF_SYMBOL; if (!tagsDone && !inParseEntity) { int a = t_action(act, token); -- cgit v1.2.3