diff options
Diffstat (limited to 'src/corelib/serialization/qxmlstream.cpp')
-rw-r--r-- | src/corelib/serialization/qxmlstream.cpp | 334 |
1 files changed, 275 insertions, 59 deletions
diff --git a/src/corelib/serialization/qxmlstream.cpp b/src/corelib/serialization/qxmlstream.cpp index 61c9f343ec..0fe8c87779 100644 --- a/src/corelib/serialization/qxmlstream.cpp +++ b/src/corelib/serialization/qxmlstream.cpp @@ -22,6 +22,7 @@ #include "qxmlstream_p.h" #include "qxmlstreamparser_p.h" #include <private/qstringconverter_p.h> +#include <private/qstringiterator_p.h> QT_BEGIN_NAMESPACE @@ -185,7 +186,7 @@ WRAP(indexOf, QLatin1StringView) addData() or by waiting for it to arrive on the device(). \value UnexpectedElementError The parser encountered an element - that was different to those it expected. + or token that was different to those it expected. */ @@ -322,13 +323,34 @@ QXmlStreamEntityResolver *QXmlStreamReader::entityResolver() const QXmlStreamReader is a well-formed XML 1.0 parser that does \e not include external parsed entities. As long as no error occurs, the - application code can thus be assured that the data provided by the - stream reader satisfies the W3C's criteria for well-formed XML. For - example, you can be certain that all tags are indeed nested and - closed properly, that references to internal entities have been - replaced with the correct replacement text, and that attributes have - been normalized or added according to the internal subset of the - DTD. + application code can thus be assured, that + \list + \li the data provided by the stream reader satisfies the W3C's + criteria for well-formed XML, + \li tokens are provided in a valid order. + \endlist + + Unless QXmlStreamReader raises an error, it guarantees the following: + \list + \li All tags are nested and closed properly. + \li References to internal entities have been replaced with the + correct replacement text. + \li Attributes have been normalized or added according to the + internal subset of the \l DTD. + \li Tokens of type \l StartDocument happen before all others, + aside from comments and processing instructions. + \li At most one DOCTYPE element (a token of type \l DTD) is present. + \li If present, the DOCTYPE appears before all other elements, + aside from StartDocument, comments and processing instructions. + \endlist + + In particular, once any token of type \l StartElement, \l EndElement, + \l Characters, \l EntityReference or \l EndDocument is seen, no + tokens of type StartDocument or DTD will be seen. If one is present in + the input stream, out of order, an error is raised. + + \note The token types \l Comment and \l ProcessingInstruction may appear + anywhere in the stream. If an error occurs while parsing, atEnd() and hasError() return true, and error() returns the error that occurred. The functions @@ -659,6 +681,7 @@ QXmlStreamReader::TokenType QXmlStreamReader::readNext() d->token = -1; return readNext(); } + d->checkToken(); return d->type; } @@ -699,7 +722,7 @@ QXmlStreamReader::TokenType QXmlStreamReader::tokenType() const bool QXmlStreamReader::readNextStartElement() { while (readNext() != Invalid) { - if (isEndElement()) + if (isEndElement() || isEndDocument()) return false; else if (isStartElement()) return true; @@ -743,6 +766,11 @@ static constexpr auto QXmlStreamReader_tokenTypeString = qOffsetStringArray( "ProcessingInstruction" ); +static constexpr auto QXmlStreamReader_XmlContextString = qOffsetStringArray( + "Prolog", + "Body" +); + /*! \property QXmlStreamReader::namespaceProcessing \brief the namespace-processing flag of the stream reader. @@ -777,6 +805,15 @@ QString QXmlStreamReader::tokenString() const return QLatin1StringView(QXmlStreamReader_tokenTypeString.at(d->type)); } +/*! + \internal + \return \param ctxt (Prolog/Body) as a string. + */ +static constexpr QLatin1StringView contextString(QXmlStreamReaderPrivate::XmlContext ctxt) +{ + return QLatin1StringView(QXmlStreamReader_XmlContextString.at(static_cast<int>(ctxt))); +} + #endif // feature xmlstreamreader QXmlStreamPrivateTagStack::QXmlStreamPrivateTagStack() @@ -864,6 +901,8 @@ void QXmlStreamReaderPrivate::init() type = QXmlStreamReader::NoToken; error = QXmlStreamReader::NoError; + currentContext = XmlContext::Prolog; + foundDTD = false; } /* @@ -1296,7 +1335,8 @@ inline qsizetype QXmlStreamReaderPrivate::fastScanContentCharList() return n; } -inline qsizetype QXmlStreamReaderPrivate::fastScanName(qint16 *prefix) +// Fast scan an XML attribute name (e.g. "xml:lang"). +inline std::optional<qsizetype> QXmlStreamReaderPrivate::fastScanName(Value *val) { qsizetype n = 0; uint c; @@ -1304,7 +1344,8 @@ inline qsizetype QXmlStreamReaderPrivate::fastScanName(qint16 *prefix) if (n >= 4096) { // This is too long to be a sensible name, and // can exhaust memory, or the range of decltype(*prefix) - return 0; + raiseNamePrefixTooLongError(); + return std::nullopt; } switch (c) { case '\n': @@ -1333,16 +1374,16 @@ inline qsizetype QXmlStreamReaderPrivate::fastScanName(qint16 *prefix) case '+': case '*': putChar(c); - if (prefix && *prefix == n+1) { - *prefix = 0; + if (val && val->prefix == n + 1) { + val->prefix = 0; putChar(':'); --n; } return n; case ':': - if (prefix) { - if (*prefix == 0) { - *prefix = qint16(n + 2); + if (val) { + if (val->prefix == 0) { + val->prefix = qint16(n + 2); } else { // only one colon allowed according to the namespace spec. putChar(c); return n; @@ -1358,8 +1399,8 @@ inline qsizetype QXmlStreamReaderPrivate::fastScanName(qint16 *prefix) } } - if (prefix) - *prefix = 0; + if (val) + val->prefix = 0; qsizetype pos = textBuffer.size() - n; putString(textBuffer, pos); textBuffer.resize(pos); @@ -1812,7 +1853,7 @@ void QXmlStreamReaderPrivate::startDocument() else err = QXmlStream::tr("Standalone accepts only yes or no."); } else { - err = QXmlStream::tr("Invalid attribute in XML declaration."); + err = QXmlStream::tr("Invalid attribute in XML declaration: %1 = %2").arg(key).arg(value); } } @@ -1841,6 +1882,14 @@ void QXmlStreamReaderPrivate::raiseWellFormedError(const QString &message) raiseError(QXmlStreamReader::NotWellFormedError, message); } +void QXmlStreamReaderPrivate::raiseNamePrefixTooLongError() +{ + // TODO: add a ImplementationLimitsExceededError and use it instead + raiseError(QXmlStreamReader::NotWellFormedError, + QXmlStream::tr("Length of XML attribute name exceeds implementation limits (4KiB " + "characters).")); +} + void QXmlStreamReaderPrivate::parseError() { @@ -2283,6 +2332,8 @@ QXmlStreamAttributes QXmlStreamReader::attributes() const \ingroup xml-tools + \compares equality + An attribute consists of an optionally empty namespaceUri(), a name(), a value(), and an isDefault() attribute. @@ -2357,14 +2408,14 @@ QXmlStreamAttribute::QXmlStreamAttribute(const QString &qualifiedName, const QSt value following an ATTLIST declaration in the DTD; otherwise returns \c false. */ -/*! \fn bool QXmlStreamAttribute::operator==(const QXmlStreamAttribute &other) const +/*! \fn bool QXmlStreamAttribute::operator==(const QXmlStreamAttribute &lhs, const QXmlStreamAttribute &rhs) - Compares this attribute with \a other and returns \c true if they are + Compares \a lhs attribute with \a rhs and returns \c true if they are equal; otherwise returns \c false. */ -/*! \fn bool QXmlStreamAttribute::operator!=(const QXmlStreamAttribute &other) const +/*! \fn bool QXmlStreamAttribute::operator!=(const QXmlStreamAttribute &lhs, const QXmlStreamAttribute &rhs) - Compares this attribute with \a other and returns \c true if they are + Compares \a lhs attribute with \a rhs and returns \c true if they are not equal; otherwise returns \c false. */ @@ -2413,6 +2464,8 @@ QXmlStreamAttribute::QXmlStreamAttribute(const QString &qualifiedName, const QSt \ingroup xml-tools + \compares equality + An notation declaration consists of a name(), a systemId(), and a publicId(). */ @@ -2436,14 +2489,14 @@ Returns the system identifier. Returns the public identifier. */ -/*! \fn inline bool QXmlStreamNotationDeclaration::operator==(const QXmlStreamNotationDeclaration &other) const +/*! \fn inline bool QXmlStreamNotationDeclaration::operator==(const QXmlStreamNotationDeclaration &lhs, const QXmlStreamNotationDeclaration &rhs) - Compares this notation declaration with \a other and returns \c true + Compares \a lhs notation declaration with \a rhs and returns \c true if they are equal; otherwise returns \c false. */ -/*! \fn inline bool QXmlStreamNotationDeclaration::operator!=(const QXmlStreamNotationDeclaration &other) const +/*! \fn inline bool QXmlStreamNotationDeclaration::operator!=(const QXmlStreamNotationDeclaration &lhs, const QXmlStreamNotationDeclaration &rhs) - Compares this notation declaration with \a other and returns \c true + Compares \a lhs notation declaration with \a rhs and returns \c true if they are not equal; otherwise returns \c false. */ @@ -2463,16 +2516,18 @@ Returns the public identifier. \ingroup xml-tools + \compares equality + An namespace declaration consists of a prefix() and a namespaceUri(). */ -/*! \fn inline bool QXmlStreamNamespaceDeclaration::operator==(const QXmlStreamNamespaceDeclaration &other) const +/*! \fn inline bool QXmlStreamNamespaceDeclaration::operator==(const QXmlStreamNamespaceDeclaration &lhs, const QXmlStreamNamespaceDeclaration &rhs) - Compares this namespace declaration with \a other and returns \c true + Compares \a lhs namespace declaration with \a rhs and returns \c true if they are equal; otherwise returns \c false. */ -/*! \fn inline bool QXmlStreamNamespaceDeclaration::operator!=(const QXmlStreamNamespaceDeclaration &other) const +/*! \fn inline bool QXmlStreamNamespaceDeclaration::operator!=(const QXmlStreamNamespaceDeclaration &lhs, const QXmlStreamNamespaceDeclaration &rhs) - Compares this namespace declaration with \a other and returns \c true + Compares \a lhs namespace declaration with \a rhs and returns \c true if they are not equal; otherwise returns \c false. */ @@ -2529,6 +2584,7 @@ Returns the namespaceUri. \ingroup xml-tools + \compares equality An entity declaration consists of a name(), a notationName(), a systemId(), a publicId(), and a value(). */ @@ -2561,14 +2617,14 @@ Returns the public identifier. Returns the entity's value. */ -/*! \fn bool QXmlStreamEntityDeclaration::operator==(const QXmlStreamEntityDeclaration &other) const +/*! \fn bool QXmlStreamEntityDeclaration::operator==(const QXmlStreamEntityDeclaration &lhs, const QXmlStreamEntityDeclaration &rhs) - Compares this entity declaration with \a other and returns \c true if + Compares \a lhs entity declaration with \a rhs and returns \c true if they are equal; otherwise returns \c false. */ -/*! \fn bool QXmlStreamEntityDeclaration::operator!=(const QXmlStreamEntityDeclaration &other) const +/*! \fn bool QXmlStreamEntityDeclaration::operator!=(const QXmlStreamEntityDeclaration &lhs, const QXmlStreamEntityDeclaration &rhs) - Compares this entity declaration with \a other and returns \c true if + Compares \a lhs entity declaration with \a rhs and returns \c true if they are not equal; otherwise returns \c false. */ @@ -2831,6 +2887,11 @@ class QXmlStreamWriterPrivate : public QXmlStreamPrivateTagStack QXmlStreamWriter *q_ptr; Q_DECLARE_PUBLIC(QXmlStreamWriter) public: + enum class StartElementOption { + KeepEverything = 0, // write out every attribute, namespace, &c. + OmitNamespaceDeclarations = 1, + }; + QXmlStreamWriterPrivate(QXmlStreamWriter *q); ~QXmlStreamWriterPrivate() { if (deleteDevice) @@ -2840,7 +2901,8 @@ public: void write(QAnyStringView s); void writeEscaped(QAnyStringView, bool escapeWhitespace = false); bool finishStartElement(bool contents = true); - void writeStartElement(QAnyStringView namespaceUri, QAnyStringView name); + void writeStartElement(QAnyStringView namespaceUri, QAnyStringView name, + StartElementOption option = StartElementOption::KeepEverything); QIODevice *device; QString *stringDevice; uint deleteDevice :1; @@ -2855,6 +2917,7 @@ public: NamespaceDeclaration emptyNamespace; qsizetype lastNamespaceDeclaration; + NamespaceDeclaration &addExtraNamespace(QAnyStringView namespaceUri, QAnyStringView prefix); NamespaceDeclaration &findNamespace(QAnyStringView namespaceUri, bool writeDeclaration = false, bool noDefault = false); void writeNamespaceDeclaration(const NamespaceDeclaration &namespaceDeclaration); @@ -2901,54 +2964,83 @@ void QXmlStreamWriterPrivate::write(QAnyStringView s) void QXmlStreamWriterPrivate::writeEscaped(QAnyStringView s, bool escapeWhitespace) { + struct NextLatin1 { + char32_t operator()(const char *&it, const char *) const + { return uchar(*it++); } + }; + struct NextUtf8 { + char32_t operator()(const char *&it, const char *end) const + { + uchar uc = *it++; + char32_t utf32 = 0; + char32_t *output = &utf32; + qsizetype n = QUtf8Functions::fromUtf8<QUtf8BaseTraits>(uc, output, it, end); + return n < 0 ? 0 : utf32; + } + }; + struct NextUtf16 { + char32_t operator()(const QChar *&it, const QChar *end) const + { + QStringIterator decoder(it, end); + char32_t result = decoder.next(u'\0'); + it = decoder.position(); + return result; + } + }; + QString escaped; escaped.reserve(s.size()); s.visit([&] (auto s) { using View = decltype(s); + using Decoder = std::conditional_t<std::is_same_v<View, QLatin1StringView>, NextLatin1, + std::conditional_t<std::is_same_v<View, QUtf8StringView>, NextUtf8, NextUtf16>>; auto it = s.begin(); const auto end = s.end(); + Decoder decoder; while (it != end) { QLatin1StringView replacement; auto mark = it; while (it != end) { - if (*it == u'<') { + auto next_it = it; + char32_t uc = decoder(next_it, end); + if (uc == u'<') { replacement = "<"_L1; break; - } else if (*it == u'>') { + } else if (uc == u'>') { replacement = ">"_L1; break; - } else if (*it == u'&') { + } else if (uc == u'&') { replacement = "&"_L1; break; - } else if (*it == u'\"') { + } else if (uc == u'\"') { replacement = """_L1; break; - } else if (*it == u'\t') { + } else if (uc == u'\t') { if (escapeWhitespace) { replacement = "	"_L1; break; } - } else if (*it == u'\n') { + } else if (uc == u'\n') { if (escapeWhitespace) { replacement = " "_L1; break; } - } else if (*it == u'\v' || *it == u'\f') { + } else if (uc == u'\v' || uc == u'\f') { hasEncodingError = true; break; - } else if (*it == u'\r') { + } else if (uc == u'\r') { if (escapeWhitespace) { replacement = " "_L1; break; } - } else if (*it <= u'\x1F' || *it >= u'\uFFFE') { + } else if (uc <= u'\x1F' || uc == u'\uFFFE' || uc == u'\uFFFF') { hasEncodingError = true; break; } - ++it; + it = next_it; } escaped.append(View{mark, it}); @@ -2995,6 +3087,32 @@ bool QXmlStreamWriterPrivate::finishStartElement(bool contents) return hadSomethingWritten; } +QXmlStreamPrivateTagStack::NamespaceDeclaration & +QXmlStreamWriterPrivate::addExtraNamespace(QAnyStringView namespaceUri, QAnyStringView prefix) +{ + const bool prefixIsXml = prefix == "xml"_L1; + const bool namespaceUriIsXml = namespaceUri == "http://www.w3.org/XML/1998/namespace"_L1; + if (prefixIsXml && !namespaceUriIsXml) { + qWarning("Reserved prefix 'xml' must not be bound to a different namespace name " + "than 'http://www.w3.org/XML/1998/namespace'"); + } else if (!prefixIsXml && namespaceUriIsXml) { + const QString prefixString = prefix.toString(); + qWarning("The prefix '%ls' must not be bound to namespace name " + "'http://www.w3.org/XML/1998/namespace' which 'xml' is already bound to", + qUtf16Printable(prefixString)); + } + if (namespaceUri == "http://www.w3.org/2000/xmlns/"_L1) { + const QString prefixString = prefix.toString(); + qWarning("The prefix '%ls' must not be bound to namespace name " + "'http://www.w3.org/2000/xmlns/'", + qUtf16Printable(prefixString)); + } + auto &namespaceDeclaration = namespaceDeclarations.push(); + namespaceDeclaration.prefix = addToStringStorage(prefix); + namespaceDeclaration.namespaceUri = addToStringStorage(namespaceUri); + return namespaceDeclaration; +} + QXmlStreamPrivateTagStack::NamespaceDeclaration &QXmlStreamWriterPrivate::findNamespace(QAnyStringView namespaceUri, bool writeDeclaration, bool noDefault) { for (NamespaceDeclaration &namespaceDeclaration : reversed(namespaceDeclarations)) { @@ -3573,11 +3691,7 @@ void QXmlStreamWriter::writeNamespace(QAnyStringView namespaceUri, QAnyStringVie if (prefix.isEmpty()) { d->findNamespace(namespaceUri, d->inStartElement); } else { - Q_ASSERT(!((prefix == "xml"_L1) ^ (namespaceUri == "http://www.w3.org/XML/1998/namespace"_L1))); - Q_ASSERT(namespaceUri != "http://www.w3.org/2000/xmlns/"_L1); - QXmlStreamWriterPrivate::NamespaceDeclaration &namespaceDeclaration = d->namespaceDeclarations.push(); - namespaceDeclaration.prefix = d->addToStringStorage(prefix); - namespaceDeclaration.namespaceUri = d->addToStringStorage(namespaceUri); + auto &namespaceDeclaration = d->addExtraNamespace(namespaceUri, prefix); if (d->inStartElement) d->writeNamespaceDeclaration(namespaceDeclaration); } @@ -3726,7 +3840,8 @@ void QXmlStreamWriter::writeStartElement(QAnyStringView namespaceUri, QAnyString d->writeStartElement(namespaceUri, name); } -void QXmlStreamWriterPrivate::writeStartElement(QAnyStringView namespaceUri, QAnyStringView name) +void QXmlStreamWriterPrivate::writeStartElement(QAnyStringView namespaceUri, QAnyStringView name, + StartElementOption option) { if (!finishStartElement(false) && autoFormatting) indent(tagStack.size()); @@ -3742,8 +3857,10 @@ void QXmlStreamWriterPrivate::writeStartElement(QAnyStringView namespaceUri, QAn write(tag.name); inStartElement = lastWasStartElement = true; - for (qsizetype i = lastNamespaceDeclaration; i < namespaceDeclarations.size(); ++i) - writeNamespaceDeclaration(namespaceDeclarations[i]); + if (option != StartElementOption::OmitNamespaceDeclarations) { + for (qsizetype i = lastNamespaceDeclaration; i < namespaceDeclarations.size(); ++i) + writeNamespaceDeclaration(namespaceDeclarations[i]); + } tag.namespaceDeclarationsSize = lastNamespaceDeclaration; } @@ -3757,6 +3874,7 @@ void QXmlStreamWriterPrivate::writeStartElement(QAnyStringView namespaceUri, QAn */ void QXmlStreamWriter::writeCurrentToken(const QXmlStreamReader &reader) { + Q_D(QXmlStreamWriter); switch (reader.tokenType()) { case QXmlStreamReader::NoToken: break; @@ -3767,12 +3885,19 @@ void QXmlStreamWriter::writeCurrentToken(const QXmlStreamReader &reader) writeEndDocument(); break; case QXmlStreamReader::StartElement: { - writeStartElement(reader.namespaceUri(), reader.name()); - const QXmlStreamNamespaceDeclarations decls = reader.namespaceDeclarations(); - for (const auto &namespaceDeclaration : decls) { - writeNamespace(namespaceDeclaration.namespaceUri(), - namespaceDeclaration.prefix()); + // Namespaces must be added before writeStartElement is called so new prefixes are found + QList<QXmlStreamPrivateTagStack::NamespaceDeclaration> extraNamespaces; + for (const auto &namespaceDeclaration : reader.namespaceDeclarations()) { + auto &extraNamespace = d->addExtraNamespace(namespaceDeclaration.namespaceUri(), + namespaceDeclaration.prefix()); + extraNamespaces.append(extraNamespace); } + d->writeStartElement( + reader.namespaceUri(), reader.name(), + QXmlStreamWriterPrivate::StartElementOption::OmitNamespaceDeclarations); + // Namespace declarations are written afterwards + for (const auto &extraNamespace : std::as_const(extraNamespaces)) + d->writeNamespaceDeclaration(extraNamespace); writeAttributes(reader.attributes()); } break; case QXmlStreamReader::EndElement: @@ -3804,6 +3929,97 @@ void QXmlStreamWriter::writeCurrentToken(const QXmlStreamReader &reader) } } +static constexpr bool isTokenAllowedInContext(QXmlStreamReader::TokenType type, + QXmlStreamReaderPrivate::XmlContext ctxt) +{ + switch (type) { + case QXmlStreamReader::StartDocument: + case QXmlStreamReader::DTD: + return ctxt == QXmlStreamReaderPrivate::XmlContext::Prolog; + + case QXmlStreamReader::StartElement: + case QXmlStreamReader::EndElement: + case QXmlStreamReader::Characters: + case QXmlStreamReader::EntityReference: + case QXmlStreamReader::EndDocument: + return ctxt == QXmlStreamReaderPrivate::XmlContext::Body; + + case QXmlStreamReader::Comment: + case QXmlStreamReader::ProcessingInstruction: + return true; + + case QXmlStreamReader::NoToken: + case QXmlStreamReader::Invalid: + return false; + } + + // GCC 8.x does not treat __builtin_unreachable() as constexpr +#if !defined(Q_CC_GNU_ONLY) || (Q_CC_GNU >= 900) + Q_UNREACHABLE_RETURN(false); +#else + return false; +#endif +} + +/*! + \internal + \brief QXmlStreamReader::isValidToken + \return \c true if \param type is a valid token type. + \return \c false if \param type is an unexpected token, + which indicates a non-well-formed or invalid XML stream. + */ +bool QXmlStreamReaderPrivate::isValidToken(QXmlStreamReader::TokenType type) +{ + // Don't change currentContext, if Invalid or NoToken occur in the prolog + if (type == QXmlStreamReader::Invalid || type == QXmlStreamReader::NoToken) + return false; + + // If a token type gets rejected in the body, there is no recovery + const bool result = isTokenAllowedInContext(type, currentContext); + if (result || currentContext == XmlContext::Body) + return result; + + // First non-Prolog token observed => switch context to body and check again. + currentContext = XmlContext::Body; + return isTokenAllowedInContext(type, currentContext); +} + +/*! + \internal + Checks token type and raises an error, if it is invalid + in the current context (prolog/body). + */ +void QXmlStreamReaderPrivate::checkToken() +{ + Q_Q(QXmlStreamReader); + + // The token type must be consumed, to keep track if the body has been reached. + const XmlContext context = currentContext; + const bool ok = isValidToken(type); + + // Do nothing if an error has been raised already (going along with an unexpected token) + if (error != QXmlStreamReader::Error::NoError) + return; + + if (!ok) { + raiseError(QXmlStreamReader::UnexpectedElementError, + QXmlStream::tr("Unexpected token type %1 in %2.") + .arg(q->tokenString(), contextString(context))); + return; + } + + if (type != QXmlStreamReader::DTD) + return; + + // Raise error on multiple DTD tokens + if (foundDTD) { + raiseError(QXmlStreamReader::UnexpectedElementError, + QXmlStream::tr("Found second DTD token in %1.").arg(contextString(context))); + } else { + foundDTD = true; + } +} + /*! \fn bool QXmlStreamAttributes::hasAttribute(QAnyStringView qualifiedName) const |