diff options
author | Sona Kurazyan <sona.kurazyan@qt.io> | 2019-10-25 12:05:27 +0200 |
---|---|---|
committer | Sona Kurazyan <sona.kurazyan@qt.io> | 2019-11-26 09:07:26 +0100 |
commit | ccc2133c648ca531630095f3f1016e2026d7e34e (patch) | |
tree | 22ed29871c17b8a87048249e92c399f8b01546cc /src/xml | |
parent | f0443984b84dea782ccd06dbce59808d55b15dbe (diff) |
Port QDomDocument to QXmlStreamReader
Reimplement QDomDocument using QXmlStreamReader and switch to the new
implementation starting from Qt 6.
The changes in the behavior are reflected in tests: some test cases
which were marked as "expected to fail" are now passing.
Task-number: QTBUG-76178
Change-Id: I5ace2f13c036a9a778de922b47a1ce35957ce5f6
Reviewed-by: Kai Koehne <kai.koehne@qt.io>
Diffstat (limited to 'src/xml')
-rw-r--r-- | src/xml/dom/qdom.cpp | 77 | ||||
-rw-r--r-- | src/xml/dom/qdom.h | 3 | ||||
-rw-r--r-- | src/xml/dom/qdom_p.h | 2 | ||||
-rw-r--r-- | src/xml/dom/qdomhelpers.cpp | 272 | ||||
-rw-r--r-- | src/xml/dom/qdomhelpers_p.h | 42 |
5 files changed, 396 insertions, 0 deletions
diff --git a/src/xml/dom/qdom.cpp b/src/xml/dom/qdom.cpp index bbbdf1c6f3..81f33b0693 100644 --- a/src/xml/dom/qdom.cpp +++ b/src/xml/dom/qdom.cpp @@ -59,6 +59,7 @@ #include <qvariant.h> #include <qshareddata.h> #include <qdebug.h> +#include <qxmlstream.h> #include <stdio.h> QT_BEGIN_NAMESPACE @@ -5734,6 +5735,34 @@ bool QDomDocumentPrivate::setContent(QXmlInputSource *source, QXmlReader *reader return true; } +bool QDomDocumentPrivate::setContent(QXmlStreamReader *reader, bool namespaceProcessing, + QString *errorMsg, int *errorLine, int *errorColumn) +{ + clear(); + impl = new QDomImplementationPrivate; + type = new QDomDocumentTypePrivate(this, this); + type->ref.deref(); + + if (!reader) { + qWarning("Failed to set content, XML reader is not initialized"); + return false; + } + + QDomParser domParser(this, reader, namespaceProcessing); + + if (!domParser.parse()) { + if (errorMsg) + *errorMsg = std::get<0>(domParser.errorInfo()); + if (errorLine) + *errorLine = std::get<1>(domParser.errorInfo()); + if (errorColumn) + *errorColumn = std::get<2>(domParser.errorInfo()); + return false; + } + + return true; +} + QDomNodePrivate* QDomDocumentPrivate::cloneNode(bool deep) { QDomNodePrivate *p = new QDomDocumentPrivate(this, deep); @@ -6153,9 +6182,16 @@ bool QDomDocument::setContent(const QString& text, bool namespaceProcessing, QSt { if (!impl) impl = new QDomDocumentPrivate(); + +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) QXmlInputSource source; source.setData(text); return IMPL->setContent(&source, namespaceProcessing, errorMsg, errorLine, errorColumn); +#else + QXmlStreamReader streamReader(text); + streamReader.setNamespaceProcessing(namespaceProcessing); + return IMPL->setContent(&streamReader, namespaceProcessing, errorMsg, errorLine, errorColumn); +#endif } /*! @@ -6215,10 +6251,17 @@ bool QDomDocument::setContent(const QByteArray &data, bool namespaceProcessing, { if (!impl) impl = new QDomDocumentPrivate(); + +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) QBuffer buf; buf.setData(data); QXmlInputSource source(&buf); return IMPL->setContent(&source, namespaceProcessing, errorMsg, errorLine, errorColumn); +#else + QXmlStreamReader streamReader(data); + streamReader.setNamespaceProcessing(namespaceProcessing); + return IMPL->setContent(&streamReader, namespaceProcessing, errorMsg, errorLine, errorColumn); +#endif } /*! @@ -6231,8 +6274,15 @@ bool QDomDocument::setContent(QIODevice* dev, bool namespaceProcessing, QString { if (!impl) impl = new QDomDocumentPrivate(); + +#if QT_VERSION < QT_VERSION_CHECK(6, 0, 0) QXmlInputSource source(dev); return IMPL->setContent(&source, namespaceProcessing, errorMsg, errorLine, errorColumn); +#else + QXmlStreamReader streamReader(dev); + streamReader.setNamespaceProcessing(namespaceProcessing); + return IMPL->setContent(&streamReader, namespaceProcessing, errorMsg, errorLine, errorColumn); +#endif } /*! @@ -6315,6 +6365,33 @@ bool QDomDocument::setContent(QXmlInputSource *source, QXmlReader *reader, QStri } /*! + \overload + \since 5.15 + + This function reads the XML document from the QXmlStreamReader \a reader + and parses it. Returns \c true if the content was successfully parsed; + otherwise returns \c false. + + If \a namespaceProcessing is \c true, the parser recognizes namespaces in the XML + file and sets the prefix name, local name and namespace URI to appropriate values. + If \a namespaceProcessing is \c false, the parser does no namespace processing when + it reads the XML file. + + If a parse error occurs, the error message is placed in \c{*}\a{errorMsg}, the line + number in \c{*}\a{errorLine} and the column number in \c{*}\a{errorColumn} (unless + the associated pointer is set to 0). + + \sa QXmlStreamReader +*/ +bool QDomDocument::setContent(QXmlStreamReader *reader, bool namespaceProcessing, QString *errorMsg, + int *errorLine, int *errorColumn) +{ + if (!impl) + impl = new QDomDocumentPrivate(); + return IMPL->setContent(reader, namespaceProcessing, errorMsg, errorLine, errorColumn); +} + +/*! Converts the parsed document back to its textual representation. This function uses \a indent as the amount of space to indent diff --git a/src/xml/dom/qdom.h b/src/xml/dom/qdom.h index 0a7db7dcd7..1b00a14179 100644 --- a/src/xml/dom/qdom.h +++ b/src/xml/dom/qdom.h @@ -91,6 +91,7 @@ class QDomNode; class QDomEntity; class QDomNotation; class QDomCharacterData; +class QXmlStreamReader; class Q_XML_EXPORT QDomImplementation { @@ -343,6 +344,8 @@ public: bool setContent(const QString& text, QString *errorMsg=nullptr, int *errorLine=nullptr, int *errorColumn=nullptr ); bool setContent(QIODevice* dev, QString *errorMsg=nullptr, int *errorLine=nullptr, int *errorColumn=nullptr ); bool setContent(QXmlInputSource *source, QXmlReader *reader, QString *errorMsg=nullptr, int *errorLine=nullptr, int *errorColumn=nullptr ); + bool setContent(QXmlStreamReader *reader, bool namespaceProcessing, QString *errorMsg = nullptr, + int *errorLine = nullptr, int *errorColumn = nullptr); // Qt extensions QString toString(int = 1) const; diff --git a/src/xml/dom/qdom_p.h b/src/xml/dom/qdom_p.h index 06ed0d5f0e..d197e999f1 100644 --- a/src/xml/dom/qdom_p.h +++ b/src/xml/dom/qdom_p.h @@ -465,6 +465,8 @@ public: int *errorLine, int *errorColumn); bool setContent(QXmlInputSource *source, QXmlReader *reader, QXmlSimpleReader *simpleReader, QString *errorMsg, int *errorLine, int *errorColumn); + bool setContent(QXmlStreamReader *reader, bool namespaceProcessing, QString *errorMsg, + int *errorLine, int *errorColumn); // Attributes QDomDocumentTypePrivate *doctype() { return type.data(); } diff --git a/src/xml/dom/qdomhelpers.cpp b/src/xml/dom/qdomhelpers.cpp index bda6f8ef87..9399ad3b9b 100644 --- a/src/xml/dom/qdomhelpers.cpp +++ b/src/xml/dom/qdomhelpers.cpp @@ -39,6 +39,7 @@ #include "qdomhelpers_p.h" #include "qdom_p.h" +#include "qxmlstream.h" #include "private/qxml_p.h" QT_BEGIN_NAMESPACE @@ -166,6 +167,18 @@ QDomBuilder::ErrorInfo QDomHandler::errorInfo() const * **************************************************************/ +int QDomDocumentLocator::column() const +{ + Q_ASSERT(reader); + return static_cast<int>(reader->columnNumber()); +} + +int QDomDocumentLocator::line() const +{ + Q_ASSERT(reader); + return static_cast<int>(reader->lineNumber()); +} + void QSAXDocumentLocator::setLocator(QXmlLocator *l) { locator = l; @@ -252,6 +265,44 @@ bool QDomBuilder::startElement(const QString &nsURI, const QString &qName, return true; } +inline QString stringRefToString(const QStringRef &stringRef) +{ + // Calling QStringRef::toString() on a NULL QStringRef in some cases returns + // an empty string (i.e. QString("")) instead of a NULL string (i.e. QString()). + // QDom implementation differentiates between NULL and empty strings, so + // we need this as workaround to keep the current behavior unchanged. + return stringRef.isNull() ? QString() : stringRef.toString(); +} + +bool QDomBuilder::startElement(const QString &nsURI, const QString &qName, + const QXmlStreamAttributes &atts) +{ + QDomNodePrivate *n = + nsProcessing ? doc->createElementNS(nsURI, qName) : doc->createElement(qName); + if (!n) + return false; + + n->setLocation(locator->line(), locator->column()); + + node->appendChild(n); + node = n; + + // attributes + for (const auto &attr : atts) { + auto domElement = static_cast<QDomElementPrivate *>(node); + if (nsProcessing) { + domElement->setAttributeNS(stringRefToString(attr.namespaceUri()), + stringRefToString(attr.qualifiedName()), + stringRefToString(attr.value())); + } else { + domElement->setAttribute(stringRefToString(attr.qualifiedName()), + stringRefToString(attr.value())); + } + } + + return true; +} + bool QDomBuilder::endElement() { if (!node || node == doc) @@ -368,4 +419,225 @@ bool QDomBuilder::notationDecl(const QString &name, const QString &publicId, return true; } +/************************************************************** + * + * QDomParser + * + **************************************************************/ + +QDomParser::QDomParser(QDomDocumentPrivate *d, QXmlStreamReader *r, bool namespaceProcessing) + : reader(r), locator(r), domBuilder(d, &locator, namespaceProcessing) +{ +} + +bool QDomParser::parse() +{ + return parseProlog() && parseBody(); +} + +QDomBuilder::ErrorInfo QDomParser::errorInfo() const +{ + return domBuilder.error(); +} + +bool QDomParser::parseProlog() +{ + Q_ASSERT(reader); + + bool foundDtd = false; + + while (!reader->atEnd()) { + reader->readNext(); + + if (reader->hasError()) { + domBuilder.fatalError(reader->errorString()); + return false; + } + + switch (reader->tokenType()) { + case QXmlStreamReader::StartDocument: + if (!reader->documentVersion().isEmpty()) { + QString value(QLatin1String("version='")); + value += reader->documentVersion(); + value += QLatin1Char('\''); + if (!reader->documentEncoding().isEmpty()) { + value += QLatin1String(" encoding='"); + value += reader->documentEncoding(); + value += QLatin1Char('\''); + } + if (reader->isStandaloneDocument()) { + value += QLatin1String(" standalone='yes'"); + } else { + // TODO: Add standalone='no', if 'standalone' is specified. With the current + // QXmlStreamReader there is no way to figure out if it was specified or not. + // QXmlStreamReader needs to be modified for handling that case correctly. + } + + if (!domBuilder.processingInstruction(QLatin1String("xml"), value)) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing XML declaration")); + return false; + } + } + break; + case QXmlStreamReader::DTD: + if (foundDtd) { + domBuilder.fatalError(QDomParser::tr("Multiple DTD sections are not allowed")); + return false; + } + foundDtd = true; + + if (!domBuilder.startDTD(stringRefToString(reader->dtdName()), + stringRefToString(reader->dtdPublicId()), + stringRefToString(reader->dtdSystemId()))) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing document type declaration")); + return false; + } + if (!parseMarkupDecl()) + return false; + break; + case QXmlStreamReader::Comment: + if (!domBuilder.comment(reader->text().toString())) { + domBuilder.fatalError(QDomParser::tr("Error occurred while processing comment")); + return false; + } + break; + case QXmlStreamReader::ProcessingInstruction: + if (!domBuilder.processingInstruction(reader->processingInstructionTarget().toString(), + reader->processingInstructionData().toString())) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing a processing instruction")); + return false; + } + break; + default: + // If the token is none of the above, prolog processing is done. + return true; + } + } + + return true; +} + +bool QDomParser::parseBody() +{ + Q_ASSERT(reader); + + std::stack<QStringRef> tagStack; + while (!reader->atEnd() && !reader->hasError()) { + switch (reader->tokenType()) { + case QXmlStreamReader::StartElement: + tagStack.push(reader->qualifiedName()); + if (!domBuilder.startElement(stringRefToString(reader->namespaceUri()), + stringRefToString(reader->qualifiedName()), + reader->attributes())) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing a start element")); + return false; + } + break; + case QXmlStreamReader::EndElement: + if (tagStack.empty() || reader->qualifiedName() != tagStack.top()) { + domBuilder.fatalError( + QDomParser::tr("Unexpected end element '%1'").arg(reader->name())); + return false; + } + tagStack.pop(); + if (!domBuilder.endElement()) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing an end element")); + return false; + } + break; + case QXmlStreamReader::Characters: + if (!reader->isWhitespace()) { // Skip the content consisting of only whitespaces + if (!reader->text().toString().trimmed().isEmpty()) { + if (!domBuilder.characters(reader->text().toString(), reader->isCDATA())) { + domBuilder.fatalError(QDomParser::tr( + "Error occurred while processing the element content")); + return false; + } + } + } + break; + case QXmlStreamReader::Comment: + if (!domBuilder.comment(reader->text().toString())) { + domBuilder.fatalError(QDomParser::tr("Error occurred while processing comments")); + return false; + } + break; + case QXmlStreamReader::ProcessingInstruction: + if (!domBuilder.processingInstruction(reader->processingInstructionTarget().toString(), + reader->processingInstructionData().toString())) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing a processing instruction")); + return false; + } + break; + case QXmlStreamReader::EntityReference: + if (!domBuilder.skippedEntity(reader->name().toString())) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing an entity reference")); + return false; + } + break; + default: + domBuilder.fatalError(QDomParser::tr("Unexpected token")); + return false; + } + + reader->readNext(); + } + + if (reader->hasError()) { + domBuilder.fatalError(reader->errorString()); + reader->readNext(); + return false; + } + + if (!tagStack.empty()) { + domBuilder.fatalError(QDomParser::tr("Tag mismatch")); + return false; + } + + return true; +} + +bool QDomParser::parseMarkupDecl() +{ + Q_ASSERT(reader); + + const auto entities = reader->entityDeclarations(); + for (const auto &entityDecl : entities) { + // Entity declarations are created only for Extrenal Entities. Internal Entities + // are parsed, and QXmlStreamReader handles the parsing itself and returns the + // parsed result. So we don't need to do anything for the Internal Entities. + if (!entityDecl.publicId().isEmpty() || !entityDecl.systemId().isEmpty()) { + // External Entity + if (!domBuilder.unparsedEntityDecl(stringRefToString(entityDecl.name()), + stringRefToString(entityDecl.publicId()), + stringRefToString(entityDecl.systemId()), + stringRefToString(entityDecl.notationName()))) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing entity declaration")); + return false; + } + } + } + + const auto notations = reader->notationDeclarations(); + for (const auto ¬ationDecl : notations) { + if (!domBuilder.notationDecl(stringRefToString(notationDecl.name()), + stringRefToString(notationDecl.publicId()), + stringRefToString(notationDecl.systemId()))) { + domBuilder.fatalError( + QDomParser::tr("Error occurred while processing notation declaration")); + return false; + } + } + + return true; +} + QT_END_NAMESPACE diff --git a/src/xml/dom/qdomhelpers_p.h b/src/xml/dom/qdomhelpers_p.h index 7647de65d2..f5efd8a42d 100644 --- a/src/xml/dom/qdomhelpers_p.h +++ b/src/xml/dom/qdomhelpers_p.h @@ -39,6 +39,7 @@ #ifndef QDOMHELPERS_P_H #define QDOMHELPERS_P_H +#include <qcoreapplication.h> #include <qglobal.h> #include <qxml.h> @@ -57,6 +58,8 @@ QT_BEGIN_NAMESPACE class QDomDocumentPrivate; class QDomNodePrivate; +class QXmlStreamReader; +class QXmlStreamAttributes; /************************************************************** * @@ -77,6 +80,19 @@ public: virtual int line() const = 0; }; +class QDomDocumentLocator : public QXmlDocumentLocator +{ +public: + QDomDocumentLocator(QXmlStreamReader *r) : reader(r) {} + ~QDomDocumentLocator() override = default; + + int column() const override; + int line() const override; + +private: + QXmlStreamReader *reader; +}; + class QSAXDocumentLocator : public QXmlDocumentLocator { public: @@ -105,6 +121,7 @@ public: bool endDocument(); bool startElement(const QString &nsURI, const QString &qName, const QXmlAttributes &atts); + bool startElement(const QString &nsURI, const QString &qName, const QXmlStreamAttributes &atts); bool endElement(); bool characters(const QString &characters, bool cdata = false); bool processingInstruction(const QString &target, const QString &data); @@ -188,6 +205,31 @@ private: QDomBuilder domBuilder; }; +/************************************************************** + * + * QDomParser + * + **************************************************************/ + +class QDomParser +{ + Q_DECLARE_TR_FUNCTIONS(QDomParser) +public: + QDomParser(QDomDocumentPrivate *d, QXmlStreamReader *r, bool namespaceProcessing); + + bool parse(); + QDomBuilder::ErrorInfo errorInfo() const; + +private: + bool parseProlog(); + bool parseBody(); + bool parseMarkupDecl(); + + QXmlStreamReader *reader; + QDomDocumentLocator locator; + QDomBuilder domBuilder; +}; + QT_END_NAMESPACE #endif // QDOMHELPERS_P_H |