summaryrefslogtreecommitdiffstats
path: root/src/corelib/mimetypes/qmimetypeparser.cpp
diff options
context:
space:
mode:
authorDavid Faure <faure@kde.org>2012-02-18 16:00:09 +0100
committerQt by Nokia <qt-info@nokia.com>2012-02-18 22:19:43 +0100
commitb164911b7f0efd81ec33325405b88bff8b2334d0 (patch)
treeea4c2eb39f3ae8edb90b8255ffe3ee1b14806ab7 /src/corelib/mimetypes/qmimetypeparser.cpp
parent87fcbd82fc679715853e5261f8f0194a80c10b76 (diff)
Import QMimeType / QMimeDatabase into QtCore.
History of the development before the import: ssh://codereview.qt-project.org/playground/mimetypes.git Mimetype definitions come from shared-mime-info where available (UNIX systems), loaded using a mmap'ed binary cache generated by update-mime-database. As a fallback if no cache is found, we parse the raw XML files otherwise. This makes the MIME type support fast and with very low memory usage on UNIX, and it makes it easy to use on Windows (no dependency on shared-mime-info, Qt even includes a freedesktop.xml file to use if none are found on the system). Change-Id: I27b05008216ff936dc463bd80d3893422bfb940e Reviewed-by: Richard J. Moore <rich@kde.org>
Diffstat (limited to 'src/corelib/mimetypes/qmimetypeparser.cpp')
-rw-r--r--src/corelib/mimetypes/qmimetypeparser.cpp342
1 files changed, 342 insertions, 0 deletions
diff --git a/src/corelib/mimetypes/qmimetypeparser.cpp b/src/corelib/mimetypes/qmimetypeparser.cpp
new file mode 100644
index 0000000000..23f57367e3
--- /dev/null
+++ b/src/corelib/mimetypes/qmimetypeparser.cpp
@@ -0,0 +1,342 @@
+/****************************************************************************
+**
+** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: http://www.qt-project.org/
+**
+** This file is part of the QtCore module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** GNU Lesser General Public License Usage
+** This file may be used under the terms of the GNU Lesser General Public
+** License version 2.1 as published by the Free Software Foundation and
+** appearing in the file LICENSE.LGPL included in the packaging of this
+** file. Please review the following information to ensure the GNU Lesser
+** General Public License version 2.1 requirements will be met:
+** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain additional
+** rights. These rights are described in the Nokia Qt LGPL Exception
+** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU General
+** Public License version 3.0 as published by the Free Software Foundation
+** and appearing in the file LICENSE.GPL included in the packaging of this
+** file. Please review the following information to ensure the GNU General
+** Public License version 3.0 requirements will be met:
+** http://www.gnu.org/copyleft/gpl.html.
+**
+** Other Usage
+** Alternatively, this file may be used in accordance with the terms and
+** conditions contained in a signed written agreement between you and Nokia.
+**
+**
+**
+**
+**
+**
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+
+#define QT_NO_CAST_FROM_ASCII
+
+#include "qmimetypeparser_p.h"
+
+#include "qmimetype_p.h"
+#include "qmimemagicrulematcher_p.h"
+
+#include <QtCore/QCoreApplication>
+#include <QtCore/QDebug>
+#include <QtCore/QDir>
+#include <QtCore/QPair>
+#include <QtCore/QXmlStreamReader>
+#include <QtCore/QXmlStreamWriter>
+#include <QtCore/QStack>
+
+QT_BEGIN_NAMESPACE
+
+// XML tags in MIME files: element and attribute names compared against in nextState() and parse()
+const char *const mimeInfoTagC = "mime-info"; // recognized by nextState() as the first element of a document
+const char *const mimeTypeTagC = "mime-type"; // one MIME type definition; its end tag hands the collected data to process()
+const char *const mimeTypeAttributeC = "type"; // read on <mime-type>, <sub-class-of> and <alias>
+const char *const subClassTagC = "sub-class-of"; // parent type, forwarded to processParent()
+const char *const commentTagC = "comment"; // description text, stored per locale
+const char *const genericIconTagC = "generic-icon"; // its "name" attribute becomes genericIconName
+const char *const iconTagC = "icon"; // its "name" attribute becomes iconName
+const char *const nameAttributeC = "name"; // read on <generic-icon> and <icon>
+const char *const globTagC = "glob"; // file name pattern, forwarded to process() as a QMimeGlobPattern
+const char *const aliasTagC = "alias"; // alternative type name, forwarded to processAlias()
+const char *const patternAttributeC = "pattern"; // read on <glob>
+const char *const weightAttributeC = "weight"; // read on <glob>; a value of 0 is replaced by QMimeGlobPattern::DefaultWeight
+const char *const caseSensitiveAttributeC = "case-sensitive"; // read on <glob>; only the value "true" selects Qt::CaseSensitive
+const char *const localeAttributeC = "xml:lang"; // read on <comment>; an empty value is stored under "en_US"
+// <magic> element (a group of match rules) and its attribute
+const char *const magicTagC = "magic";
+const char *const priorityAttributeC = "priority"; // read on <magic>; parse() uses 50 when it is absent
+// <match> element (a single magic rule, possibly nested) and its attributes
+const char *const matchTagC = "match";
+const char *const matchValueAttributeC = "value"; // must be non-empty, see createMagicMatchRule()
+const char *const matchTypeAttributeC = "type"; // mapped to a QMimeMagicRule::Type
+const char *const matchOffsetAttributeC = "offset"; // "N" or "N:M", see createMagicMatchRule()
+const char *const matchMaskAttributeC = "mask"; // passed through to QMimeMagicRule as Latin-1 bytes
+
+/*!
+ \class QMimeTypeParser
+ \internal
+ \brief The QMimeTypeParser class parses MIME types, and builds a MIME database hierarchy by adding to QMimeDatabasePrivate.
+
+ Populates QMimeDatabase
+
+ \sa QMimeDatabase, QMimeMagicRuleMatcher, MagicRule, MagicStringRule, MagicByteRule, GlobPattern
+ \sa QMimeTypeParser
+*/
+
+
+/*!
+ \class QMimeTypeParserBase
+ \brief The QMimeTypeParserBase class parses a sequence of <mime-type> elements in a generic way.
+
+ Calls the abstract handler function process() for each QMimeType it finds.
+
+ \sa QMimeDatabase, QMimeMagicRuleMatcher, MagicRule, MagicStringRule, MagicByteRule, GlobPattern
+ \sa QMimeTypeParser
+*/
+
+/*!
+ \fn virtual bool QMimeTypeParserBase::process(const QMimeType &t, QString *errorMessage) = 0;
+ Reimplement to process the sequence of parsed data.
+*/
+
+// Computes the parser state entered when the start tag \a startElement is
+// encountered while in \a currentState. Returns ParseError for an element
+// that is not allowed in the current state.
+QMimeTypeParserBase::ParseState QMimeTypeParserBase::nextState(ParseState currentState, const QStringRef &startElement)
+{
+    const bool startsMimeType = (startElement == QLatin1String(mimeTypeTagC));
+    const bool startsMatch = (startElement == QLatin1String(matchTagC));
+
+    switch (currentState) {
+    case ParseBeginning:
+        // A document may start with <mime-info> or directly with a <mime-type>.
+        if (startElement == QLatin1String(mimeInfoTagC))
+            return ParseMimeInfo;
+        return startsMimeType ? ParseMimeType : ParseError;
+    case ParseMimeInfo:
+        if (startsMimeType)
+            return ParseMimeType;
+        return ParseError;
+    case ParseMimeType:
+    case ParseComment:
+    case ParseGenericIcon:
+    case ParseIcon:
+    case ParseGlobPattern:
+    case ParseSubClass:
+    case ParseAlias:
+    case ParseOtherMimeTypeSubTag:
+    case ParseMagicMatchRule: {
+        // Inside a <mime-type>: map the known tags to their states. The first
+        // entry covers a sequence of <mime-type> elements.
+        const struct {
+            const char *tag;
+            ParseState state;
+        } knownTags[] = {
+            { mimeTypeTagC, ParseMimeType },
+            { commentTagC, ParseComment },
+            { genericIconTagC, ParseGenericIcon },
+            { iconTagC, ParseIcon },
+            { globTagC, ParseGlobPattern },
+            { subClassTagC, ParseSubClass },
+            { aliasTagC, ParseAlias },
+            { magicTagC, ParseMagic },
+            { matchTagC, ParseMagicMatchRule }
+        };
+        const unsigned knownTagCount = sizeof(knownTags) / sizeof(knownTags[0]);
+        for (unsigned i = 0; i < knownTagCount; ++i) {
+            if (startElement == QLatin1String(knownTags[i].tag))
+                return knownTags[i].state;
+        }
+        // Any other tag is tolerated as an unknown sub-tag.
+        return ParseOtherMimeTypeSubTag;
+    }
+    case ParseMagic:
+        // Directly after <magic> only a <match> is accepted.
+        return startsMatch ? ParseMagicMatchRule : ParseError;
+    case ParseError:
+        break;
+    }
+    return ParseError;
+}
+
+// Parse an int from an (attribute) string.
+// Stores the result of QString::toInt() in *target and returns true when the
+// conversion succeeded. On failure returns false and, if errorMessage is
+// non-null, writes a description there. errorMessage may be null:
+// QMimeTypeParserBase::parse() forwards its own error pointer here and itself
+// null-checks it before writing.
+static bool parseNumber(const QString &n, int *target, QString *errorMessage)
+{
+    bool ok = false;
+    *target = n.toInt(&ok);
+    if (ok)
+        return true;
+    if (errorMessage)
+        *errorMessage = QString::fromLatin1("Not a number '%1'.").arg(n);
+    return false;
+}
+
+// Evaluate a magic match rule like
+// <match value="must be converted with BinHex" type="string" offset="11"/>
+// <match value="0x9501" type="big16" offset="0:64"/>
+//
+// Outcomes:
+//  - returns true with \a rule pointing to a newly allocated QMimeMagicRule
+//    (owned by the caller) when the attributes describe a supported rule;
+//  - returns true with \a rule set to null when the match type is not
+//    supported (a warning is printed and the rule is meant to be skipped);
+//  - returns false with \a rule set to null when the value is empty or the
+//    offset is not numeric; *errorMessage is filled in if errorMessage is
+//    non-null.
+static bool createMagicMatchRule(const QXmlStreamAttributes &atts,
+                                 QString *errorMessage, QMimeMagicRule *&rule)
+{
+    // Make the out-parameter deterministic on every path that creates no rule.
+    rule = 0;
+
+    const QString type = atts.value(QLatin1String(matchTypeAttributeC)).toString();
+    const QMimeMagicRule::Type magicType = QMimeMagicRule::type(type.toLatin1());
+    if (magicType == QMimeMagicRule::Invalid) {
+        qWarning("%s: match type %s is not supported.", Q_FUNC_INFO, type.toUtf8().constData());
+        return true;
+    }
+    const QString value = atts.value(QLatin1String(matchValueAttributeC)).toString();
+    if (value.isEmpty()) {
+        if (errorMessage) // the error sink is optional
+            *errorMessage = QString::fromLatin1("Empty match value detected.");
+        return false;
+    }
+    // Parse for offset as "1" or "1:10"; without a colon the single number is
+    // used for both the start and the end position.
+    int startPos = 0;
+    int endPos = 0;
+    const QString offsetS = atts.value(QLatin1String(matchOffsetAttributeC)).toString();
+    const int colonIndex = offsetS.indexOf(QLatin1Char(':'));
+    const QString startPosS = colonIndex == -1 ? offsetS : offsetS.mid(0, colonIndex);
+    const QString endPosS = colonIndex == -1 ? offsetS : offsetS.mid(colonIndex + 1);
+    if (!parseNumber(startPosS, &startPos, errorMessage) || !parseNumber(endPosS, &endPos, errorMessage))
+        return false;
+    const QString mask = atts.value(QLatin1String(matchMaskAttributeC)).toString();
+
+    rule = new QMimeMagicRule(magicType, value.toUtf8(), startPos, endPos, mask.toLatin1());
+
+    return true;
+}
+
+// Reads MIME type definitions as XML from \a dev and reports everything it
+// finds through the handler functions: process() for glob patterns and for
+// each completed <mime-type>, processParent() for <sub-class-of>,
+// processAlias() for <alias> and processMagicMatcher() for each completed
+// <magic> group.
+//
+// Returns false as soon as a process() handler returns false, a number
+// (magic priority or match offset) cannot be parsed, or the XML reader
+// reports an error. For reader errors a message mentioning \a fileName and
+// the line number is stored in *errorMessage when \a errorMessage is non-null.
+// Returns true otherwise.
+bool QMimeTypeParserBase::parse(QIODevice *dev, const QString &fileName, QString *errorMessage)
+{
+    QMimeTypePrivate data;
+    int priority = 50;
+    // Stack for the nesting of rules. A null entry stands for a <match> that
+    // was skipped (unsupported type, or nested below a skipped <match>); it
+    // keeps the stack balanced with the closing </match>.
+    QStack<QMimeMagicRule *> currentRules;
+    QList<QMimeMagicRule> rules; // toplevel rules
+    QXmlStreamReader reader(dev);
+    ParseState ps = ParseBeginning;
+    QXmlStreamAttributes atts;
+    while (!reader.atEnd()) {
+        switch (reader.readNext()) {
+        case QXmlStreamReader::StartElement:
+            ps = nextState(ps, reader.name());
+            atts = reader.attributes();
+            switch (ps) {
+            case ParseMimeType: { // start parsing a MIME type name
+                const QString name = atts.value(QLatin1String(mimeTypeAttributeC)).toString();
+                if (name.isEmpty()) {
+                    reader.raiseError(QString::fromLatin1("Missing '%1'-attribute").arg(QString::fromLatin1(mimeTypeAttributeC)));
+                } else {
+                    data.name = name;
+                }
+            }
+                break;
+            case ParseGenericIcon:
+                data.genericIconName = atts.value(QLatin1String(nameAttributeC)).toString();
+                break;
+            case ParseIcon:
+                data.iconName = atts.value(QLatin1String(nameAttributeC)).toString();
+                break;
+            case ParseGlobPattern: {
+                const QString pattern = atts.value(QLatin1String(patternAttributeC)).toString();
+                unsigned weight = atts.value(QLatin1String(weightAttributeC)).toString().toInt();
+                const bool caseSensitive = atts.value(QLatin1String(caseSensitiveAttributeC)).toString() == QLatin1String("true");
+
+                if (weight == 0)
+                    weight = QMimeGlobPattern::DefaultWeight;
+
+                Q_ASSERT(!data.name.isEmpty());
+                const QMimeGlobPattern glob(pattern, data.name, weight, caseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive);
+                if (!process(glob, errorMessage)) // for actual glob matching
+                    return false;
+                data.addGlobPattern(pattern); // just for QMimeType::globPatterns()
+            }
+                break;
+            case ParseSubClass: {
+                const QString inheritsFrom = atts.value(QLatin1String(mimeTypeAttributeC)).toString();
+                if (!inheritsFrom.isEmpty())
+                    processParent(data.name, inheritsFrom);
+            }
+                break;
+            case ParseComment: {
+                // comments have locale attributes. We want the default, English one
+                QString locale = atts.value(QLatin1String(localeAttributeC)).toString();
+                const QString comment = reader.readElementText();
+                if (locale.isEmpty())
+                    locale = QString::fromLatin1("en_US");
+                data.localeComments.insert(locale, comment);
+            }
+                break;
+            case ParseAlias: {
+                const QString alias = atts.value(QLatin1String(mimeTypeAttributeC)).toString();
+                if (!alias.isEmpty())
+                    processAlias(alias, data.name);
+            }
+                break;
+            case ParseMagic: {
+                priority = 50;
+                const QString priorityS = atts.value(QLatin1String(priorityAttributeC)).toString();
+                if (!priorityS.isEmpty()) {
+                    if (!parseNumber(priorityS, &priority, errorMessage))
+                        return false;
+
+                }
+                currentRules.clear();
+                //qDebug() << "MAGIC start for mimetype" << data.name;
+            }
+                break;
+            case ParseMagicMatchRule: {
+                QMimeMagicRule *rule = 0;
+                if (!createMagicMatchRule(atts, errorMessage, rule))
+                    return false;
+                QMimeMagicRule *const parentRule = currentRules.isEmpty() ? 0 : currentRules.top();
+                const bool parentSkipped = !currentRules.isEmpty() && !parentRule;
+                if (!rule || parentSkipped) {
+                    // createMagicMatchRule() succeeds without creating a rule
+                    // for unsupported match types, so there is nothing to
+                    // dereference. Rules nested below a skipped rule have no
+                    // parent to attach to and are dropped as well.
+                    delete rule;
+                    QMimeMagicRule *const skippedRule = 0;
+                    currentRules.push(skippedRule);
+                    break;
+                }
+                // nest this rule into the proper parent, or make it a toplevel rule
+                QList<QMimeMagicRule> &ruleList = parentRule ? parentRule->m_subMatches : rules;
+                ruleList.append(*rule);
+                //qDebug() << " MATCH added. Stack size was" << currentRules.size();
+                currentRules.push(&ruleList.last());
+                delete rule;
+                break;
+            }
+            case ParseError:
+                reader.raiseError(QString::fromLatin1("Unexpected element <%1>").
+                                  arg(reader.name().toString()));
+                break;
+            default:
+                break;
+            }
+            break;
+        // continue switch QXmlStreamReader::Token...
+        case QXmlStreamReader::EndElement: // Finished element
+        {
+            const QStringRef elementName = reader.name();
+            if (elementName == QLatin1String(mimeTypeTagC)) {
+                if (!process(QMimeType(data), errorMessage))
+                    return false;
+                data.clear();
+            } else if (elementName == QLatin1String(matchTagC)) {
+                // Closing a <match> tag, pop stack. The stack can already be
+                // empty for malformed nesting (a <magic> inside a <match>
+                // clears it), so never pop an empty stack.
+                if (!currentRules.isEmpty())
+                    currentRules.pop();
+                //qDebug() << " MATCH closed. Stack size is now" << currentRules.size();
+            } else if (elementName == QLatin1String(magicTagC)) {
+                //qDebug() << "MAGIC ended, we got" << rules.count() << "rules, with prio" << priority;
+                // Finished a <magic> sequence
+                QMimeMagicRuleMatcher ruleMatcher(data.name, priority);
+                ruleMatcher.addRules(rules);
+                processMagicMatcher(ruleMatcher);
+                rules.clear();
+            }
+            break;
+        }
+        default:
+            break;
+        }
+    }
+
+    if (reader.hasError()) {
+        if (errorMessage)
+            *errorMessage = QString::fromLatin1("An error has been encountered at line %1 of %2: %3:").arg(reader.lineNumber()).arg(fileName, reader.errorString());
+        return false;
+    }
+
+    return true;
+}
+
+QT_END_NAMESPACE