summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorIgor Kushnir <igorkuo@gmail.com>2021-11-13 14:21:40 +0200
committerIgor Kushnir <igorkuo@gmail.com>2021-12-06 22:55:06 +0200
commitd797e3c88ed7be6e2b683984950334a8054728a9 (patch)
tree952034e2a5021b8767d250cd89697312ba9b440d
parent6d6fb7846c664b3b15ae03542658f6beae96e0b2 (diff)
Optimize QMimeDatabase::mimeTypeForFile(f, MatchDefault)
Open the file only if matching on content is needed. Use QFileInfo::filePath() instead of QFileInfo::absoluteFilePath() in QMimeDatabase::mimeTypeForFile(). filePath() does much less work, and so is faster. Thiago Macieira helpfully explained in a review comment why the absolute path is not useful for correctness here: "Nothing needs absolute paths within the same application that would resolve the relative path to absolute. You only need an absolute path if you're communicating with another application that may be in a different directory." QMimeDatabase::mimeTypeForFile() checks fileInfo.isDir(), so the fileName.endsWith(QLatin1Char('/')) check in QMimeDatabasePrivate::mimeTypeForFileNameAndData() was redundant when called from this function. The other two callers of that function now check this condition before opening IO devices. This improves performance of the two QMimeDatabase::mimeTypeForFileNameAndData() overloads in the corner case. Refactor and optimize QMimeDatabasePrivate::findByFileName() and its usages. Previously each caller constructed a QFileInfo object and passed QFileInfo::fileName() into this function. Now the callers simply pass an absolute or relative path to a file into this function, which then uses QFileSystemEntry::fileName() to exclude the path. Constructing QFileInfo is relatively expensive, so this change slightly improves performance. Optimize QMimeDatabasePrivate::loadProviders() by calling static QFileInfo::exists() instead of constructing a QFileInfo object and calling the non-static QFileInfo::exists() overload. Note that the QFileInfo object was always created, even if QFileInfo::exists() under an `if` and an `#if` was never called. The following table contains the average results of the added benchmark tst_QMimeDatabase::benchMimeTypeForFile() on my GNU/Linux system before and at this commit. The numbers denote milliseconds per iteration. data row tag before at MatchDefault: archive 0.029 0.016 OpenDocument Text 0.029 0.015 existent archive with extension 0.039 0.025 existent C with extension 0.033 0.020 existent text file with extension 0.033 0.020 existent C w/o extension 0.076 0.074 existent patch w/o extension 0.11 0.105 existent archive w/o extension 0.069 0.066 MatchExtension: archive 0.012 0.0115 OpenDocument Text 0.0115 0.011 existent archive with extension 0.017 0.016 existent C with extension 0.011 0.011 existent text file with extension 0.011 0.011 existent C w/o extension 0.016 0.0155 existent patch w/o extension 0.013 0.012 existent archive w/o extension 0.013 0.012 MatchContent: archive 0.019 0.012 OpenDocument Text 0.019 0.012 existent archive with extension 0.053 0.051 existent C with extension 0.056 0.0545 existent text file with extension 0.058 0.056 existent C w/o extension 0.0605 0.059 existent patch w/o extension 0.10 0.099 existent archive w/o extension 0.057 0.054 Change-Id: Idb541656e073a2c4822ace3f4da412f29f2351f8 Reviewed-by: Thiago Macieira <thiago.macieira@intel.com> Reviewed-by: David Faure <david.faure@kdab.com>
-rw-r--r--src/corelib/mimetypes/qmimedatabase.cpp118
-rw-r--r--tests/benchmarks/corelib/mimetypes/qmimedatabase/files/N.tar.gzbin0 -> 115 bytes
-rw-r--r--tests/benchmarks/corelib/mimetypes/qmimedatabase/files/X1
-rw-r--r--tests/benchmarks/corelib/mimetypes/qmimedatabase/files/t.c1
-rw-r--r--tests/benchmarks/corelib/mimetypes/qmimedatabase/files/u.txt4
-rw-r--r--tests/benchmarks/corelib/mimetypes/qmimedatabase/files/y1
-rw-r--r--tests/benchmarks/corelib/mimetypes/qmimedatabase/files/zbin0 -> 132 bytes
-rw-r--r--tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp77
8 files changed, 150 insertions, 52 deletions
diff --git a/src/corelib/mimetypes/qmimedatabase.cpp b/src/corelib/mimetypes/qmimedatabase.cpp
index 0be220e124..f7cd926220 100644
--- a/src/corelib/mimetypes/qmimedatabase.cpp
+++ b/src/corelib/mimetypes/qmimedatabase.cpp
@@ -46,6 +46,8 @@
#include "qmimeprovider_p.h"
#include "qmimetype_p.h"
+#include <private/qfilesystementry_p.h>
+
#include <QtCore/QFile>
#include <QtCore/QFileInfo>
#include <QtCore/QStandardPaths>
@@ -111,7 +113,6 @@ void QMimeDatabasePrivate::loadProviders()
for (const QString &mimeDir : mimeDirs) {
const QString cacheFile = mimeDir + QStringLiteral("/mime.cache");
- QFileInfo fileInfo(cacheFile);
// Check if we already have a provider for this dir
const auto predicate = [mimeDir](const std::unique_ptr<QMimeProviderBase> &prov)
{
@@ -121,7 +122,7 @@ void QMimeDatabasePrivate::loadProviders()
if (it == currentProviders.end()) {
std::unique_ptr<QMimeProviderBase> provider;
#if defined(QT_USE_MMAP)
- if (qEnvironmentVariableIsEmpty("QT_NO_MIME_CACHE") && fileInfo.exists()) {
+ if (qEnvironmentVariableIsEmpty("QT_NO_MIME_CACHE") && QFileInfo::exists(cacheFile)) {
provider.reset(new QMimeBinaryProvider(this, mimeDir));
//qDebug() << "Created binary provider for" << mimeDir;
if (!provider->isValid()) {
@@ -206,8 +207,7 @@ QStringList QMimeDatabasePrivate::mimeTypeForFileName(const QString &fileName)
if (fileName.endsWith(QLatin1Char('/')))
return QStringList() << QLatin1String("inode/directory");
- const QString shortName = QFileInfo(fileName).fileName();
- const QMimeGlobMatchResult result = findByFileName(shortName);
+ const QMimeGlobMatchResult result = findByFileName(fileName);
QStringList matchingMimeTypes = result.m_matchingMimeTypes;
matchingMimeTypes.sort(); // make it deterministic
return matchingMimeTypes;
@@ -216,8 +216,9 @@ QStringList QMimeDatabasePrivate::mimeTypeForFileName(const QString &fileName)
QMimeGlobMatchResult QMimeDatabasePrivate::findByFileName(const QString &fileName)
{
QMimeGlobMatchResult result;
+ const QString fileNameExcludingPath = QFileSystemEntry(fileName).fileName();
for (const auto &provider : providers())
- provider->addFileNameMatches(fileName, result);
+ provider->addFileNameMatches(fileNameExcludingPath, result);
return result;
}
@@ -374,11 +375,7 @@ QMimeType QMimeDatabasePrivate::mimeTypeForFileNameAndData(const QString &fileNa
*accuracyPtr = 0;
// Pass 1) Try to match on the file name
- QMimeGlobMatchResult candidatesByName;
- if (fileName.endsWith(QLatin1Char('/')))
- candidatesByName.addMatch(QLatin1String("inode/directory"), 100, QString());
- else
- candidatesByName = findByFileName(QFileInfo(fileName).fileName());
+ QMimeGlobMatchResult candidatesByName = findByFileName(fileName);
if (candidatesByName.m_allMatchingMimeTypes.count() == 1) {
*accuracyPtr = 100;
const QMimeType mime = mimeTypeForName(candidatesByName.m_matchingMimeTypes.at(0));
@@ -389,47 +386,55 @@ QMimeType QMimeDatabasePrivate::mimeTypeForFileNameAndData(const QString &fileNa
// Extension is unknown, or matches multiple mimetypes.
// Pass 2) Match on content, if we can read the data
- if (device->isOpen()) {
-
- // Read 16K in one go (QIODEVICE_BUFFERSIZE in qiodevice_p.h).
- // This is much faster than seeking back and forth into QIODevice.
- const QByteArray data = device->peek(16384);
-
- int magicAccuracy = 0;
- QMimeType candidateByData(findByData(data, &magicAccuracy));
-
- // Disambiguate conflicting extensions (if magic matching found something)
- if (candidateByData.isValid() && magicAccuracy > 0) {
- const QString sniffedMime = candidateByData.name();
- // If the sniffedMime matches a highest-weight glob match, use it
- if (candidatesByName.m_matchingMimeTypes.contains(sniffedMime)) {
- *accuracyPtr = 100;
- return candidateByData;
- }
- for (const QString &m : qAsConst(candidatesByName.m_allMatchingMimeTypes)) {
- if (inherits(m, sniffedMime)) {
- // We have magic + pattern pointing to this, so it's a pretty good match
+ const auto matchOnContent = [this, accuracyPtr, &candidatesByName](QIODevice *device) {
+ if (device->isOpen()) {
+ // Read 16K in one go (QIODEVICE_BUFFERSIZE in qiodevice_p.h).
+ // This is much faster than seeking back and forth into QIODevice.
+ const QByteArray data = device->peek(16384);
+
+ int magicAccuracy = 0;
+ QMimeType candidateByData(findByData(data, &magicAccuracy));
+
+ // Disambiguate conflicting extensions (if magic matching found something)
+ if (candidateByData.isValid() && magicAccuracy > 0) {
+ const QString sniffedMime = candidateByData.name();
+ // If the sniffedMime matches a highest-weight glob match, use it
+ if (candidatesByName.m_matchingMimeTypes.contains(sniffedMime)) {
*accuracyPtr = 100;
- return mimeTypeForName(m);
+ return candidateByData;
+ }
+ for (const QString &m : qAsConst(candidatesByName.m_allMatchingMimeTypes)) {
+ if (inherits(m, sniffedMime)) {
+ // We have magic + pattern pointing to this, so it's a pretty good match
+ *accuracyPtr = 100;
+ return mimeTypeForName(m);
+ }
+ }
+ if (candidatesByName.m_allMatchingMimeTypes.isEmpty()) {
+ // No glob, use magic
+ *accuracyPtr = magicAccuracy;
+ return candidateByData;
}
- }
- if (candidatesByName.m_allMatchingMimeTypes.isEmpty()) {
- // No glob, use magic
- *accuracyPtr = magicAccuracy;
- return candidateByData;
}
}
- }
- if (candidatesByName.m_allMatchingMimeTypes.count() > 1) {
- candidatesByName.m_matchingMimeTypes.sort(); // make it deterministic
- *accuracyPtr = 20;
- const QMimeType mime = mimeTypeForName(candidatesByName.m_matchingMimeTypes.at(0));
- if (mime.isValid())
- return mime;
- }
+ if (candidatesByName.m_allMatchingMimeTypes.count() > 1) {
+ candidatesByName.m_matchingMimeTypes.sort(); // make it deterministic
+ *accuracyPtr = 20;
+ const QMimeType mime = mimeTypeForName(candidatesByName.m_matchingMimeTypes.at(0));
+ if (mime.isValid())
+ return mime;
+ }
- return mimeTypeForName(defaultMimeType());
+ return mimeTypeForName(defaultMimeType());
+ };
+
+ if (device)
+ return matchOnContent(device);
+
+ QFile fallbackFile(fileName);
+ fallbackFile.open(QIODevice::ReadOnly); // error handling: matchOnContent() will check isOpen()
+ return matchOnContent(&fallbackFile);
}
QList<QMimeType> QMimeDatabasePrivate::allMimeTypes()
@@ -566,12 +571,12 @@ QMimeType QMimeDatabase::mimeTypeForFile(const QFileInfo &fileInfo, MatchMode mo
if (fileInfo.isDir())
return d->mimeTypeForName(QLatin1String("inode/directory"));
- QFile file(fileInfo.absoluteFilePath());
+ const QString filePath = fileInfo.filePath();
#ifdef Q_OS_UNIX
// Cannot access statBuf.st_mode from the filesystem engine, so we have to stat again.
// In addition we want to follow symlinks.
- const QByteArray nativeFilePath = QFile::encodeName(file.fileName());
+ const QByteArray nativeFilePath = QFile::encodeName(filePath);
QT_STATBUF statBuffer;
if (QT_STAT(nativeFilePath.constData(), &statBuffer) == 0) {
if (S_ISCHR(statBuffer.st_mode))
@@ -588,18 +593,19 @@ QMimeType QMimeDatabase::mimeTypeForFile(const QFileInfo &fileInfo, MatchMode mo
int priority = 0;
switch (mode) {
case MatchDefault:
- file.open(QIODevice::ReadOnly); // isOpen() will be tested by method below
- return d->mimeTypeForFileNameAndData(fileInfo.absoluteFilePath(), &file, &priority);
+ return d->mimeTypeForFileNameAndData(filePath, nullptr, &priority);
case MatchExtension:
locker.unlock();
- return mimeTypeForFile(fileInfo.absoluteFilePath(), mode);
- case MatchContent:
+ return mimeTypeForFile(filePath, mode);
+ case MatchContent: {
+ QFile file(filePath);
if (file.open(QIODevice::ReadOnly)) {
locker.unlock();
return mimeTypeForData(&file);
} else {
return d->mimeTypeForName(d->defaultMimeType());
}
+ }
default:
Q_ASSERT(false);
}
@@ -664,7 +670,7 @@ QList<QMimeType> QMimeDatabase::mimeTypesForFileName(const QString &fileName) co
QString QMimeDatabase::suffixForFileName(const QString &fileName) const
{
QMutexLocker locker(&d->mutex);
- const int suffixLength = d->findByFileName(QFileInfo(fileName).fileName()).m_knownSuffixLength;
+ const int suffixLength = d->findByFileName(fileName).m_knownSuffixLength;
return fileName.right(suffixLength);
}
@@ -756,6 +762,10 @@ QMimeType QMimeDatabase::mimeTypeForUrl(const QUrl &url) const
QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, QIODevice *device) const
{
QMutexLocker locker(&d->mutex);
+
+ if (fileName.endsWith(QLatin1Char('/')))
+ return d->mimeTypeForName(QLatin1String("inode/directory"));
+
int accuracy = 0;
const bool openedByUs = !device->isOpen() && device->open(QIODevice::ReadOnly);
const QMimeType result = d->mimeTypeForFileNameAndData(fileName, device, &accuracy);
@@ -783,6 +793,10 @@ QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, QIO
QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, const QByteArray &data) const
{
QMutexLocker locker(&d->mutex);
+
+ if (fileName.endsWith(QLatin1Char('/')))
+ return d->mimeTypeForName(QLatin1String("inode/directory"));
+
QBuffer buffer(const_cast<QByteArray *>(&data));
buffer.open(QIODevice::ReadOnly);
int accuracy = 0;
diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/N.tar.gz b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/N.tar.gz
new file mode 100644
index 0000000000..0a710447a5
--- /dev/null
+++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/N.tar.gz
Binary files differ
diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/X b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/X
new file mode 100644
index 0000000000..a84546e5bc
--- /dev/null
+++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/X
@@ -0,0 +1 @@
+#include <math.h>
diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/t.c b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/t.c
new file mode 100644
index 0000000000..3774c48f13
--- /dev/null
+++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/t.c
@@ -0,0 +1 @@
+void x();
diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/u.txt b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/u.txt
new file mode 100644
index 0000000000..8d45724eff
--- /dev/null
+++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/u.txt
@@ -0,0 +1,4 @@
+foo
+bar
+-
+_
diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/y b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/y
new file mode 100644
index 0000000000..399593b6ce
--- /dev/null
+++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/y
@@ -0,0 +1 @@
+diff --git a/plugins/patchreview/kdevpatchreview.json b/plugins/patchreview/kdevpatchreview.json
diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/z b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/z
new file mode 100644
index 0000000000..d1ed8a45d1
--- /dev/null
+++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/z
Binary files differ
diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp b/tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp
index fb2a500982..959330f256 100644
--- a/tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp
+++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp
@@ -1,6 +1,7 @@
/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
+** Copyright (C) 2021 Igor Kushnir <igorkuo@gmail.com>
** Contact: https://www.qt.io/licensing/
**
** This file is part of the test suite of the Qt Toolkit.
@@ -29,6 +30,36 @@
#include <QTest>
#include <QMimeDatabase>
+namespace {
+struct MatchModeInfo
+{
+ QMimeDatabase::MatchMode mode;
+ const char *name;
+};
+
+constexpr MatchModeInfo matchModes[] = { { QMimeDatabase::MatchDefault, "Default" },
+ { QMimeDatabase::MatchExtension, "Extension" },
+ { QMimeDatabase::MatchContent, "Content" } };
+
+void addFileRows(const char *tag, const QString &fileName, const QStringList &expectedMimeNames)
+{
+ QCOMPARE(static_cast<std::size_t>(expectedMimeNames.size()), std::size(matchModes));
+ for (int i = 0; i < expectedMimeNames.size(); ++i) {
+ QTest::addRow(qPrintable(tag + QStringLiteral(" - %s")), matchModes[i].name)
+ << fileName << matchModes[i].mode << expectedMimeNames[i];
+ }
+}
+
+void addExistentFileRows(const char *tag, const QString &fileName,
+ const QStringList &expectedMimeNames)
+{
+ const QString filePath = QFINDTESTDATA("files/" + fileName);
+ QVERIFY2(!filePath.isEmpty(),
+ qPrintable(QStringLiteral("Cannot find test file %1 in files/").arg(fileName)));
+ addFileRows(tag, filePath, expectedMimeNames);
+}
+}
+
class tst_QMimeDatabase: public QObject
{
@@ -37,6 +68,8 @@ class tst_QMimeDatabase: public QObject
private slots:
void inheritsPerformance();
void benchMimeTypeForName();
+ void benchMimeTypeForFile_data();
+ void benchMimeTypeForFile();
};
void tst_QMimeDatabase::inheritsPerformance()
@@ -82,6 +115,50 @@ void tst_QMimeDatabase::benchMimeTypeForName()
}
}
+void tst_QMimeDatabase::benchMimeTypeForFile_data()
+{
+ QTest::addColumn<QString>("fileName");
+ QTest::addColumn<QMimeDatabase::MatchMode>("mode");
+ QTest::addColumn<QString>("expectedMimeName");
+
+ addFileRows("archive", "a.tar.gz",
+ { "application/x-compressed-tar",
+ "application/x-compressed-tar",
+ "application/octet-stream" });
+ addFileRows("OpenDocument Text", "b.odt",
+ { "application/vnd.oasis.opendocument.text",
+ "application/vnd.oasis.opendocument.text",
+ "application/octet-stream" });
+
+ addExistentFileRows(
+ "existent archive with extension", "N.tar.gz",
+ { "application/x-compressed-tar", "application/x-compressed-tar", "application/gzip" });
+ addExistentFileRows("existent C with extension", "t.c",
+ { "text/x-csrc", "text/x-csrc", "text/plain" });
+ addExistentFileRows("existent text file with extension", "u.txt",
+ { "text/plain", "text/plain", "text/plain" });
+ addExistentFileRows("existent C w/o extension", "X",
+ { "text/x-csrc", "application/octet-stream", "text/x-csrc" });
+ addExistentFileRows("existent patch w/o extension", "y",
+ { "text/x-patch", "application/octet-stream", "text/x-patch" });
+ addExistentFileRows("existent archive w/o extension", "z",
+ { "application/gzip", "application/octet-stream", "application/gzip" });
+}
+
+void tst_QMimeDatabase::benchMimeTypeForFile()
+{
+ QFETCH(const QString, fileName);
+ QFETCH(const QMimeDatabase::MatchMode, mode);
+ QFETCH(const QString, expectedMimeName);
+
+ QMimeDatabase db;
+
+ QBENCHMARK {
+ const auto mimeType = db.mimeTypeForFile(fileName, mode);
+ QCOMPARE(mimeType.name(), expectedMimeName);
+ }
+}
+
QTEST_MAIN(tst_QMimeDatabase)
#include "tst_bench_qmimedatabase.moc"