diff options
author | Igor Kushnir <igorkuo@gmail.com> | 2021-11-13 14:21:40 +0200 |
---|---|---|
committer | Igor Kushnir <igorkuo@gmail.com> | 2021-12-06 22:55:06 +0200 |
commit | d797e3c88ed7be6e2b683984950334a8054728a9 (patch) | |
tree | 952034e2a5021b8767d250cd89697312ba9b440d | |
parent | 6d6fb7846c664b3b15ae03542658f6beae96e0b2 (diff) |
Optimize QMimeDatabase::mimeTypeForFile(f, MatchDefault)
Open the file only if matching on content is needed.
Use QFileInfo::filePath() instead of QFileInfo::absoluteFilePath() in
QMimeDatabase::mimeTypeForFile(). filePath() does much less work, and so
is faster. Thiago Macieira helpfully explained in a review comment why
the absolute path is not useful for correctness here: "Nothing needs
absolute paths within the same application that would resolve the
relative path to absolute. You only need an absolute path if you're
communicating with another application that may be in a different
directory."
QMimeDatabase::mimeTypeForFile() checks fileInfo.isDir(), so the
fileName.endsWith(QLatin1Char('/')) check in
QMimeDatabasePrivate::mimeTypeForFileNameAndData() was redundant when
called from this function. The other two callers of that function now
check this condition before opening IO devices. This improves
performance of the two QMimeDatabase::mimeTypeForFileNameAndData()
overloads in the corner case.
Refactor and optimize QMimeDatabasePrivate::findByFileName() and its
usages. Previously each caller constructed a QFileInfo object and passed
QFileInfo::fileName() into this function. Now the callers simply pass an
absolute or relative path to a file into this function, which then uses
QFileSystemEntry::fileName() to exclude the path. Constructing QFileInfo
is relatively expensive, so this change slightly improves performance.
Optimize QMimeDatabasePrivate::loadProviders() by calling static
QFileInfo::exists() instead of constructing a QFileInfo object and
calling the non-static QFileInfo::exists() overload. Note that the
QFileInfo object was always created, even if QFileInfo::exists() under
an `if` and an `#if` was never called.
The following table contains the average results of the added benchmark
tst_QMimeDatabase::benchMimeTypeForFile() on my GNU/Linux system before
and at this commit. The numbers denote milliseconds per iteration.
data row tag before at
MatchDefault:
archive 0.029 0.016
OpenDocument Text 0.029 0.015
existent archive with extension 0.039 0.025
existent C with extension 0.033 0.020
existent text file with extension 0.033 0.020
existent C w/o extension 0.076 0.074
existent patch w/o extension 0.11 0.105
existent archive w/o extension 0.069 0.066
MatchExtension:
archive 0.012 0.0115
OpenDocument Text 0.0115 0.011
existent archive with extension 0.017 0.016
existent C with extension 0.011 0.011
existent text file with extension 0.011 0.011
existent C w/o extension 0.016 0.0155
existent patch w/o extension 0.013 0.012
existent archive w/o extension 0.013 0.012
MatchContent:
archive 0.019 0.012
OpenDocument Text 0.019 0.012
existent archive with extension 0.053 0.051
existent C with extension 0.056 0.0545
existent text file with extension 0.058 0.056
existent C w/o extension 0.0605 0.059
existent patch w/o extension 0.10 0.099
existent archive w/o extension 0.057 0.054
Change-Id: Idb541656e073a2c4822ace3f4da412f29f2351f8
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Reviewed-by: David Faure <david.faure@kdab.com>
-rw-r--r-- | src/corelib/mimetypes/qmimedatabase.cpp | 118 | ||||
-rw-r--r-- | tests/benchmarks/corelib/mimetypes/qmimedatabase/files/N.tar.gz | bin | 0 -> 115 bytes | |||
-rw-r--r-- | tests/benchmarks/corelib/mimetypes/qmimedatabase/files/X | 1 | ||||
-rw-r--r-- | tests/benchmarks/corelib/mimetypes/qmimedatabase/files/t.c | 1 | ||||
-rw-r--r-- | tests/benchmarks/corelib/mimetypes/qmimedatabase/files/u.txt | 4 | ||||
-rw-r--r-- | tests/benchmarks/corelib/mimetypes/qmimedatabase/files/y | 1 | ||||
-rw-r--r-- | tests/benchmarks/corelib/mimetypes/qmimedatabase/files/z | bin | 0 -> 132 bytes | |||
-rw-r--r-- | tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp | 77 |
8 files changed, 150 insertions, 52 deletions
diff --git a/src/corelib/mimetypes/qmimedatabase.cpp b/src/corelib/mimetypes/qmimedatabase.cpp index 0be220e124..f7cd926220 100644 --- a/src/corelib/mimetypes/qmimedatabase.cpp +++ b/src/corelib/mimetypes/qmimedatabase.cpp @@ -46,6 +46,8 @@ #include "qmimeprovider_p.h" #include "qmimetype_p.h" +#include <private/qfilesystementry_p.h> + #include <QtCore/QFile> #include <QtCore/QFileInfo> #include <QtCore/QStandardPaths> @@ -111,7 +113,6 @@ void QMimeDatabasePrivate::loadProviders() for (const QString &mimeDir : mimeDirs) { const QString cacheFile = mimeDir + QStringLiteral("/mime.cache"); - QFileInfo fileInfo(cacheFile); // Check if we already have a provider for this dir const auto predicate = [mimeDir](const std::unique_ptr<QMimeProviderBase> &prov) { @@ -121,7 +122,7 @@ void QMimeDatabasePrivate::loadProviders() if (it == currentProviders.end()) { std::unique_ptr<QMimeProviderBase> provider; #if defined(QT_USE_MMAP) - if (qEnvironmentVariableIsEmpty("QT_NO_MIME_CACHE") && fileInfo.exists()) { + if (qEnvironmentVariableIsEmpty("QT_NO_MIME_CACHE") && QFileInfo::exists(cacheFile)) { provider.reset(new QMimeBinaryProvider(this, mimeDir)); //qDebug() << "Created binary provider for" << mimeDir; if (!provider->isValid()) { @@ -206,8 +207,7 @@ QStringList QMimeDatabasePrivate::mimeTypeForFileName(const QString &fileName) if (fileName.endsWith(QLatin1Char('/'))) return QStringList() << QLatin1String("inode/directory"); - const QString shortName = QFileInfo(fileName).fileName(); - const QMimeGlobMatchResult result = findByFileName(shortName); + const QMimeGlobMatchResult result = findByFileName(fileName); QStringList matchingMimeTypes = result.m_matchingMimeTypes; matchingMimeTypes.sort(); // make it deterministic return matchingMimeTypes; @@ -216,8 +216,9 @@ QStringList QMimeDatabasePrivate::mimeTypeForFileName(const QString &fileName) QMimeGlobMatchResult QMimeDatabasePrivate::findByFileName(const QString &fileName) { QMimeGlobMatchResult result; + const QString fileNameExcludingPath = QFileSystemEntry(fileName).fileName(); for (const auto &provider : providers()) - provider->addFileNameMatches(fileName, result); + provider->addFileNameMatches(fileNameExcludingPath, result); return result; } @@ -374,11 +375,7 @@ QMimeType QMimeDatabasePrivate::mimeTypeForFileNameAndData(const QString &fileNa *accuracyPtr = 0; // Pass 1) Try to match on the file name - QMimeGlobMatchResult candidatesByName; - if (fileName.endsWith(QLatin1Char('/'))) - candidatesByName.addMatch(QLatin1String("inode/directory"), 100, QString()); - else - candidatesByName = findByFileName(QFileInfo(fileName).fileName()); + QMimeGlobMatchResult candidatesByName = findByFileName(fileName); if (candidatesByName.m_allMatchingMimeTypes.count() == 1) { *accuracyPtr = 100; const QMimeType mime = mimeTypeForName(candidatesByName.m_matchingMimeTypes.at(0)); @@ -389,47 +386,55 @@ QMimeType QMimeDatabasePrivate::mimeTypeForFileNameAndData(const QString &fileNa // Extension is unknown, or matches multiple mimetypes. // Pass 2) Match on content, if we can read the data - if (device->isOpen()) { - - // Read 16K in one go (QIODEVICE_BUFFERSIZE in qiodevice_p.h). - // This is much faster than seeking back and forth into QIODevice. - const QByteArray data = device->peek(16384); - - int magicAccuracy = 0; - QMimeType candidateByData(findByData(data, &magicAccuracy)); - - // Disambiguate conflicting extensions (if magic matching found something) - if (candidateByData.isValid() && magicAccuracy > 0) { - const QString sniffedMime = candidateByData.name(); - // If the sniffedMime matches a highest-weight glob match, use it - if (candidatesByName.m_matchingMimeTypes.contains(sniffedMime)) { - *accuracyPtr = 100; - return candidateByData; - } - for (const QString &m : qAsConst(candidatesByName.m_allMatchingMimeTypes)) { - if (inherits(m, sniffedMime)) { - // We have magic + pattern pointing to this, so it's a pretty good match + const auto matchOnContent = [this, accuracyPtr, &candidatesByName](QIODevice *device) { + if (device->isOpen()) { + // Read 16K in one go (QIODEVICE_BUFFERSIZE in qiodevice_p.h). + // This is much faster than seeking back and forth into QIODevice. + const QByteArray data = device->peek(16384); + + int magicAccuracy = 0; + QMimeType candidateByData(findByData(data, &magicAccuracy)); + + // Disambiguate conflicting extensions (if magic matching found something) + if (candidateByData.isValid() && magicAccuracy > 0) { + const QString sniffedMime = candidateByData.name(); + // If the sniffedMime matches a highest-weight glob match, use it + if (candidatesByName.m_matchingMimeTypes.contains(sniffedMime)) { *accuracyPtr = 100; - return mimeTypeForName(m); + return candidateByData; + } + for (const QString &m : qAsConst(candidatesByName.m_allMatchingMimeTypes)) { + if (inherits(m, sniffedMime)) { + // We have magic + pattern pointing to this, so it's a pretty good match + *accuracyPtr = 100; + return mimeTypeForName(m); + } + } + if (candidatesByName.m_allMatchingMimeTypes.isEmpty()) { + // No glob, use magic + *accuracyPtr = magicAccuracy; + return candidateByData; } - } - if (candidatesByName.m_allMatchingMimeTypes.isEmpty()) { - // No glob, use magic - *accuracyPtr = magicAccuracy; - return candidateByData; } } - } - if (candidatesByName.m_allMatchingMimeTypes.count() > 1) { - candidatesByName.m_matchingMimeTypes.sort(); // make it deterministic - *accuracyPtr = 20; - const QMimeType mime = mimeTypeForName(candidatesByName.m_matchingMimeTypes.at(0)); - if (mime.isValid()) - return mime; - } + if (candidatesByName.m_allMatchingMimeTypes.count() > 1) { + candidatesByName.m_matchingMimeTypes.sort(); // make it deterministic + *accuracyPtr = 20; + const QMimeType mime = mimeTypeForName(candidatesByName.m_matchingMimeTypes.at(0)); + if (mime.isValid()) + return mime; + } - return mimeTypeForName(defaultMimeType()); + return mimeTypeForName(defaultMimeType()); + }; + + if (device) + return matchOnContent(device); + + QFile fallbackFile(fileName); + fallbackFile.open(QIODevice::ReadOnly); // error handling: matchOnContent() will check isOpen() + return matchOnContent(&fallbackFile); } QList<QMimeType> QMimeDatabasePrivate::allMimeTypes() @@ -566,12 +571,12 @@ QMimeType QMimeDatabase::mimeTypeForFile(const QFileInfo &fileInfo, MatchMode mo if (fileInfo.isDir()) return d->mimeTypeForName(QLatin1String("inode/directory")); - QFile file(fileInfo.absoluteFilePath()); + const QString filePath = fileInfo.filePath(); #ifdef Q_OS_UNIX // Cannot access statBuf.st_mode from the filesystem engine, so we have to stat again. // In addition we want to follow symlinks. - const QByteArray nativeFilePath = QFile::encodeName(file.fileName()); + const QByteArray nativeFilePath = QFile::encodeName(filePath); QT_STATBUF statBuffer; if (QT_STAT(nativeFilePath.constData(), &statBuffer) == 0) { if (S_ISCHR(statBuffer.st_mode)) @@ -588,18 +593,19 @@ QMimeType QMimeDatabase::mimeTypeForFile(const QFileInfo &fileInfo, MatchMode mo int priority = 0; switch (mode) { case MatchDefault: - file.open(QIODevice::ReadOnly); // isOpen() will be tested by method below - return d->mimeTypeForFileNameAndData(fileInfo.absoluteFilePath(), &file, &priority); + return d->mimeTypeForFileNameAndData(filePath, nullptr, &priority); case MatchExtension: locker.unlock(); - return mimeTypeForFile(fileInfo.absoluteFilePath(), mode); - case MatchContent: + return mimeTypeForFile(filePath, mode); + case MatchContent: { + QFile file(filePath); if (file.open(QIODevice::ReadOnly)) { locker.unlock(); return mimeTypeForData(&file); } else { return d->mimeTypeForName(d->defaultMimeType()); } + } default: Q_ASSERT(false); } @@ -664,7 +670,7 @@ QList<QMimeType> QMimeDatabase::mimeTypesForFileName(const QString &fileName) co QString QMimeDatabase::suffixForFileName(const QString &fileName) const { QMutexLocker locker(&d->mutex); - const int suffixLength = d->findByFileName(QFileInfo(fileName).fileName()).m_knownSuffixLength; + const int suffixLength = d->findByFileName(fileName).m_knownSuffixLength; return fileName.right(suffixLength); } @@ -756,6 +762,10 @@ QMimeType QMimeDatabase::mimeTypeForUrl(const QUrl &url) const QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, QIODevice *device) const { QMutexLocker locker(&d->mutex); + + if (fileName.endsWith(QLatin1Char('/'))) + return d->mimeTypeForName(QLatin1String("inode/directory")); + int accuracy = 0; const bool openedByUs = !device->isOpen() && device->open(QIODevice::ReadOnly); const QMimeType result = d->mimeTypeForFileNameAndData(fileName, device, &accuracy); @@ -783,6 +793,10 @@ QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, QIO QMimeType QMimeDatabase::mimeTypeForFileNameAndData(const QString &fileName, const QByteArray &data) const { QMutexLocker locker(&d->mutex); + + if (fileName.endsWith(QLatin1Char('/'))) + return d->mimeTypeForName(QLatin1String("inode/directory")); + QBuffer buffer(const_cast<QByteArray *>(&data)); buffer.open(QIODevice::ReadOnly); int accuracy = 0; diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/N.tar.gz b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/N.tar.gz Binary files differnew file mode 100644 index 0000000000..0a710447a5 --- /dev/null +++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/N.tar.gz diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/X b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/X new file mode 100644 index 0000000000..a84546e5bc --- /dev/null +++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/X @@ -0,0 +1 @@ +#include <math.h> diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/t.c b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/t.c new file mode 100644 index 0000000000..3774c48f13 --- /dev/null +++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/t.c @@ -0,0 +1 @@ +void x(); diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/u.txt b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/u.txt new file mode 100644 index 0000000000..8d45724eff --- /dev/null +++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/u.txt @@ -0,0 +1,4 @@ +foo +bar +- +_ diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/y b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/y new file mode 100644 index 0000000000..399593b6ce --- /dev/null +++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/y @@ -0,0 +1 @@ +diff --git a/plugins/patchreview/kdevpatchreview.json b/plugins/patchreview/kdevpatchreview.json diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/z b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/z Binary files differnew file mode 100644 index 0000000000..d1ed8a45d1 --- /dev/null +++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/files/z diff --git a/tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp b/tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp index fb2a500982..959330f256 100644 --- a/tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp +++ b/tests/benchmarks/corelib/mimetypes/qmimedatabase/tst_bench_qmimedatabase.cpp @@ -1,6 +1,7 @@ /**************************************************************************** ** ** Copyright (C) 2016 The Qt Company Ltd. +** Copyright (C) 2021 Igor Kushnir <igorkuo@gmail.com> ** Contact: https://www.qt.io/licensing/ ** ** This file is part of the test suite of the Qt Toolkit. @@ -29,6 +30,36 @@ #include <QTest> #include <QMimeDatabase> +namespace { +struct MatchModeInfo +{ + QMimeDatabase::MatchMode mode; + const char *name; +}; + +constexpr MatchModeInfo matchModes[] = { { QMimeDatabase::MatchDefault, "Default" }, + { QMimeDatabase::MatchExtension, "Extension" }, + { QMimeDatabase::MatchContent, "Content" } }; + +void addFileRows(const char *tag, const QString &fileName, const QStringList &expectedMimeNames) +{ + QCOMPARE(static_cast<std::size_t>(expectedMimeNames.size()), std::size(matchModes)); + for (int i = 0; i < expectedMimeNames.size(); ++i) { + QTest::addRow(qPrintable(tag + QStringLiteral(" - %s")), matchModes[i].name) + << fileName << matchModes[i].mode << expectedMimeNames[i]; + } +} + +void addExistentFileRows(const char *tag, const QString &fileName, + const QStringList &expectedMimeNames) +{ + const QString filePath = QFINDTESTDATA("files/" + fileName); + QVERIFY2(!filePath.isEmpty(), + qPrintable(QStringLiteral("Cannot find test file %1 in files/").arg(fileName))); + addFileRows(tag, filePath, expectedMimeNames); +} +} + class tst_QMimeDatabase: public QObject { @@ -37,6 +68,8 @@ class tst_QMimeDatabase: public QObject private slots: void inheritsPerformance(); void benchMimeTypeForName(); + void benchMimeTypeForFile_data(); + void benchMimeTypeForFile(); }; void tst_QMimeDatabase::inheritsPerformance() @@ -82,6 +115,50 @@ void tst_QMimeDatabase::benchMimeTypeForName() } } +void tst_QMimeDatabase::benchMimeTypeForFile_data() +{ + QTest::addColumn<QString>("fileName"); + QTest::addColumn<QMimeDatabase::MatchMode>("mode"); + QTest::addColumn<QString>("expectedMimeName"); + + addFileRows("archive", "a.tar.gz", + { "application/x-compressed-tar", + "application/x-compressed-tar", + "application/octet-stream" }); + addFileRows("OpenDocument Text", "b.odt", + { "application/vnd.oasis.opendocument.text", + "application/vnd.oasis.opendocument.text", + "application/octet-stream" }); + + addExistentFileRows( + "existent archive with extension", "N.tar.gz", + { "application/x-compressed-tar", "application/x-compressed-tar", "application/gzip" }); + addExistentFileRows("existent C with extension", "t.c", + { "text/x-csrc", "text/x-csrc", "text/plain" }); + addExistentFileRows("existent text file with extension", "u.txt", + { "text/plain", "text/plain", "text/plain" }); + addExistentFileRows("existent C w/o extension", "X", + { "text/x-csrc", "application/octet-stream", "text/x-csrc" }); + addExistentFileRows("existent patch w/o extension", "y", + { "text/x-patch", "application/octet-stream", "text/x-patch" }); + addExistentFileRows("existent archive w/o extension", "z", + { "application/gzip", "application/octet-stream", "application/gzip" }); +} + +void tst_QMimeDatabase::benchMimeTypeForFile() +{ + QFETCH(const QString, fileName); + QFETCH(const QMimeDatabase::MatchMode, mode); + QFETCH(const QString, expectedMimeName); + + QMimeDatabase db; + + QBENCHMARK { + const auto mimeType = db.mimeTypeForFile(fileName, mode); + QCOMPARE(mimeType.name(), expectedMimeName); + } +} + QTEST_MAIN(tst_QMimeDatabase) #include "tst_bench_qmimedatabase.moc" |