summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorThiago Macieira <thiago.macieira@intel.com>2019-06-20 16:57:50 -0700
committerThiago Macieira <thiago.macieira@intel.com>2019-07-11 11:46:59 -0700
commitd01693733f6c1ebe6b3709f9c1284239ce3b5354 (patch)
tree9def99de116762efe0e5da0d347df25a8cd7b8a9 /src
parent786c58817187bb18552934c807ba7a7ea845f49e (diff)
QDirIterator: don't require NFD normalization on Darwin for validity
HFS+ filesystems do enforce NFD normalization, so the test worked for those filesystems. But on APFS, the filesystem is normalization-insensitive but normalization-preserving, so our transformation caused valid files to be rejected. This commit also optimizes the solution for all systems. Instead of converting from 8-bit to UTF-16 then back to 8-bit (allocating memory in both steps), we only convert to UTF-16. And if we detect the locale is UTF-8, then we use the further optimized QUtf8::isValidUtf8 function that doesn't allocate any memory at all (ditto for US-ASCII, the case of someone running with LANG=C). Fixes: QTBUG-76522 Change-Id: Ief874765cd7b43798de3fffd15aa0d81620ad317 Reviewed-by: Tor Arne Vestbø <tor.arne.vestbo@qt.io> Reviewed-by: Edward Welbourne <edward.welbourne@qt.io> Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src')
-rw-r--r--src/corelib/io/qfilesystemiterator_unix.cpp47
1 files changed, 44 insertions, 3 deletions
diff --git a/src/corelib/io/qfilesystemiterator_unix.cpp b/src/corelib/io/qfilesystemiterator_unix.cpp
index a9acf542d4..92ebdf0341 100644
--- a/src/corelib/io/qfilesystemiterator_unix.cpp
+++ b/src/corelib/io/qfilesystemiterator_unix.cpp
@@ -40,13 +40,54 @@
#include "qplatformdefs.h"
#include "qfilesystemiterator_p.h"
+#if QT_CONFIG(textcodec)
+# include <qtextcodec.h>
+# include <private/qutfcodec_p.h>
+#endif
+
#ifndef QT_NO_FILESYSTEMITERATOR
+#include <memory>
+
#include <stdlib.h>
#include <errno.h>
QT_BEGIN_NAMESPACE
+static bool checkNameDecodable(const char *d_name, qsizetype len)
+{
+ // Reports whether d_name (len bytes) is valid in the locale's encoding. Called
+ // in a loop from advance() below, but the loop is usually run only once.
+
+#if QT_CONFIG(textcodec)
+ // We identify the codecs by their RFC 2978 MIBenum values. In this
+ // function:
+ // 3 US-ASCII (ANSI X3.4-1986)
+ // 4 Latin1 (ISO-8859-1)
+ // 106 UTF-8
+ QTextCodec *codec = QTextCodec::codecForLocale();
+# ifdef QT_LOCALE_IS_UTF8
+ int mibEnum = 106;
+# else
+ int mibEnum = codec->mibEnum();
+# endif
+ if (Q_LIKELY(mibEnum == 106)) // UTF-8
+ return QUtf8::isValidUtf8(d_name, len).isValidUtf8;
+ if (mibEnum == 3) // US-ASCII
+ return QtPrivate::isAscii(QLatin1String(d_name, len));
+ if (mibEnum == 4) // Latin 1: any byte sequence is accepted
+ return true;
+
+ // any other codec: decode with the generic QTextCodec and check for errors
+ QTextCodec::ConverterState cs(QTextCodec::IgnoreHeader);
+ codec->toUnicode(d_name, len, &cs);
+ return cs.invalidChars == 0 && cs.remainingChars == 0;
+#else
+ // if we have no text codecs, then QString::fromLocal8Bit is fromLatin1
+ return true;
+#endif
+}
+
QFileSystemIterator::QFileSystemIterator(const QFileSystemEntry &entry, QDir::Filters filters,
const QStringList &nameFilters, QDirIterator::IteratorFlags flags)
: nativePath(entry.nativeFilePath())
@@ -81,9 +122,9 @@ bool QFileSystemIterator::advance(QFileSystemEntry &fileEntry, QFileSystemMetaDa
dirEntry = QT_READDIR(dir);
if (dirEntry) {
- // process entries with correct UTF-8 names only
- if (QFile::encodeName(QFile::decodeName(dirEntry->d_name)) == dirEntry->d_name) {
- fileEntry = QFileSystemEntry(nativePath + QByteArray(dirEntry->d_name), QFileSystemEntry::FromNativePath());
+ qsizetype len = strlen(dirEntry->d_name);
+ if (checkNameDecodable(dirEntry->d_name, len)) {
+ fileEntry = QFileSystemEntry(nativePath + QByteArray(dirEntry->d_name, len), QFileSystemEntry::FromNativePath());
metaData.fillFromDirEnt(*dirEntry);
return true;
}