diff options
author | Thiago Macieira <thiago.macieira@intel.com> | 2019-06-20 16:57:50 -0700 |
---|---|---|
committer | Thiago Macieira <thiago.macieira@intel.com> | 2019-07-11 11:46:59 -0700 |
commit | d01693733f6c1ebe6b3709f9c1284239ce3b5354 (patch) | |
tree | 9def99de116762efe0e5da0d347df25a8cd7b8a9 /src/corelib | |
parent | 786c58817187bb18552934c807ba7a7ea845f49e (diff) |
QDirIterator: don't require NFD normalization on Darwin for validity
HFS+ filesystems do enforce NFD normalization, so the test worked for
those filesystems. But on APFS, the filesystem is normalization-
insensitive but preserves it, so our transformation caused valid files
to be rejected.
This commit also optimizes the solution for all systems too. Instead of
converting from 8-bit to UTF-16 then back to 8-bit (allocating memory in
both steps), we only convert to UTF-16. And if we detect the locale is
UTF-8, then we use the further optimized QUtf8::isValidUtf8 function
that doesn't allocate any memory at all (ditto for US-ASCII, the case of
someone running with LANG=C).
Fixes: QTBUG-76522
Change-Id: Ief874765cd7b43798de3fffd15aa0d81620ad317
Reviewed-by: Tor Arne Vestbø <tor.arne.vestbo@qt.io>
Reviewed-by: Edward Welbourne <edward.welbourne@qt.io>
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
Diffstat (limited to 'src/corelib')
-rw-r--r-- | src/corelib/io/qfilesystemiterator_unix.cpp | 47 |
1 files changed, 44 insertions, 3 deletions
diff --git a/src/corelib/io/qfilesystemiterator_unix.cpp b/src/corelib/io/qfilesystemiterator_unix.cpp index a9acf542d4..92ebdf0341 100644 --- a/src/corelib/io/qfilesystemiterator_unix.cpp +++ b/src/corelib/io/qfilesystemiterator_unix.cpp @@ -40,13 +40,54 @@ #include "qplatformdefs.h" #include "qfilesystemiterator_p.h" +#if QT_CONFIG(textcodec) +# include <qtextcodec.h> +# include <private/qutfcodec_p.h> +#endif + #ifndef QT_NO_FILESYSTEMITERATOR +#include <memory> + #include <stdlib.h> #include <errno.h> QT_BEGIN_NAMESPACE +static bool checkNameDecodable(const char *d_name, qsizetype len) +{ + // This function is called in a loop from advance() below, but the loop is + // usually run only once. + +#if QT_CONFIG(textcodec) + // We identify the codecs by their RFC 2978 MIBenum values. In this + // function: + // 3 US-ASCII (ANSI X3.4-1986) + // 4 Latin1 (ISO-8859-1) + // 106 UTF-8 + QTextCodec *codec = QTextCodec::codecForLocale(); +# ifdef QT_LOCALE_IS_UTF8 + int mibEnum = 106; +# else + int mibEnum = codec->mibEnum(); +# endif + if (Q_LIKELY(mibEnum == 106)) // UTF-8 + return QUtf8::isValidUtf8(d_name, len).isValidUtf8; + if (mibEnum == 3) // US-ASCII + return QtPrivate::isAscii(QLatin1String(d_name, len)); + if (mibEnum == 4) // Latin 1 + return true; + + // fall back to generic QTextCodec + QTextCodec::ConverterState cs(QTextCodec::IgnoreHeader); + codec->toUnicode(d_name, len, &cs); + return cs.invalidChars == 0 && cs.remainingChars == 0; +#else + // if we have no text codecs, then QString::fromLocal8Bit is fromLatin1 + return true; +#endif +} + QFileSystemIterator::QFileSystemIterator(const QFileSystemEntry &entry, QDir::Filters filters, const QStringList &nameFilters, QDirIterator::IteratorFlags flags) : nativePath(entry.nativeFilePath()) @@ -81,9 +122,9 @@ bool QFileSystemIterator::advance(QFileSystemEntry &fileEntry, QFileSystemMetaDa dirEntry = QT_READDIR(dir); if (dirEntry) { - // process entries with correct UTF-8 names only - if (QFile::encodeName(QFile::decodeName(dirEntry->d_name)) == dirEntry->d_name) { - fileEntry = QFileSystemEntry(nativePath + QByteArray(dirEntry->d_name), QFileSystemEntry::FromNativePath()); + qsizetype len = strlen(dirEntry->d_name); + if (checkNameDecodable(dirEntry->d_name, len)) { + fileEntry = QFileSystemEntry(nativePath + QByteArray(dirEntry->d_name, len), QFileSystemEntry::FromNativePath()); metaData.fillFromDirEnt(*dirEntry); return true; } |