From d01693733f6c1ebe6b3709f9c1284239ce3b5354 Mon Sep 17 00:00:00 2001 From: Thiago Macieira Date: Thu, 20 Jun 2019 16:57:50 -0700 Subject: QDirIterator: don't require NFD normalization on Darwin for validity MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HFS+ filesystems do enforce NFD normalization, so the test worked for those filesystems. But on APFS, the filesystem is normalization- insensitive but preserves it, so our transformation caused valid files to be rejected. This commit also optimizes the solution for all systems too. Instead of converting from 8-bit to UTF-16 then back to 8-bit (allocating memory in both steps), we only convert to UTF-16. And if we detect the locale is UTF-8, then we use the further optimized QUtf8::isValidUtf8 function that doesn't allocate any memory at all (ditto for US-ASCII, the case of someone running with LANG=C). Fixes: QTBUG-76522 Change-Id: Ief874765cd7b43798de3fffd15aa0d81620ad317 Reviewed-by: Tor Arne Vestbø Reviewed-by: Edward Welbourne Reviewed-by: Thiago Macieira --- src/corelib/io/qfilesystemiterator_unix.cpp | 47 +++++++++++++++++++++++++++-- 1 file changed, 44 insertions(+), 3 deletions(-) diff --git a/src/corelib/io/qfilesystemiterator_unix.cpp b/src/corelib/io/qfilesystemiterator_unix.cpp index a9acf542d4..92ebdf0341 100644 --- a/src/corelib/io/qfilesystemiterator_unix.cpp +++ b/src/corelib/io/qfilesystemiterator_unix.cpp @@ -40,13 +40,54 @@ #include "qplatformdefs.h" #include "qfilesystemiterator_p.h" +#if QT_CONFIG(textcodec) +# include +# include +#endif + #ifndef QT_NO_FILESYSTEMITERATOR +#include + #include #include QT_BEGIN_NAMESPACE +static bool checkNameDecodable(const char *d_name, qsizetype len) +{ + // This function is called in a loop from advance() below, but the loop is + // usually run only once. + +#if QT_CONFIG(textcodec) + // We identify the codecs by their RFC 2978 MIBenum values. In this + // function: + // 3 US-ASCII (ANSI X3.4-1986) + // 4 Latin1 (ISO-8859-1) + // 106 UTF-8 + QTextCodec *codec = QTextCodec::codecForLocale(); +# ifdef QT_LOCALE_IS_UTF8 + int mibEnum = 106; +# else + int mibEnum = codec->mibEnum(); +# endif + if (Q_LIKELY(mibEnum == 106)) // UTF-8 + return QUtf8::isValidUtf8(d_name, len).isValidUtf8; + if (mibEnum == 3) // US-ASCII + return QtPrivate::isAscii(QLatin1String(d_name, len)); + if (mibEnum == 4) // Latin 1 + return true; + + // fall back to generic QTextCodec + QTextCodec::ConverterState cs(QTextCodec::IgnoreHeader); + codec->toUnicode(d_name, len, &cs); + return cs.invalidChars == 0 && cs.remainingChars == 0; +#else + // if we have no text codecs, then QString::fromLocal8Bit is fromLatin1 + return true; +#endif +} + QFileSystemIterator::QFileSystemIterator(const QFileSystemEntry &entry, QDir::Filters filters, const QStringList &nameFilters, QDirIterator::IteratorFlags flags) : nativePath(entry.nativeFilePath()) @@ -81,9 +122,9 @@ bool QFileSystemIterator::advance(QFileSystemEntry &fileEntry, QFileSystemMetaDa dirEntry = QT_READDIR(dir); if (dirEntry) { - // process entries with correct UTF-8 names only - if (QFile::encodeName(QFile::decodeName(dirEntry->d_name)) == dirEntry->d_name) { - fileEntry = QFileSystemEntry(nativePath + QByteArray(dirEntry->d_name), QFileSystemEntry::FromNativePath()); + qsizetype len = strlen(dirEntry->d_name); + if (checkNameDecodable(dirEntry->d_name, len)) { + fileEntry = QFileSystemEntry(nativePath + QByteArray(dirEntry->d_name, len), QFileSystemEntry::FromNativePath()); metaData.fillFromDirEnt(*dirEntry); return true; } -- cgit v1.2.3