From 3746eb8412ea42d7e3c519926460482530782a75 Mon Sep 17 00:00:00 2001 From: Samuel Gaist Date: Wed, 1 Aug 2018 00:28:26 +0200 Subject: QRegularExpression: refactor wildcard translation This patch refactors the wildcardToRegularExpression method to generate a simpler regular expression. It also fixes some shortcomings of the previous implementation. Tests have been updated to ensure all cases are properly supported. Change-Id: I454e3fe8fe0bb663b2f319d6fa2fa8aec626c50d Reviewed-by: Shawn Rutledge --- src/corelib/tools/qregularexpression.cpp | 59 +++++++++++++--------- .../qregularexpression/tst_qregularexpression.cpp | 19 +++++++ 2 files changed, 54 insertions(+), 24 deletions(-) diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp index 17caaabc97..1bd06a73cd 100644 --- a/src/corelib/tools/qregularexpression.cpp +++ b/src/corelib/tools/qregularexpression.cpp @@ -1874,6 +1874,9 @@ QString QRegularExpression::escape(const QString &str) \since 5.12 Returns a regular expression representation of the given glob \a pattern. + The transformation is targeting file path globbing, which means in particular + that path separators receive special treatment. This implies that it is not + just a basic translation from "*" to ".*". \snippet code/src_corelib_tools_qregularexpression.cpp 31 @@ -1917,19 +1920,35 @@ QString QRegularExpression::wildcardToRegularExpression(const QString &pattern) { const int wclen = pattern.length(); QString rx; + rx.reserve(wclen + wclen / 16); int i = 0; - bool hasNegativeBracket = false; const QChar *wc = pattern.unicode(); +#ifdef Q_OS_WIN + const QLatin1Char nativePathSeparator('\\'); + const QLatin1String starEscape("[^/\\\\]*"); + const QLatin1String questionMarkEscape("[^/\\\\]"); +#else + const QLatin1Char nativePathSeparator('/'); + const QLatin1String starEscape("[^/]*"); + const QLatin1String questionMarkEscape("[^/]"); +#endif + while (i < wclen) { const QChar c = wc[i++]; switch (c.unicode()) { case '*': - rx += QLatin1String(".*"); + rx += starEscape; break; case '?': - rx += QLatin1Char('.'); + rx += questionMarkEscape; + break; + case '\\': +#ifdef Q_OS_WIN + case '/': + rx += QLatin1String("[/\\\\]"); break; +#endif case '$': case '(': case ')': @@ -1943,35 +1962,27 @@ QString QRegularExpression::wildcardToRegularExpression(const QString &pattern) rx += c; break; case '[': + rx += c; // Support for the [!abc] or [!a-c] syntax - // Implements a negative look-behind for one char. if (i < wclen) { - if (wc[i] == QLatin1Char(']')) { - rx += c; - rx += wc[i++]; - } else if (wc[i] == QLatin1Char('!')) { - rx += QLatin1String(".(?<"); - rx += wc[i++]; - rx += c; - hasNegativeBracket = true; - } else { - rx += c; + if (wc[i] == QLatin1Char('!')) { + rx += QLatin1Char('^'); + ++i; } + + if (i < wclen && wc[i] == QLatin1Char(']')) + rx += wc[i++]; + while (i < wclen && wc[i] != QLatin1Char(']')) { + // The '/' appearing in a character class invalidates the + // regular expression parsing. It also concerns '\\' on + // Windows OS types. + if (wc[i] == QLatin1Char('/') || wc[i] == nativePathSeparator) + return rx; if (wc[i] == QLatin1Char('\\')) rx += QLatin1Char('\\'); rx += wc[i++]; } - } else { - rx += c; - } - break; - case ']': - rx += c; - // Closes the negative look-behind expression. - if (hasNegativeBracket) { - rx += QLatin1Char(')'); - hasNegativeBracket = false; } break; default: diff --git a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp index 987ca519ee..f520e9742a 100644 --- a/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp +++ b/tests/auto/corelib/tools/qregularexpression/tst_qregularexpression.cpp @@ -2173,6 +2173,7 @@ void tst_QRegularExpression::wildcard_data() addRow("?m", "test.html", 6); addRow("[*]", "test.html", -1); addRow("[?]","test.html", -1); + addRow("[?]","test.h?ml", 6); addRow("[[]","test.h[ml", 6); addRow("[]]","test.h]ml", 6); addRow(".h[a-z]ml", "test.html", 4); @@ -2187,6 +2188,24 @@ void tst_QRegularExpression::wildcard_data() addRow(".h[][!]", "test.h]ml", 4); addRow(".h[][!]", "test.h[ml", 4); addRow(".h[][!]", "test.h!ml", 4); + + addRow("foo/*/bar", "Qt/foo/baz/bar", 3); + addRow("foo/(*)/bar", "Qt/foo/baz/bar", -1); + addRow("foo/(*)/bar", "Qt/foo/(baz)/bar", 3); + addRow("foo/?/bar", "Qt/foo/Q/bar", 3); + addRow("foo/?/bar", "Qt/foo/Qt/bar", -1); + addRow("foo/(?)/bar", "Qt/foo/Q/bar", -1); + addRow("foo/(?)/bar", "Qt/foo/(Q)/bar", 3); + +#ifdef Q_OS_WIN + addRow("foo\\*\\bar", "Qt\\foo\\baz\\bar", 3); + addRow("foo\\(*)\\bar", "Qt\\foo\\baz\\bar", -1); + addRow("foo\\(*)\\bar", "Qt\\foo\\(baz)\\bar", 3); + addRow("foo\\?\\bar", "Qt\\foo\\Q\\bar", 3); + addRow("foo\\?\\bar", "Qt\\foo\\Qt\\bar", -1); + addRow("foo\\(?)\\bar", "Qt\\foo\\Q\\bar", -1); + addRow("foo\\(?)\\bar", "Qt\\foo\\(Q)\\bar", 3); +#endif } void tst_QRegularExpression::wildcard() -- cgit v1.2.3