diff options
-rw-r--r-- | src/corelib/text/qregularexpression.cpp | 93 | ||||
-rw-r--r-- | src/corelib/text/qregularexpression.h | 3 | ||||
-rw-r--r-- | tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp | 115 |
3 files changed, 135 insertions, 76 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp index 8b3cf63dd2..2c83d122a3 100644 --- a/src/corelib/text/qregularexpression.cpp +++ b/src/corelib/text/qregularexpression.cpp @@ -1856,22 +1856,31 @@ QString QRegularExpression::escape(QStringView str) \value UnanchoredWildcardConversion The conversion will not anchor the pattern. This allows for partial string matches of wildcard expressions. + + \value NonPathWildcardConversion + The conversion will \e{not} interpret the pattern as filepath globbing. + This enum value has been introduced in Qt 6.6. + + \sa QRegularExpression::wildcardToRegularExpression */ /*! \since 5.15 Returns a regular expression representation of the given glob \a pattern. - The transformation is targeting file path globbing, which means in particular - that path separators receive special treatment. This implies that it is not - just a basic translation from "*" to ".*". + + There are two transformations possible, one that targets file path + globbing, and another one which is more generic. + + By default, the transformation is targeting file path globbing, + which means in particular that path separators receive special + treatment. This implies that it is not just a basic translation + from "*" to ".*" and similar. \snippet code/src_corelib_text_qregularexpression.cpp 31 - By default, the returned regular expression is fully anchored. In other - words, there is no need of calling anchoredPattern() again on the - result. To get a regular expression that is not anchored, pass - UnanchoredWildcardConversion as the conversion \a options. + The more generic globbing transformation is available by passing + \c NonPathWildcardConversion in the conversion \a options. This implementation follows closely the definition of wildcard for glob patterns: @@ -1880,10 +1889,12 @@ QString QRegularExpression::escape(QStringView str) \li Any character represents itself apart from those mentioned below. 
Thus \b{c} matches the character \e c. \row \li \b{?} - \li Matches any single character. It is the same as - \b{.} in full regexps. + \li Matches any single character, except for a path separator + (in case file path globbing has been selected). It is the + same as \b{.} in full regexps. \row \li \b{*} - \li Matches zero or more of any characters. It is the + \li Matches zero or more of any characters, except for path + separators (in case file path globbing has been selected). It is the same as \b{.*} in full regexps. \row \li \b{[abc]} \li Matches one character given in the bracket. @@ -1897,9 +1908,10 @@ QString QRegularExpression::escape(QStringView str) bracket. It is the same as \b{[^a-c]} in full regexp. \endtable - \note The backslash (\\) character is \e not an escape char in this context. - In order to match one of the special characters, place it in square brackets - (for example, \c{[?]}). + \note For historical reasons, a backslash (\\) character is \e not + an escape char in this context. In order to match one of the + special characters, place it in square brackets (for example, + \c{[?]}). More information about the implementation can be found in: \list @@ -1907,6 +1919,11 @@ QString QRegularExpression::escape(QStringView str) \li \c {man 7 glob} \endlist + By default, the returned regular expression is fully anchored. In other + words, there is no need of calling anchoredPattern() again on the + result. To get a regular expression that is not anchored, pass + UnanchoredWildcardConversion in the conversion \a options. 
+ \sa escape() */ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options) @@ -1917,29 +1934,49 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil qsizetype i = 0; const QChar *wc = pattern.data(); + struct GlobSettings { + char16_t nativePathSeparator; + QStringView starEscape; + QStringView questionMarkEscape; + }; + + const GlobSettings settings = [options]() { + if (options.testFlag(NonPathWildcardConversion)) { + return GlobSettings{ u'\0', u".*", u"." }; + } else { #ifdef Q_OS_WIN - const char16_t nativePathSeparator = u'\\'; - const auto starEscape = "[^/\\\\]*"_L1; - const auto questionMarkEscape = "[^/\\\\]"_L1; + return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" }; #else - const char16_t nativePathSeparator = u'/'; - const auto starEscape = "[^/]*"_L1; - const auto questionMarkEscape = "[^/]"_L1; + return GlobSettings{ u'/', u"[^/]*", u"[^/]" }; #endif + } + }(); while (i < wclen) { const QChar c = wc[i++]; switch (c.unicode()) { case '*': - rx += starEscape; + rx += settings.starEscape; break; case '?': - rx += questionMarkEscape; + rx += settings.questionMarkEscape; break; + // When not using filepath globbing: \ is escaped, / is itself + // When using filepath globbing: + // * Unix: \ gets escaped. / is itself + // * Windows: \ and / can match each other -- they become [/\\] in regexp case '\\': #ifdef Q_OS_WIN + if (options.testFlag(NonPathWildcardConversion)) + rx += u"\\\\"; + else + rx += u"[/\\\\]"; + break; case '/': - rx += "[/\\\\]"_L1; + if (options.testFlag(NonPathWildcardConversion)) + rx += u'/'; + else + rx += u"[/\\\\]"; break; #endif case '$': @@ -1967,11 +2004,13 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil rx += wc[i++]; while (i < wclen && wc[i] != u']') { - // The '/' appearing in a character class invalidates the - // regular expression parsing. It also concerns '\\' on - // Windows OS types. 
- if (wc[i] == u'/' || wc[i] == nativePathSeparator) - return rx; + if (!options.testFlag(NonPathWildcardConversion)) { + // The '/' appearing in a character class invalidates the + // regular expression parsing. It also concerns '\\' on + // Windows OS types. + if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator) + return rx; + } if (wc[i] == u'\\') rx += u'\\'; rx += wc[i++]; diff --git a/src/corelib/text/qregularexpression.h b/src/corelib/text/qregularexpression.h index 4f1ad8d13f..a264717b28 100644 --- a/src/corelib/text/qregularexpression.h +++ b/src/corelib/text/qregularexpression.h @@ -131,7 +131,8 @@ public: enum WildcardConversionOption { DefaultWildcardConversion = 0x0, - UnanchoredWildcardConversion = 0x1 + UnanchoredWildcardConversion = 0x1, + NonPathWildcardConversion = 0x2, }; Q_DECLARE_FLAGS(WildcardConversionOptions, WildcardConversionOption) diff --git a/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp b/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp index 6d7e15de6b..212e315a34 100644 --- a/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp +++ b/tests/auto/corelib/text/qregularexpression/tst_qregularexpression.cpp @@ -2446,54 +2446,68 @@ void tst_QRegularExpression::wildcard_data() { QTest::addColumn<QString>("pattern"); QTest::addColumn<QString>("string"); - QTest::addColumn<qsizetype>("foundIndex"); + QTest::addColumn<bool>("matchesPathGlob"); + QTest::addColumn<bool>("matchesNonPathGlob"); - auto addRow = [](const char *pattern, const char *string, qsizetype foundIndex) { - QTest::addRow("%s@%s", pattern, string) << pattern << string << foundIndex; + auto addRow = [](const char *pattern, const char *string, bool matchesPathGlob, bool matchesNonPathGlob) { + QTest::addRow("%s@%s", pattern, string) << pattern << string << matchesPathGlob << matchesNonPathGlob; }; - addRow("*.html", "test.html", 0); - addRow("*.html", "test.htm", -1); - addRow("*bar*", "foobarbaz", 0); - 
addRow("*", "Qt Rocks!", 0); - addRow("*.h", "test.cpp", -1); - addRow("*.???l", "test.html", 0); - addRow("*?", "test.html", 0); - addRow("*?ml", "test.html", 0); - addRow("*[*]", "test.html", -1); - addRow("*[?]","test.html", -1); - addRow("*[?]ml","test.h?ml", 0); - addRow("*[[]ml","test.h[ml", 0); - addRow("*[]]ml","test.h]ml", 0); - addRow("*.h[a-z]ml", "test.html", 0); - addRow("*.h[A-Z]ml", "test.html", -1); - addRow("*.h[A-Z]ml", "test.hTml", 0); - addRow("*.h[!A-Z]ml", "test.hTml", -1); - addRow("*.h[!A-Z]ml", "test.html", 0); - addRow("*.h[!T]ml", "test.hTml", -1); - addRow("*.h[!T]ml", "test.html", 0); - addRow("*.h[!T]m[!L]", "test.htmL", -1); - addRow("*.h[!T]m[!L]", "test.html", 0); - addRow("*.h[][!]ml", "test.h]ml", 0); - addRow("*.h[][!]ml", "test.h[ml", 0); - addRow("*.h[][!]ml", "test.h!ml", 0); - - addRow("foo/*/bar", "foo/baz/bar", 0); - addRow("foo/(*)/bar", "foo/baz/bar", -1); - addRow("foo/(*)/bar", "foo/(baz)/bar", 0); - addRow("foo/?/bar", "foo/Q/bar", 0); - addRow("foo/?/bar", "foo/Qt/bar", -1); - addRow("foo/(?)/bar", "foo/Q/bar", -1); - addRow("foo/(?)/bar", "foo/(Q)/bar", 0); + addRow("*.html", "test.html", true, true); + addRow("*.html", "test.htm", false, false); + addRow("*bar*", "foobarbaz", true, true); + addRow("*", "Qt Rocks!", true, true); + addRow("*.h", "test.cpp", false, false); + addRow("*.???l", "test.html", true, true); + addRow("*?", "test.html", true, true); + addRow("*?ml", "test.html", true, true); + addRow("*[*]", "test.html", false, false); + addRow("*[?]","test.html", false, false); + addRow("*[?]ml","test.h?ml", true, true); + addRow("*[[]ml","test.h[ml", true, true); + addRow("*[]]ml","test.h]ml", true, true); + addRow("*.h[a-z]ml", "test.html", true, true); + addRow("*.h[A-Z]ml", "test.html", false, false); + addRow("*.h[A-Z]ml", "test.hTml", true, true); + addRow("*.h[!A-Z]ml", "test.hTml", false, false); + addRow("*.h[!A-Z]ml", "test.html", true, true); + addRow("*.h[!T]ml", "test.hTml", false, false); + 
addRow("*.h[!T]ml", "test.html", true, true); + addRow("*.h[!T]m[!L]", "test.htmL", false, false); + addRow("*.h[!T]m[!L]", "test.html", true, true); + addRow("*.h[][!]ml", "test.h]ml", true, true); + addRow("*.h[][!]ml", "test.h[ml", true, true); + addRow("*.h[][!]ml", "test.h!ml", true, true); + + addRow("foo/*/bar", "foo/baz/bar", true, true); + addRow("foo/*/bar", "foo/fie/baz/bar", false, true); + addRow("foo?bar", "foo/bar", false, true); + addRow("foo/(*)/bar", "foo/baz/bar", false, false); + addRow("foo/(*)/bar", "foo/(baz)/bar", true, true); + addRow("foo/?/bar", "foo/Q/bar", true, true); + addRow("foo/?/bar", "foo/Qt/bar", false, false); + addRow("foo/(?)/bar", "foo/Q/bar", false, false); + addRow("foo/(?)/bar", "foo/(Q)/bar", true, true); + + addRow("foo*bar", "foo/fie/baz/bar", false, true); + addRow("fie*bar", "foo/fie/baz/bar", false, false); // regexp is anchored #ifdef Q_OS_WIN - addRow("foo\\*\\bar", "foo\\baz\\bar", 0); - addRow("foo\\(*)\\bar", "foo\\baz\\bar", -1); - addRow("foo\\(*)\\bar", "foo\\(baz)\\bar", 0); - addRow("foo\\?\\bar", "foo\\Q\\bar", 0); - addRow("foo\\?\\bar", "foo\\Qt\\bar", -1); - addRow("foo\\(?)\\bar", "foo\\Q\\bar", -1); - addRow("foo\\(?)\\bar", "foo\\(Q)\\bar", 0); + addRow("foo\\*\\bar", "foo\\baz\\bar", true, true); + addRow("foo\\*\\bar", "foo/baz/bar", true, false); + addRow("foo\\*\\bar", "foo/baz\\bar", true, false); + addRow("foo\\*\\bar", "foo\\fie\\baz\\bar", false, true); + addRow("foo\\*\\bar", "foo/fie/baz/bar", false, false); + addRow("foo/*/bar", "foo\\baz\\bar", true, false); + addRow("foo/*/bar", "foo/baz/bar", true, true); + addRow("foo/*/bar", "foo\\fie\\baz\\bar", false, false); + addRow("foo/*/bar", "foo/fie/baz/bar", false, true); + addRow("foo\\(*)\\bar", "foo\\baz\\bar", false, false); + addRow("foo\\(*)\\bar", "foo\\(baz)\\bar", true, true); + addRow("foo\\?\\bar", "foo\\Q\\bar", true, true); + addRow("foo\\?\\bar", "foo\\Qt\\bar", false, false); + addRow("foo\\(?)\\bar", "foo\\Q\\bar", false, 
false); + addRow("foo\\(?)\\bar", "foo\\(Q)\\bar", true, true); #endif } @@ -2501,12 +2515,17 @@ void tst_QRegularExpression::wildcard() { QFETCH(QString, pattern); QFETCH(QString, string); - QFETCH(qsizetype, foundIndex); + QFETCH(bool, matchesPathGlob); + QFETCH(bool, matchesNonPathGlob); - QRegularExpression re(QRegularExpression::wildcardToRegularExpression(pattern)); - QRegularExpressionMatch match = re.match(string); - - QCOMPARE(match.capturedStart(), foundIndex); + { + QRegularExpression re(QRegularExpression::wildcardToRegularExpression(pattern)); + QCOMPARE(string.contains(re), matchesPathGlob); + } + { + QRegularExpression re(QRegularExpression::wildcardToRegularExpression(pattern, QRegularExpression::NonPathWildcardConversion)); + QCOMPARE(string.contains(re), matchesNonPathGlob); + } } void tst_QRegularExpression::testInvalidWildcard_data() |