summaryrefslogtreecommitdiffstats
path: root/src/corelib/text
diff options
context:
space:
mode:
authorGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2023-02-16 21:20:50 +0100
committerGiuseppe D'Angelo <giuseppe.dangelo@kdab.com>2023-03-07 20:57:43 +0100
commit4b197c3f52b443c11e980f87aa035b81948871ce (patch)
treec3fdea6364b1cb827e5ee9f2527978f13d5fbb80 /src/corelib/text
parentb3a60e49cdd2ff82bb820be4278cf00298777a37 (diff)
QRegularExpression: add support for non-filepath wildcards/globbing
A glob pattern has different semantics depending on whether it's used in "filepath mode" (FNM_PATHNAME) or not. QRegularExpression only implemented the former, but the latter is also useful, and possibly more "intuitive" for certain use cases (e.g. offering users a simplified version of regexps that however still need "*" to match a "/"). Add this support. The problems highlighted by QTBUG-111234 have not been addressed, I've just amended a bit of documentation relating backslashes. [ChangeLog][QtCore][QRegularExpression] Support for non-filepath wildcards has been added to wildcardToRegularExpression(). Fixes: QTBUG-110901 Task-number: QTBUG-104585 Task-number: QTBUG-111234 Change-Id: If9850616267980fa843bda996fcb4552b5375938 Reviewed-by: Eike Ziller <eike.ziller@qt.io> Reviewed-by: Samuel Gaist <samuel.gaist@idiap.ch> Reviewed-by: Volker Hilsheimer <volker.hilsheimer@qt.io>
Diffstat (limited to 'src/corelib/text')
-rw-r--r--src/corelib/text/qregularexpression.cpp93
-rw-r--r--src/corelib/text/qregularexpression.h3
2 files changed, 68 insertions, 28 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp
index 8b3cf63dd2..2c83d122a3 100644
--- a/src/corelib/text/qregularexpression.cpp
+++ b/src/corelib/text/qregularexpression.cpp
@@ -1856,22 +1856,31 @@ QString QRegularExpression::escape(QStringView str)
\value UnanchoredWildcardConversion
The conversion will not anchor the pattern. This allows for partial string matches of
wildcard expressions.
+
+ \value NonPathWildcardConversion
+ The conversion will \e{not} interpret the pattern as filepath globbing.
+ This enum value has been introduced in Qt 6.6.
+
+ \sa QRegularExpression::wildcardToRegularExpression
*/
/*!
\since 5.15
Returns a regular expression representation of the given glob \a pattern.
- The transformation is targeting file path globbing, which means in particular
- that path separators receive special treatment. This implies that it is not
- just a basic translation from "*" to ".*".
+
+ There are two transformations possible, one that targets file path
+ globbing, and another one which is more generic.
+
+ By default, the transformation is targeting file path globbing,
+ which means in particular that path separators receive special
+ treatment. This implies that it is not just a basic translation
+ from "*" to ".*" and similar.
\snippet code/src_corelib_text_qregularexpression.cpp 31
- By default, the returned regular expression is fully anchored. In other
- words, there is no need of calling anchoredPattern() again on the
- result. To get a regular expression that is not anchored, pass
- UnanchoredWildcardConversion as the conversion \a options.
+ The more generic globbing transformation is available by passing
+ \c NonPathWildcardConversion in the conversion \a options.
This implementation follows closely the definition
of wildcard for glob patterns:
@@ -1880,10 +1889,12 @@ QString QRegularExpression::escape(QStringView str)
\li Any character represents itself apart from those mentioned
below. Thus \b{c} matches the character \e c.
\row \li \b{?}
- \li Matches any single character. It is the same as
- \b{.} in full regexps.
+ \li Matches any single character, except for a path separator
+ (in case file path globbing has been selected). It is the
+ same as b{.} in full regexps.
\row \li \b{*}
- \li Matches zero or more of any characters. It is the
+ \li Matches zero or more of any characters, except for path
+ separators (in case file path globbing has been selected). It is the
same as \b{.*} in full regexps.
\row \li \b{[abc]}
\li Matches one character given in the bracket.
@@ -1897,9 +1908,10 @@ QString QRegularExpression::escape(QStringView str)
bracket. It is the same as \b{[^a-c]} in full regexp.
\endtable
- \note The backslash (\\) character is \e not an escape char in this context.
- In order to match one of the special characters, place it in square brackets
- (for example, \c{[?]}).
+ \note For historical reasons, a backslash (\\) character is \e not
+ an escape char in this context. In order to match one of the
+ special characters, place it in square brackets (for example,
+ \c{[?]}).
More information about the implementation can be found in:
\list
@@ -1907,6 +1919,11 @@ QString QRegularExpression::escape(QStringView str)
\li \c {man 7 glob}
\endlist
+ By default, the returned regular expression is fully anchored. In other
+ words, there is no need of calling anchoredPattern() again on the
+ result. To get a regular expression that is not anchored, pass
+ UnanchoredWildcardConversion in the conversion \a options.
+
\sa escape()
*/
QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options)
@@ -1917,29 +1934,49 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
qsizetype i = 0;
const QChar *wc = pattern.data();
+ struct GlobSettings {
+ char16_t nativePathSeparator;
+ QStringView starEscape;
+ QStringView questionMarkEscape;
+ };
+
+ const GlobSettings settings = [options]() {
+ if (options.testFlag(NonPathWildcardConversion)) {
+ return GlobSettings{ u'\0', u".*", u"." };
+ } else {
#ifdef Q_OS_WIN
- const char16_t nativePathSeparator = u'\\';
- const auto starEscape = "[^/\\\\]*"_L1;
- const auto questionMarkEscape = "[^/\\\\]"_L1;
+ return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" };
#else
- const char16_t nativePathSeparator = u'/';
- const auto starEscape = "[^/]*"_L1;
- const auto questionMarkEscape = "[^/]"_L1;
+ return GlobSettings{ u'/', u"[^/]*", u"[^/]" };
#endif
+ }
+ }();
while (i < wclen) {
const QChar c = wc[i++];
switch (c.unicode()) {
case '*':
- rx += starEscape;
+ rx += settings.starEscape;
break;
case '?':
- rx += questionMarkEscape;
+ rx += settings.questionMarkEscape;
break;
+ // When not using filepath globbing: \ is escaped, / is itself
+ // When using filepath globbing:
+ // * Unix: \ gets escaped. / is itself
+ // * Windows: \ and / can match each other -- they become [/\\] in regexp
case '\\':
#ifdef Q_OS_WIN
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u"\\\\";
+ else
+ rx += u"[/\\\\]";
+ break;
case '/':
- rx += "[/\\\\]"_L1;
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u'/';
+ else
+ rx += u"[/\\\\]";
break;
#endif
case '$':
@@ -1967,11 +2004,13 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
rx += wc[i++];
while (i < wclen && wc[i] != u']') {
- // The '/' appearing in a character class invalidates the
- // regular expression parsing. It also concerns '\\' on
- // Windows OS types.
- if (wc[i] == u'/' || wc[i] == nativePathSeparator)
- return rx;
+ if (!options.testFlag(NonPathWildcardConversion)) {
+ // The '/' appearing in a character class invalidates the
+ // regular expression parsing. It also concerns '\\' on
+ // Windows OS types.
+ if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator)
+ return rx;
+ }
if (wc[i] == u'\\')
rx += u'\\';
rx += wc[i++];
diff --git a/src/corelib/text/qregularexpression.h b/src/corelib/text/qregularexpression.h
index 4f1ad8d13f..a264717b28 100644
--- a/src/corelib/text/qregularexpression.h
+++ b/src/corelib/text/qregularexpression.h
@@ -131,7 +131,8 @@ public:
enum WildcardConversionOption {
DefaultWildcardConversion = 0x0,
- UnanchoredWildcardConversion = 0x1
+ UnanchoredWildcardConversion = 0x1,
+ NonPathWildcardConversion = 0x2,
};
Q_DECLARE_FLAGS(WildcardConversionOptions, WildcardConversionOption)