summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qregularexpression.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r--src/corelib/text/qregularexpression.cpp184
1 files changed, 121 insertions, 63 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp
index b4b0a3d47d..95fd0e3d9a 100644
--- a/src/corelib/text/qregularexpression.cpp
+++ b/src/corelib/text/qregularexpression.cpp
@@ -894,7 +894,7 @@ void QRegularExpressionPrivate::compilePattern()
PCRE2_SIZE patternErrorOffset;
compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.constData()),
- pattern.length(),
+ pattern.size(),
options,
&errorCode,
&patternErrorOffset,
@@ -954,7 +954,7 @@ struct PcreJitStackFree
pcre2_jit_stack_free_16(stack);
}
};
-static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks;
+Q_CONSTINIT static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks;
}
/*!
@@ -1104,7 +1104,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
const QRegularExpressionMatchPrivate *previous) const
{
Q_ASSERT(priv);
- Q_ASSUME(priv != previous);
+ Q_ASSERT(priv != previous);
const qsizetype subjectLength = priv->subject.size();
@@ -1354,10 +1354,7 @@ QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions op
\sa operator=()
*/
-QRegularExpression::QRegularExpression(const QRegularExpression &re)
- : d(re.d)
-{
-}
+QRegularExpression::QRegularExpression(const QRegularExpression &re) noexcept = default;
/*!
\fn QRegularExpression::QRegularExpression(QRegularExpression &&re)
@@ -1386,11 +1383,7 @@ QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QRegularExpressionPrivate)
Assigns the regular expression \a re to this object, and returns a reference
to the copy. Both the pattern and the pattern options are copied.
*/
-QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re)
-{
- d = re.d;
- return *this;
-}
+QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re) noexcept = default;
/*!
\fn void QRegularExpression::swap(QRegularExpression &other)
@@ -1515,7 +1508,7 @@ QStringList QRegularExpression::namedCaptureGroups() const
reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i;
const int index = *currentNamedCapturingTableRow;
- result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1);
+ result[index] = QStringView(currentNamedCapturingTableRow + 1).toString();
}
return result;
@@ -1547,10 +1540,10 @@ QString QRegularExpression::errorString() const
QString errorString;
int errorStringLength;
do {
- errorString.resize(errorString.length() + 64);
+ errorString.resize(errorString.size() + 64);
errorStringLength = pcre2_get_error_message_16(d->errorCode,
reinterpret_cast<ushort *>(errorString.data()),
- errorString.length());
+ errorString.size());
} while (errorStringLength < 0);
errorString.resize(errorStringLength);
@@ -1588,11 +1581,6 @@ qsizetype QRegularExpression::patternErrorOffset() const
The returned QRegularExpressionMatch object contains the results of the
match.
- \note The data referenced by \a subject should remain valid as long
- as there are QRegularExpressionMatch objects using it. At the moment
- Qt makes a (shallow) copy of the data, but this behavior may change
- in a future version of Qt.
-
\sa QRegularExpressionMatch, {normal matching}
*/
QRegularExpressionMatch QRegularExpression::match(const QString &subject,
@@ -1610,9 +1598,26 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject,
return QRegularExpressionMatch(*priv);
}
+#if QT_DEPRECATED_SINCE(6, 8)
/*!
\since 6.0
\overload
+ \obsolete
+
+ Use matchView() instead.
+*/
+QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ return matchView(subjectView, offset, matchType, matchOptions);
+}
+#endif // QT_DEPRECATED_SINCE(6, 8)
+
+/*!
+ \since 6.5
+ \overload
Attempts to match the regular expression against the given \a subjectView
string view, starting at the position \a offset inside the subject, using a
@@ -1626,10 +1631,10 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject,
\sa QRegularExpressionMatch, {normal matching}
*/
-QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
- qsizetype offset,
- MatchType matchType,
- MatchOptions matchOptions) const
+QRegularExpressionMatch QRegularExpression::matchView(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
{
d.data()->compilePattern();
auto priv = new QRegularExpressionMatchPrivate(*this,
@@ -1650,11 +1655,6 @@ QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
The returned QRegularExpressionMatchIterator is positioned before the
first match result (if any).
- \note The data referenced by \a subject should remain valid as long
- as there are QRegularExpressionMatch objects using it. At the moment
- Qt makes a (shallow) copy of the data, but this behavior may change
- in a future version of Qt.
-
\sa QRegularExpressionMatchIterator, {global matching}
*/
QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject,
@@ -1671,9 +1671,26 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
return QRegularExpressionMatchIterator(*priv);
}
+#if QT_DEPRECATED_SINCE(6, 8)
/*!
\since 6.0
\overload
+ \obsolete
+
+ Use globalMatchView() instead.
+*/
+QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ return globalMatchView(subjectView, offset, matchType, matchOptions);
+}
+#endif // QT_DEPRECATED_SINCE(6, 8)
+
+/*!
+ \since 6.5
+ \overload
Attempts to perform a global match of the regular expression against the
given \a subjectView string view, starting at the position \a offset inside the
@@ -1689,16 +1706,16 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
\sa QRegularExpressionMatchIterator, {global matching}
*/
-QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
- qsizetype offset,
- MatchType matchType,
- MatchOptions matchOptions) const
+QRegularExpressionMatchIterator QRegularExpression::globalMatchView(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
{
QRegularExpressionMatchIteratorPrivate *priv =
new QRegularExpressionMatchIteratorPrivate(*this,
matchType,
matchOptions,
- match(subjectView, offset, matchType, matchOptions));
+ matchView(subjectView, offset, matchType, matchOptions));
return QRegularExpressionMatchIterator(*priv);
}
@@ -1839,22 +1856,30 @@ QString QRegularExpression::escape(QStringView str)
\value UnanchoredWildcardConversion
The conversion will not anchor the pattern. This allows for partial string matches of
wildcard expressions.
+
+ \value [since 6.6] NonPathWildcardConversion
+ The conversion will \e{not} interpret the pattern as filepath globbing.
+
+ \sa QRegularExpression::wildcardToRegularExpression
*/
/*!
\since 5.15
Returns a regular expression representation of the given glob \a pattern.
- The transformation is targeting file path globbing, which means in particular
- that path separators receive special treatment. This implies that it is not
- just a basic translation from "*" to ".*".
+
+ There are two transformations possible, one that targets file path
+ globbing, and another one which is more generic.
+
+ By default, the transformation is targeting file path globbing,
+ which means in particular that path separators receive special
+ treatment. This implies that it is not just a basic translation
+ from "*" to ".*" and similar.
\snippet code/src_corelib_text_qregularexpression.cpp 31
- By default, the returned regular expression is fully anchored. In other
- words, there is no need of calling anchoredPattern() again on the
- result. To get an a regular expression that is not anchored, pass
- UnanchoredWildcardConversion as the conversion \a options.
+ The more generic globbing transformation is available by passing
+ \c NonPathWildcardConversion in the conversion \a options.
This implementation follows closely the definition
of wildcard for glob patterns:
@@ -1863,10 +1888,12 @@ QString QRegularExpression::escape(QStringView str)
\li Any character represents itself apart from those mentioned
below. Thus \b{c} matches the character \e c.
\row \li \b{?}
- \li Matches any single character. It is the same as
- \b{.} in full regexps.
+ \li Matches any single character, except for a path separator
+ (in case file path globbing has been selected). It is the
+ same as \b{.} in full regexps.
\row \li \b{*}
- \li Matches zero or more of any characters. It is the
+ \li Matches zero or more of any characters, except for path
+ separators (in case file path globbing has been selected). It is the
same as \b{.*} in full regexps.
\row \li \b{[abc]}
\li Matches one character given in the bracket.
@@ -1880,9 +1907,10 @@ QString QRegularExpression::escape(QStringView str)
bracket. It is the same as \b{[^a-c]} in full regexp.
\endtable
- \note The backslash (\\) character is \e not an escape char in this context.
- In order to match one of the special characters, place it in square brackets
- (for example, \c{[?]}).
+ \note For historical reasons, a backslash (\\) character is \e not
+ an escape char in this context. In order to match one of the
+ special characters, place it in square brackets (for example,
+ \c{[?]}).
More information about the implementation can be found in:
\list
@@ -1890,6 +1918,11 @@ QString QRegularExpression::escape(QStringView str)
\li \c {man 7 glob}
\endlist
+ By default, the returned regular expression is fully anchored. In other
+ words, there is no need to call anchoredPattern() again on the
+ result. To get a regular expression that is not anchored, pass
+ UnanchoredWildcardConversion in the conversion \a options.
+
\sa escape()
*/
QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options)
@@ -1900,29 +1933,51 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
qsizetype i = 0;
const QChar *wc = pattern.data();
+ struct GlobSettings {
+ char16_t nativePathSeparator;
+ QStringView starEscape;
+ QStringView questionMarkEscape;
+ };
+
+ const GlobSettings settings = [options]() {
+ if (options.testFlag(NonPathWildcardConversion)) {
+ // using [\d\D] to mean "match everything";
+ // dot doesn't match newlines, unless in /s mode
+ return GlobSettings{ u'\0', u"[\\d\\D]*", u"[\\d\\D]" };
+ } else {
#ifdef Q_OS_WIN
- const char16_t nativePathSeparator = u'\\';
- const auto starEscape = "[^/\\\\]*"_L1;
- const auto questionMarkEscape = "[^/\\\\]"_L1;
+ return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" };
#else
- const char16_t nativePathSeparator = u'/';
- const auto starEscape = "[^/]*"_L1;
- const auto questionMarkEscape = "[^/]"_L1;
+ return GlobSettings{ u'/', u"[^/]*", u"[^/]" };
#endif
+ }
+ }();
while (i < wclen) {
const QChar c = wc[i++];
switch (c.unicode()) {
case '*':
- rx += starEscape;
+ rx += settings.starEscape;
break;
case '?':
- rx += questionMarkEscape;
+ rx += settings.questionMarkEscape;
break;
+ // When not using filepath globbing: \ is escaped, / is itself
+ // When using filepath globbing:
+ // * Unix: \ gets escaped. / is itself
+ // * Windows: \ and / can match each other -- they become [/\\] in regexp
case '\\':
#ifdef Q_OS_WIN
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u"\\\\";
+ else
+ rx += u"[/\\\\]";
+ break;
case '/':
- rx += "[/\\\\]"_L1;
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u'/';
+ else
+ rx += u"[/\\\\]";
break;
#endif
case '$':
@@ -1950,11 +2005,13 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
rx += wc[i++];
while (i < wclen && wc[i] != u']') {
- // The '/' appearing in a character class invalidates the
- // regular expression parsing. It also concerns '\\' on
- // Windows OS types.
- if (wc[i] == u'/' || wc[i] == nativePathSeparator)
- return rx;
+ if (!options.testFlag(NonPathWildcardConversion)) {
+ // The '/' appearing in a character class invalidates the
+ // regular expression parsing. It also concerns '\\' on
+ // Windows OS types.
+ if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator)
+ return rx;
+ }
if (wc[i] == u'\\')
rx += u'\\';
rx += wc[i++];
@@ -2654,7 +2711,7 @@ QRegularExpressionMatch QRegularExpressionMatchIterator::next()
}
d.detach();
- return qExchange(d->next, d->next.d.constData()->nextMatch());
+ return std::exchange(d->next, d->next.d.constData()->nextMatch());
}
/*!
@@ -3043,7 +3100,8 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"),
- QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching")
+ QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "INTERNAL ERROR: invalid substring offset")
};
#endif // #if 0