summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qregularexpression.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r--src/corelib/text/qregularexpression.cpp280
1 files changed, 158 insertions, 122 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp
index b4b0a3d47d..78261e14cb 100644
--- a/src/corelib/text/qregularexpression.cpp
+++ b/src/corelib/text/qregularexpression.cpp
@@ -42,6 +42,7 @@ using namespace Qt::StringLiterals;
\keyword regular expression
+ \compares equality
Regular expressions, or \e{regexps}, are a very powerful tool to handle
strings and texts. This is useful in many contexts, e.g.,
@@ -721,7 +722,7 @@ struct QRegularExpressionPrivate : QSharedData
CheckSubjectStringOption checkSubjectStringOption = CheckSubjectString,
const QRegularExpressionMatchPrivate *previous = nullptr) const;
- int captureIndexForName(QStringView name) const;
+ int captureIndexForName(QAnyStringView name) const;
// sizeof(QSharedData) == 4, so start our members with an enum
QRegularExpression::PatternOptions patternOptions;
@@ -894,7 +895,7 @@ void QRegularExpressionPrivate::compilePattern()
PCRE2_SIZE patternErrorOffset;
compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.constData()),
- pattern.length(),
+ pattern.size(),
options,
&errorCode,
&patternErrorOffset,
@@ -954,7 +955,7 @@ struct PcreJitStackFree
pcre2_jit_stack_free_16(stack);
}
};
-static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks;
+Q_CONSTINIT static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks;
}
/*!
@@ -1013,7 +1014,7 @@ void QRegularExpressionPrivate::optimizePattern()
Returns the capturing group number for the given name. Duplicated names for
capturing groups are not supported.
*/
-int QRegularExpressionPrivate::captureIndexForName(QStringView name) const
+int QRegularExpressionPrivate::captureIndexForName(QAnyStringView name) const
{
Q_ASSERT(!name.isEmpty());
@@ -1104,7 +1105,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
const QRegularExpressionMatchPrivate *previous) const
{
Q_ASSERT(priv);
- Q_ASSUME(priv != previous);
+ Q_ASSERT(priv != previous);
const qsizetype subjectLength = priv->subject.size();
@@ -1354,10 +1355,7 @@ QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions op
\sa operator=()
*/
-QRegularExpression::QRegularExpression(const QRegularExpression &re)
- : d(re.d)
-{
-}
+QRegularExpression::QRegularExpression(const QRegularExpression &re) noexcept = default;
/*!
\fn QRegularExpression::QRegularExpression(QRegularExpression &&re)
@@ -1386,11 +1384,7 @@ QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QRegularExpressionPrivate)
Assigns the regular expression \a re to this object, and returns a reference
to the copy. Both the pattern and the pattern options are copied.
*/
-QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re)
-{
- d = re.d;
- return *this;
-}
+QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re) noexcept = default;
/*!
\fn void QRegularExpression::swap(QRegularExpression &other)
@@ -1515,7 +1509,7 @@ QStringList QRegularExpression::namedCaptureGroups() const
reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i;
const int index = *currentNamedCapturingTableRow;
- result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1);
+ result[index] = QStringView(currentNamedCapturingTableRow + 1).toString();
}
return result;
@@ -1547,10 +1541,10 @@ QString QRegularExpression::errorString() const
QString errorString;
int errorStringLength;
do {
- errorString.resize(errorString.length() + 64);
+ errorString.resize(errorString.size() + 64);
errorStringLength = pcre2_get_error_message_16(d->errorCode,
reinterpret_cast<ushort *>(errorString.data()),
- errorString.length());
+ errorString.size());
} while (errorStringLength < 0);
errorString.resize(errorStringLength);
@@ -1588,11 +1582,6 @@ qsizetype QRegularExpression::patternErrorOffset() const
The returned QRegularExpressionMatch object contains the results of the
match.
- \note The data referenced by \a subject should remain valid as long
- as there are QRegularExpressionMatch objects using it. At the moment
- Qt makes a (shallow) copy of the data, but this behavior may change
- in a future version of Qt.
-
\sa QRegularExpressionMatch, {normal matching}
*/
QRegularExpressionMatch QRegularExpression::match(const QString &subject,
@@ -1610,9 +1599,26 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject,
return QRegularExpressionMatch(*priv);
}
+#if QT_DEPRECATED_SINCE(6, 8)
/*!
\since 6.0
\overload
+ \obsolete
+
+ Use matchView() instead.
+*/
+QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ return matchView(subjectView, offset, matchType, matchOptions);
+}
+#endif // QT_DEPRECATED_SINCE(6, 8)
+
+/*!
+ \since 6.5
+ \overload
Attempts to match the regular expression against the given \a subjectView
string view, starting at the position \a offset inside the subject, using a
@@ -1626,10 +1632,10 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject,
\sa QRegularExpressionMatch, {normal matching}
*/
-QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
- qsizetype offset,
- MatchType matchType,
- MatchOptions matchOptions) const
+QRegularExpressionMatch QRegularExpression::matchView(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
{
d.data()->compilePattern();
auto priv = new QRegularExpressionMatchPrivate(*this,
@@ -1650,11 +1656,6 @@ QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
The returned QRegularExpressionMatchIterator is positioned before the
first match result (if any).
- \note The data referenced by \a subject should remain valid as long
- as there are QRegularExpressionMatch objects using it. At the moment
- Qt makes a (shallow) copy of the data, but this behavior may change
- in a future version of Qt.
-
\sa QRegularExpressionMatchIterator, {global matching}
*/
QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject,
@@ -1671,9 +1672,26 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
return QRegularExpressionMatchIterator(*priv);
}
+#if QT_DEPRECATED_SINCE(6, 8)
/*!
\since 6.0
\overload
+ \obsolete
+
+ Use globalMatchView() instead.
+*/
+QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ return globalMatchView(subjectView, offset, matchType, matchOptions);
+}
+#endif // QT_DEPRECATED_SINCE(6, 8)
+
+/*!
+ \since 6.5
+ \overload
Attempts to perform a global match of the regular expression against the
given \a subjectView string view, starting at the position \a offset inside the
@@ -1689,16 +1707,16 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
\sa QRegularExpressionMatchIterator, {global matching}
*/
-QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
- qsizetype offset,
- MatchType matchType,
- MatchOptions matchOptions) const
+QRegularExpressionMatchIterator QRegularExpression::globalMatchView(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
{
QRegularExpressionMatchIteratorPrivate *priv =
new QRegularExpressionMatchIteratorPrivate(*this,
matchType,
matchOptions,
- match(subjectView, offset, matchType, matchOptions));
+ matchView(subjectView, offset, matchType, matchOptions));
return QRegularExpressionMatchIterator(*priv);
}
@@ -1717,18 +1735,20 @@ void QRegularExpression::optimize() const
}
/*!
- Returns \c true if the regular expression is equal to \a re, or false
+ \fn bool QRegularExpression::operator==(const QRegularExpression &lhs, const QRegularExpression &rhs) noexcept
+
+ Returns \c true if the \a lhs regular expression is equal to the \a rhs, or false
otherwise. Two QRegularExpression objects are equal if they have
the same pattern string and the same pattern options.
\sa operator!=()
*/
-bool QRegularExpression::operator==(const QRegularExpression &re) const
+bool comparesEqual(const QRegularExpression &lhs,
+ const QRegularExpression &rhs) noexcept
{
- return (d == re.d) ||
- (d->pattern == re.d->pattern && d->patternOptions == re.d->patternOptions);
+ return (lhs.d == rhs.d) ||
+ (lhs.d->pattern == rhs.d->pattern && lhs.d->patternOptions == rhs.d->patternOptions);
}
-
/*!
\fn QRegularExpression & QRegularExpression::operator=(QRegularExpression && re)
@@ -1741,9 +1761,9 @@ bool QRegularExpression::operator==(const QRegularExpression &re) const
*/
/*!
- \fn bool QRegularExpression::operator!=(const QRegularExpression &re) const
+ \fn bool QRegularExpression::operator!=(const QRegularExpression &lhs, const QRegularExpression &rhs) noexcept
- Returns \c true if the regular expression is different from \a re, or
+ Returns \c true if the \a lhs regular expression is different from the \a rhs, or
false otherwise.
\sa operator==()
@@ -1839,22 +1859,30 @@ QString QRegularExpression::escape(QStringView str)
\value UnanchoredWildcardConversion
The conversion will not anchor the pattern. This allows for partial string matches of
wildcard expressions.
+
+ \value [since 6.6] NonPathWildcardConversion
+ The conversion will \e{not} interpret the pattern as filepath globbing.
+
+ \sa QRegularExpression::wildcardToRegularExpression
*/
/*!
\since 5.15
Returns a regular expression representation of the given glob \a pattern.
- The transformation is targeting file path globbing, which means in particular
- that path separators receive special treatment. This implies that it is not
- just a basic translation from "*" to ".*".
+
+ There are two transformations possible, one that targets file path
+ globbing, and another one which is more generic.
+
+ By default, the transformation is targeting file path globbing,
+ which means in particular that path separators receive special
+ treatment. This implies that it is not just a basic translation
+ from "*" to ".*" and similar.
\snippet code/src_corelib_text_qregularexpression.cpp 31
- By default, the returned regular expression is fully anchored. In other
- words, there is no need of calling anchoredPattern() again on the
- result. To get an a regular expression that is not anchored, pass
- UnanchoredWildcardConversion as the conversion \a options.
+ The more generic globbing transformation is available by passing
+ \c NonPathWildcardConversion in the conversion \a options.
This implementation follows closely the definition
of wildcard for glob patterns:
@@ -1863,10 +1891,12 @@ QString QRegularExpression::escape(QStringView str)
\li Any character represents itself apart from those mentioned
below. Thus \b{c} matches the character \e c.
\row \li \b{?}
- \li Matches any single character. It is the same as
- \b{.} in full regexps.
+ \li Matches any single character, except for a path separator
+ (in case file path globbing has been selected). It is the
+ same as b{.} in full regexps.
\row \li \b{*}
- \li Matches zero or more of any characters. It is the
+ \li Matches zero or more of any characters, except for path
+ separators (in case file path globbing has been selected). It is the
same as \b{.*} in full regexps.
\row \li \b{[abc]}
\li Matches one character given in the bracket.
@@ -1880,9 +1910,10 @@ QString QRegularExpression::escape(QStringView str)
bracket. It is the same as \b{[^a-c]} in full regexp.
\endtable
- \note The backslash (\\) character is \e not an escape char in this context.
- In order to match one of the special characters, place it in square brackets
- (for example, \c{[?]}).
+ \note For historical reasons, a backslash (\\) character is \e not
+ an escape char in this context. In order to match one of the
+ special characters, place it in square brackets (for example,
+ \c{[?]}).
More information about the implementation can be found in:
\list
@@ -1890,6 +1921,11 @@ QString QRegularExpression::escape(QStringView str)
\li \c {man 7 glob}
\endlist
+ By default, the returned regular expression is fully anchored. In other
+ words, there is no need of calling anchoredPattern() again on the
+ result. To get a regular expression that is not anchored, pass
+ UnanchoredWildcardConversion in the conversion \a options.
+
\sa escape()
*/
QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options)
@@ -1900,29 +1936,51 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
qsizetype i = 0;
const QChar *wc = pattern.data();
+ struct GlobSettings {
+ char16_t nativePathSeparator;
+ QStringView starEscape;
+ QStringView questionMarkEscape;
+ };
+
+ const GlobSettings settings = [options]() {
+ if (options.testFlag(NonPathWildcardConversion)) {
+ // using [\d\D] to mean "match everything";
+ // dot doesn't match newlines, unless in /s mode
+ return GlobSettings{ u'\0', u"[\\d\\D]*", u"[\\d\\D]" };
+ } else {
#ifdef Q_OS_WIN
- const char16_t nativePathSeparator = u'\\';
- const auto starEscape = "[^/\\\\]*"_L1;
- const auto questionMarkEscape = "[^/\\\\]"_L1;
+ return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" };
#else
- const char16_t nativePathSeparator = u'/';
- const auto starEscape = "[^/]*"_L1;
- const auto questionMarkEscape = "[^/]"_L1;
+ return GlobSettings{ u'/', u"[^/]*", u"[^/]" };
#endif
+ }
+ }();
while (i < wclen) {
const QChar c = wc[i++];
switch (c.unicode()) {
case '*':
- rx += starEscape;
+ rx += settings.starEscape;
break;
case '?':
- rx += questionMarkEscape;
+ rx += settings.questionMarkEscape;
break;
+ // When not using filepath globbing: \ is escaped, / is itself
+ // When using filepath globbing:
+ // * Unix: \ gets escaped. / is itself
+ // * Windows: \ and / can match each other -- they become [/\\] in regexp
case '\\':
#ifdef Q_OS_WIN
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u"\\\\";
+ else
+ rx += u"[/\\\\]";
+ break;
case '/':
- rx += "[/\\\\]"_L1;
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u'/';
+ else
+ rx += u"[/\\\\]";
break;
#endif
case '$':
@@ -1950,11 +2008,13 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
rx += wc[i++];
while (i < wclen && wc[i] != u']') {
- // The '/' appearing in a character class invalidates the
- // regular expression parsing. It also concerns '\\' on
- // Windows OS types.
- if (wc[i] == u'/' || wc[i] == nativePathSeparator)
- return rx;
+ if (!options.testFlag(NonPathWildcardConversion)) {
+ // The '/' appearing in a character class invalidates the
+ // regular expression parsing. It also concerns '\\' on
+ // Windows OS types.
+ if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator)
+ return rx;
+ }
if (wc[i] == u'\\')
rx += u'\\';
rx += wc[i++];
@@ -2160,8 +2220,7 @@ int QRegularExpressionMatch::lastCapturedIndex() const
}
/*!
- \fn bool QRegularExpressionMatch::hasCaptured(const QString &name) const
- \fn bool QRegularExpressionMatch::hasCaptured(QStringView name) const
+ \fn bool QRegularExpressionMatch::hasCaptured(QAnyStringView name) const
\since 6.3
Returns true if the capturing group named \a name captured something
@@ -2178,9 +2237,12 @@ int QRegularExpressionMatch::lastCapturedIndex() const
Similarly, a capturing group may capture a substring of length 0;
this function will return \c{true} for such a capturing group.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa captured(), hasMatch()
*/
-bool QRegularExpressionMatch::hasCaptured(QStringView name) const
+bool QRegularExpressionMatch::hasCaptured(QAnyStringView name) const
{
const int nth = d->regularExpression.d->captureIndexForName(name);
return hasCaptured(nth);
@@ -2260,17 +2322,6 @@ QStringView QRegularExpressionMatch::capturedView(int nth) const
return d->subject.mid(start, capturedLength(nth));
}
-/*! \fn QString QRegularExpressionMatch::captured(const QString &name) const
-
- Returns the substring captured by the capturing group named \a name.
-
- If the named capturing group \a name did not capture a string, or if
- there is no capturing group named \a name, returns a null QString.
-
- \sa capturedView(), capturedStart(), capturedEnd(), capturedLength(),
- QString::isNull()
-*/
-
/*!
\since 5.10
@@ -2279,10 +2330,13 @@ QStringView QRegularExpressionMatch::capturedView(int nth) const
If the named capturing group \a name did not capture a string, or if
there is no capturing group named \a name, returns a null QString.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa capturedView(), capturedStart(), capturedEnd(), capturedLength(),
QString::isNull()
*/
-QString QRegularExpressionMatch::captured(QStringView name) const
+QString QRegularExpressionMatch::captured(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::captured: empty capturing group name passed");
@@ -2301,10 +2355,13 @@ QString QRegularExpressionMatch::captured(QStringView name) const
If the named capturing group \a name did not capture a string, or if
there is no capturing group named \a name, returns a null QStringView.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa captured(), capturedStart(), capturedEnd(), capturedLength(),
QStringView::isNull()
*/
-QStringView QRegularExpressionMatch::capturedView(QStringView name) const
+QStringView QRegularExpressionMatch::capturedView(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::capturedView: empty capturing group name passed");
@@ -2376,37 +2433,6 @@ qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
return d->capturedOffsets.at(nth * 2 + 1);
}
-/*! \fn qsizetype QRegularExpressionMatch::capturedStart(const QString &name) const
-
- Returns the offset inside the subject string corresponding to the starting
- position of the substring captured by the capturing group named \a name.
- If the capturing group named \a name did not capture a string or doesn't
- exist, returns -1.
-
- \sa capturedEnd(), capturedLength(), captured()
-*/
-
-/*! \fn qsizetype QRegularExpressionMatch::capturedLength(const QString &name) const
-
- Returns the length of the substring captured by the capturing group named
- \a name.
-
- \note This function returns 0 if the capturing group named \a name did not
- capture a string or doesn't exist.
-
- \sa capturedStart(), capturedEnd(), captured()
-*/
-
-/*! \fn qsizetype QRegularExpressionMatch::capturedEnd(const QString &name) const
-
- Returns the offset inside the subject string immediately after the ending
- position of the substring captured by the capturing group named \a name. If
- the capturing group named \a name did not capture a string or doesn't
- exist, returns -1.
-
- \sa capturedStart(), capturedLength(), captured()
-*/
-
/*!
\since 5.10
@@ -2415,9 +2441,12 @@ qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
If the capturing group named \a name did not capture a string or doesn't
exist, returns -1.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa capturedEnd(), capturedLength(), captured()
*/
-qsizetype QRegularExpressionMatch::capturedStart(QStringView name) const
+qsizetype QRegularExpressionMatch::capturedStart(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::capturedStart: empty capturing group name passed");
@@ -2438,9 +2467,12 @@ qsizetype QRegularExpressionMatch::capturedStart(QStringView name) const
\note This function returns 0 if the capturing group named \a name did not
capture a string or doesn't exist.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa capturedStart(), capturedEnd(), captured()
*/
-qsizetype QRegularExpressionMatch::capturedLength(QStringView name) const
+qsizetype QRegularExpressionMatch::capturedLength(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::capturedLength: empty capturing group name passed");
@@ -2460,9 +2492,12 @@ qsizetype QRegularExpressionMatch::capturedLength(QStringView name) const
the capturing group named \a name did not capture a string or doesn't
exist, returns -1.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa capturedStart(), capturedLength(), captured()
*/
-qsizetype QRegularExpressionMatch::capturedEnd(QStringView name) const
+qsizetype QRegularExpressionMatch::capturedEnd(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::capturedEnd: empty capturing group name passed");
@@ -2654,7 +2689,7 @@ QRegularExpressionMatch QRegularExpressionMatchIterator::next()
}
d.detach();
- return qExchange(d->next, d->next.d.constData()->nextMatch());
+ return std::exchange(d->next, d->next.d.constData()->nextMatch());
}
/*!
@@ -3043,7 +3078,8 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"),
- QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching")
+ QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "INTERNAL ERROR: invalid substring offset")
};
#endif // #if 0