summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qregularexpression.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r--src/corelib/text/qregularexpression.cpp525
1 files changed, 300 insertions, 225 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp
index a311c0878f..78261e14cb 100644
--- a/src/corelib/text/qregularexpression.cpp
+++ b/src/corelib/text/qregularexpression.cpp
@@ -1,43 +1,7 @@
-/****************************************************************************
-**
-** Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
-** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
-** Copyright (C) 2021 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
+// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
+// Copyright (C) 2021 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#include "qregularexpression.h"
@@ -47,17 +11,22 @@
#include <QtCore/qmutex.h>
#include <QtCore/qstringlist.h>
#include <QtCore/qdebug.h>
-#include <QtCore/qthreadstorage.h>
#include <QtCore/qglobal.h>
#include <QtCore/qatomic.h>
#include <QtCore/qdatastream.h>
+#if defined(Q_OS_MACOS)
+#include <QtCore/private/qcore_mac_p.h>
+#endif
+
#define PCRE2_CODE_UNIT_WIDTH 16
#include <pcre2.h>
QT_BEGIN_NAMESPACE
+using namespace Qt::StringLiterals;
+
/*!
\class QRegularExpression
\inmodule QtCore
@@ -73,6 +42,7 @@ QT_BEGIN_NAMESPACE
\keyword regular expression
+ \compares equality
Regular expressions, or \e{regexps}, are a very powerful tool to handle
strings and texts. This is useful in many contexts, e.g.,
@@ -752,7 +722,7 @@ struct QRegularExpressionPrivate : QSharedData
CheckSubjectStringOption checkSubjectStringOption = CheckSubjectString,
const QRegularExpressionMatchPrivate *previous = nullptr) const;
- int captureIndexForName(QStringView name) const;
+ int captureIndexForName(QAnyStringView name) const;
// sizeof(QSharedData) == 4, so start our members with an enum
QRegularExpression::PatternOptions patternOptions;
@@ -822,6 +792,24 @@ struct QRegularExpressionMatchIteratorPrivate : QSharedData
/*!
\internal
+
+ Used to centralize the warning about using an invalid QRegularExpression.
+ In case the pattern is an illegal UTF-16 string, we can't pass print it
+ (pass it to qUtf16Printable, etc.), so we need to check for that.
+*/
+Q_DECL_COLD_FUNCTION
+void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *where)
+{
+ if (pattern.isValidUtf16()) {
+ qWarning("%s(): called on an invalid QRegularExpression object "
+ "(pattern is '%ls')", where, qUtf16Printable(pattern));
+ } else {
+ qWarning("%s(): called on an invalid QRegularExpression object", where);
+ }
+}
+
+/*!
+ \internal
*/
QRegularExpression::QRegularExpression(QRegularExpressionPrivate &dd)
: d(&dd)
@@ -906,8 +894,8 @@ void QRegularExpressionPrivate::compilePattern()
options |= PCRE2_UTF;
PCRE2_SIZE patternErrorOffset;
- compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.utf16()),
- pattern.length(),
+ compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.constData()),
+ pattern.size(),
options,
&errorCode,
&patternErrorOffset,
@@ -958,43 +946,24 @@ void QRegularExpressionPrivate::getPatternInfo()
Simple "smartpointer" wrapper around a pcre2_jit_stack_16, to be used with
QThreadStorage.
*/
-class QPcreJitStackPointer
+namespace {
+struct PcreJitStackFree
{
- Q_DISABLE_COPY(QPcreJitStackPointer)
-
-public:
- /*!
- \internal
- */
- QPcreJitStackPointer()
- {
- // The default JIT stack size in PCRE is 32K,
- // we allocate from 32K up to 512K.
- stack = pcre2_jit_stack_create_16(32 * 1024, 512 * 1024, nullptr);
- }
- /*!
- \internal
- */
- ~QPcreJitStackPointer()
+ void operator()(pcre2_jit_stack_16 *stack)
{
if (stack)
pcre2_jit_stack_free_16(stack);
}
-
- pcre2_jit_stack_16 *stack;
};
-
-Q_GLOBAL_STATIC(QThreadStorage<QPcreJitStackPointer *>, jitStacks)
+Q_CONSTINIT static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks;
+}
/*!
\internal
*/
static pcre2_jit_stack_16 *qtPcreCallback(void *)
{
- if (jitStacks()->hasLocalData())
- return jitStacks()->localData()->stack;
-
- return nullptr;
+ return jitStacks.get();
}
/*!
@@ -1011,6 +980,8 @@ static bool isJitEnabled()
#ifdef QT_DEBUG
return false;
+#elif defined(Q_OS_MACOS)
+ return !qt_mac_runningUnderRosetta();
#else
return true;
#endif
@@ -1043,7 +1014,7 @@ void QRegularExpressionPrivate::optimizePattern()
Returns the capturing group number for the given name. Duplicated names for
capturing groups are not supported.
*/
-int QRegularExpressionPrivate::captureIndexForName(QStringView name) const
+int QRegularExpressionPrivate::captureIndexForName(QAnyStringView name) const
{
Q_ASSERT(!name.isEmpty());
@@ -1088,9 +1059,10 @@ static int safe_pcre2_match_16(const pcre2_code_16 *code,
int result = pcre2_match_16(code, subject, length,
startOffset, options, matchData, matchContext);
- if (result == PCRE2_ERROR_JIT_STACKLIMIT && !jitStacks()->hasLocalData()) {
- QPcreJitStackPointer *p = new QPcreJitStackPointer;
- jitStacks()->setLocalData(p);
+ if (result == PCRE2_ERROR_JIT_STACKLIMIT && !jitStacks) {
+ // The default JIT stack size in PCRE is 32K,
+ // we allocate from 32K up to 512K.
+ jitStacks.reset(pcre2_jit_stack_create_16(32 * 1024, 512 * 1024, NULL));
result = pcre2_match_16(code, subject, length,
startOffset, options, matchData, matchContext);
@@ -1133,7 +1105,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
const QRegularExpressionMatchPrivate *previous) const
{
Q_ASSERT(priv);
- Q_ASSUME(priv != previous);
+ Q_ASSERT(priv != previous);
const qsizetype subjectLength = priv->subject.size();
@@ -1144,7 +1116,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
return;
if (Q_UNLIKELY(!compiledPattern)) {
- qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object");
+ qtWarnAboutInvalidRegularExpression(pattern, "QRegularExpressionPrivate::doMatch");
return;
}
@@ -1174,7 +1146,19 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
pcre2_jit_stack_assign_16(matchContext, &qtPcreCallback, nullptr);
pcre2_match_data_16 *matchData = pcre2_match_data_create_from_pattern_16(compiledPattern, nullptr);
- const char16_t * const subjectUtf16 = priv->subject.utf16();
+ // PCRE does not accept a null pointer as subject string, even if
+ // its length is zero. We however allow it in input: a QStringView
+ // subject may have data == nullptr. In this case, to keep PCRE
+ // happy, pass a pointer to a dummy character.
+ const char16_t dummySubject = 0;
+ const char16_t * const subjectUtf16 = [&]()
+ {
+ const auto subjectUtf16 = priv->subject.utf16();
+ if (subjectUtf16)
+ return subjectUtf16;
+ Q_ASSERT(subjectLength == 0);
+ return &dummySubject;
+ }();
int result;
@@ -1194,8 +1178,8 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
if (usingCrLfNewlines
&& offset < subjectLength
- && subjectUtf16[offset - 1] == QLatin1Char('\r')
- && subjectUtf16[offset] == QLatin1Char('\n')) {
+ && subjectUtf16[offset - 1] == u'\r'
+ && subjectUtf16[offset] == u'\n') {
++offset;
} else if (offset < subjectLength
&& QChar::isLowSurrogate(subjectUtf16[offset])) {
@@ -1247,6 +1231,12 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData);
qsizetype *const capturedOffsets = priv->capturedOffsets.data();
+ // We rely on the fact that capturing groups that did not
+ // capture anything have offset -1, but PCRE technically
+ // returns "PCRE2_UNSET". Test that out, better safe than
+ // sorry...
+ static_assert(qsizetype(PCRE2_UNSET) == qsizetype(-1), "Internal error: PCRE2 changed its API");
+
for (int i = 0; i < priv->capturedCount * 2; ++i)
capturedOffsets[i] = qsizetype(ovector[i]);
@@ -1365,10 +1355,7 @@ QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions op
\sa operator=()
*/
-QRegularExpression::QRegularExpression(const QRegularExpression &re)
- : d(re.d)
-{
-}
+QRegularExpression::QRegularExpression(const QRegularExpression &re) noexcept = default;
/*!
\fn QRegularExpression::QRegularExpression(QRegularExpression &&re)
@@ -1397,11 +1384,7 @@ QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QRegularExpressionPrivate)
Assigns the regular expression \a re to this object, and returns a reference
to the copy. Both the pattern and the pattern options are copied.
*/
-QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re)
-{
- d = re.d;
- return *this;
-}
+QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re) noexcept = default;
/*!
\fn void QRegularExpression::swap(QRegularExpression &other)
@@ -1428,6 +1411,8 @@ QString QRegularExpression::pattern() const
*/
void QRegularExpression::setPattern(const QString &pattern)
{
+ if (d->pattern == pattern)
+ return;
d.detach();
d->isDirty = true;
d->pattern = pattern;
@@ -1451,6 +1436,8 @@ QRegularExpression::PatternOptions QRegularExpression::patternOptions() const
*/
void QRegularExpression::setPatternOptions(PatternOptions options)
{
+ if (d->patternOptions == options)
+ return;
d.detach();
d->isDirty = true;
d->patternOptions = options;
@@ -1522,7 +1509,7 @@ QStringList QRegularExpression::namedCaptureGroups() const
reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i;
const int index = *currentNamedCapturingTableRow;
- result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1);
+ result[index] = QStringView(currentNamedCapturingTableRow + 1).toString();
}
return result;
@@ -1554,10 +1541,10 @@ QString QRegularExpression::errorString() const
QString errorString;
int errorStringLength;
do {
- errorString.resize(errorString.length() + 64);
+ errorString.resize(errorString.size() + 64);
errorStringLength = pcre2_get_error_message_16(d->errorCode,
reinterpret_cast<ushort *>(errorString.data()),
- errorString.length());
+ errorString.size());
} while (errorStringLength < 0);
errorString.resize(errorStringLength);
@@ -1568,7 +1555,7 @@ QString QRegularExpression::errorString() const
#endif
}
#ifdef QT_NO_TRANSLATION
- return QLatin1String("no error");
+ return u"no error"_s;
#else
return QCoreApplication::translate("QRegularExpression", "no error");
#endif
@@ -1595,11 +1582,6 @@ qsizetype QRegularExpression::patternErrorOffset() const
The returned QRegularExpressionMatch object contains the results of the
match.
- \note The data referenced by \a subject should remain valid as long
- as there are QRegularExpressionMatch objects using it. At the moment
- Qt makes a (shallow) copy of the data, but this behavior may change
- in a future version of Qt.
-
\sa QRegularExpressionMatch, {normal matching}
*/
QRegularExpressionMatch QRegularExpression::match(const QString &subject,
@@ -1610,16 +1592,33 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject,
d.data()->compilePattern();
auto priv = new QRegularExpressionMatchPrivate(*this,
subject,
- qToStringViewIgnoringNull(subject),
+ QStringView(subject),
matchType,
matchOptions);
d->doMatch(priv, offset);
return QRegularExpressionMatch(*priv);
}
+#if QT_DEPRECATED_SINCE(6, 8)
/*!
\since 6.0
\overload
+ \obsolete
+
+ Use matchView() instead.
+*/
+QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ return matchView(subjectView, offset, matchType, matchOptions);
+}
+#endif // QT_DEPRECATED_SINCE(6, 8)
+
+/*!
+ \since 6.5
+ \overload
Attempts to match the regular expression against the given \a subjectView
string view, starting at the position \a offset inside the subject, using a
@@ -1633,10 +1632,10 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject,
\sa QRegularExpressionMatch, {normal matching}
*/
-QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
- qsizetype offset,
- MatchType matchType,
- MatchOptions matchOptions) const
+QRegularExpressionMatch QRegularExpression::matchView(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
{
d.data()->compilePattern();
auto priv = new QRegularExpressionMatchPrivate(*this,
@@ -1657,11 +1656,6 @@ QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
The returned QRegularExpressionMatchIterator is positioned before the
first match result (if any).
- \note The data referenced by \a subject should remain valid as long
- as there are QRegularExpressionMatch objects using it. At the moment
- Qt makes a (shallow) copy of the data, but this behavior may change
- in a future version of Qt.
-
\sa QRegularExpressionMatchIterator, {global matching}
*/
QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject,
@@ -1678,9 +1672,26 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
return QRegularExpressionMatchIterator(*priv);
}
+#if QT_DEPRECATED_SINCE(6, 8)
/*!
\since 6.0
\overload
+ \obsolete
+
+ Use globalMatchView() instead.
+*/
+QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ return globalMatchView(subjectView, offset, matchType, matchOptions);
+}
+#endif // QT_DEPRECATED_SINCE(6, 8)
+
+/*!
+ \since 6.5
+ \overload
Attempts to perform a global match of the regular expression against the
given \a subjectView string view, starting at the position \a offset inside the
@@ -1696,16 +1707,16 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
\sa QRegularExpressionMatchIterator, {global matching}
*/
-QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
- qsizetype offset,
- MatchType matchType,
- MatchOptions matchOptions) const
+QRegularExpressionMatchIterator QRegularExpression::globalMatchView(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
{
QRegularExpressionMatchIteratorPrivate *priv =
new QRegularExpressionMatchIteratorPrivate(*this,
matchType,
matchOptions,
- match(subjectView, offset, matchType, matchOptions));
+ matchView(subjectView, offset, matchType, matchOptions));
return QRegularExpressionMatchIterator(*priv);
}
@@ -1724,18 +1735,20 @@ void QRegularExpression::optimize() const
}
/*!
- Returns \c true if the regular expression is equal to \a re, or false
+ \fn bool QRegularExpression::operator==(const QRegularExpression &lhs, const QRegularExpression &rhs) noexcept
+
+ Returns \c true if the \a lhs regular expression is equal to the \a rhs, or false
otherwise. Two QRegularExpression objects are equal if they have
the same pattern string and the same pattern options.
\sa operator!=()
*/
-bool QRegularExpression::operator==(const QRegularExpression &re) const
+bool comparesEqual(const QRegularExpression &lhs,
+ const QRegularExpression &rhs) noexcept
{
- return (d == re.d) ||
- (d->pattern == re.d->pattern && d->patternOptions == re.d->patternOptions);
+ return (lhs.d == rhs.d) ||
+ (lhs.d->pattern == rhs.d->pattern && lhs.d->patternOptions == rhs.d->patternOptions);
}
-
/*!
\fn QRegularExpression & QRegularExpression::operator=(QRegularExpression && re)
@@ -1748,9 +1761,9 @@ bool QRegularExpression::operator==(const QRegularExpression &re) const
*/
/*!
- \fn bool QRegularExpression::operator!=(const QRegularExpression &re) const
+ \fn bool QRegularExpression::operator!=(const QRegularExpression &lhs, const QRegularExpression &rhs) noexcept
- Returns \c true if the regular expression is different from \a re, or
+ Returns \c true if the \a lhs regular expression is different from the \a rhs, or
false otherwise.
\sa operator==()
@@ -1768,12 +1781,10 @@ size_t qHash(const QRegularExpression &key, size_t seed) noexcept
return qHashMulti(seed, key.d->pattern, key.d->patternOptions);
}
-#if QT_STRINGVIEW_LEVEL < 2
/*!
\fn QString QRegularExpression::escape(const QString &str)
\overload
*/
-#endif // QT_STRINGVIEW_LEVEL < 2
/*!
\since 5.15
@@ -1810,14 +1821,13 @@ QString QRegularExpression::escape(QStringView str)
// unlike Perl, a literal NUL must be escaped with
// "\\0" (backslash + 0) and not "\\\0" (backslash + NUL),
// because pcre16_compile uses a NUL-terminated string
- result.append(QLatin1Char('\\'));
- result.append(QLatin1Char('0'));
- } else if ( (current < QLatin1Char('a') || current > QLatin1Char('z')) &&
- (current < QLatin1Char('A') || current > QLatin1Char('Z')) &&
- (current < QLatin1Char('0') || current > QLatin1Char('9')) &&
- current != QLatin1Char('_') )
- {
- result.append(QLatin1Char('\\'));
+ result.append(u'\\');
+ result.append(u'0');
+ } else if ((current < u'a' || current > u'z') &&
+ (current < u'A' || current > u'Z') &&
+ (current < u'0' || current > u'9') &&
+ current != u'_') {
+ result.append(u'\\');
result.append(current);
if (current.isHighSurrogate() && i < (count - 1))
result.append(str.at(++i));
@@ -1830,13 +1840,11 @@ QString QRegularExpression::escape(QStringView str)
return result;
}
-#if QT_STRINGVIEW_LEVEL < 2
/*!
\since 5.12
\fn QString QRegularExpression::wildcardToRegularExpression(const QString &pattern, WildcardConversionOptions options)
\overload
*/
-#endif // QT_STRINGVIEW_LEVEL < 2
/*!
\since 6.0
@@ -1851,22 +1859,30 @@ QString QRegularExpression::escape(QStringView str)
\value UnanchoredWildcardConversion
The conversion will not anchor the pattern. This allows for partial string matches of
wildcard expressions.
+
+ \value [since 6.6] NonPathWildcardConversion
+ The conversion will \e{not} interpret the pattern as filepath globbing.
+
+ \sa QRegularExpression::wildcardToRegularExpression
*/
/*!
\since 5.15
Returns a regular expression representation of the given glob \a pattern.
- The transformation is targeting file path globbing, which means in particular
- that path separators receive special treatment. This implies that it is not
- just a basic translation from "*" to ".*".
+
+ There are two transformations possible, one that targets file path
+ globbing, and another one which is more generic.
+
+ By default, the transformation is targeting file path globbing,
+ which means in particular that path separators receive special
+ treatment. This implies that it is not just a basic translation
+ from "*" to ".*" and similar.
\snippet code/src_corelib_text_qregularexpression.cpp 31
- By default, the returned regular expression is fully anchored. In other
- words, there is no need of calling anchoredPattern() again on the
- result. To get an a regular expression that is not anchored, pass
- UnanchoredWildcardConversion as the conversion \a options.
+ The more generic globbing transformation is available by passing
+ \c NonPathWildcardConversion in the conversion \a options.
This implementation follows closely the definition
of wildcard for glob patterns:
@@ -1875,10 +1891,12 @@ QString QRegularExpression::escape(QStringView str)
\li Any character represents itself apart from those mentioned
below. Thus \b{c} matches the character \e c.
\row \li \b{?}
- \li Matches any single character. It is the same as
- \b{.} in full regexps.
+ \li Matches any single character, except for a path separator
+ (in case file path globbing has been selected). It is the
+ same as b{.} in full regexps.
\row \li \b{*}
- \li Matches zero or more of any characters. It is the
+ \li Matches zero or more of any characters, except for path
+ separators (in case file path globbing has been selected). It is the
same as \b{.*} in full regexps.
\row \li \b{[abc]}
\li Matches one character given in the bracket.
@@ -1892,9 +1910,10 @@ QString QRegularExpression::escape(QStringView str)
bracket. It is the same as \b{[^a-c]} in full regexp.
\endtable
- \note The backslash (\\) character is \e not an escape char in this context.
- In order to match one of the special characters, place it in square brackets
- (for example, \c{[?]}).
+ \note For historical reasons, a backslash (\\) character is \e not
+ an escape char in this context. In order to match one of the
+ special characters, place it in square brackets (for example,
+ \c{[?]}).
More information about the implementation can be found in:
\list
@@ -1902,6 +1921,11 @@ QString QRegularExpression::escape(QStringView str)
\li \c {man 7 glob}
\endlist
+ By default, the returned regular expression is fully anchored. In other
+ words, there is no need of calling anchoredPattern() again on the
+ result. To get a regular expression that is not anchored, pass
+ UnanchoredWildcardConversion in the conversion \a options.
+
\sa escape()
*/
QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options)
@@ -1912,29 +1936,51 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
qsizetype i = 0;
const QChar *wc = pattern.data();
+ struct GlobSettings {
+ char16_t nativePathSeparator;
+ QStringView starEscape;
+ QStringView questionMarkEscape;
+ };
+
+ const GlobSettings settings = [options]() {
+ if (options.testFlag(NonPathWildcardConversion)) {
+ // using [\d\D] to mean "match everything";
+ // dot doesn't match newlines, unless in /s mode
+ return GlobSettings{ u'\0', u"[\\d\\D]*", u"[\\d\\D]" };
+ } else {
#ifdef Q_OS_WIN
- const QLatin1Char nativePathSeparator('\\');
- const QLatin1String starEscape("[^/\\\\]*");
- const QLatin1String questionMarkEscape("[^/\\\\]");
+ return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" };
#else
- const QLatin1Char nativePathSeparator('/');
- const QLatin1String starEscape("[^/]*");
- const QLatin1String questionMarkEscape("[^/]");
+ return GlobSettings{ u'/', u"[^/]*", u"[^/]" };
#endif
+ }
+ }();
while (i < wclen) {
const QChar c = wc[i++];
switch (c.unicode()) {
case '*':
- rx += starEscape;
+ rx += settings.starEscape;
break;
case '?':
- rx += questionMarkEscape;
+ rx += settings.questionMarkEscape;
break;
+ // When not using filepath globbing: \ is escaped, / is itself
+ // When using filepath globbing:
+ // * Unix: \ gets escaped. / is itself
+ // * Windows: \ and / can match each other -- they become [/\\] in regexp
case '\\':
#ifdef Q_OS_WIN
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u"\\\\";
+ else
+ rx += u"[/\\\\]";
+ break;
case '/':
- rx += QLatin1String("[/\\\\]");
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u'/';
+ else
+ rx += u"[/\\\\]";
break;
#endif
case '$':
@@ -1946,29 +1992,31 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
case '{':
case '|':
case '}':
- rx += QLatin1Char('\\');
+ rx += u'\\';
rx += c;
break;
case '[':
rx += c;
// Support for the [!abc] or [!a-c] syntax
if (i < wclen) {
- if (wc[i] == QLatin1Char('!')) {
- rx += QLatin1Char('^');
+ if (wc[i] == u'!') {
+ rx += u'^';
++i;
}
- if (i < wclen && wc[i] == QLatin1Char(']'))
+ if (i < wclen && wc[i] == u']')
rx += wc[i++];
- while (i < wclen && wc[i] != QLatin1Char(']')) {
- // The '/' appearing in a character class invalidates the
- // regular expression parsing. It also concerns '\\' on
- // Windows OS types.
- if (wc[i] == QLatin1Char('/') || wc[i] == nativePathSeparator)
- return rx;
- if (wc[i] == QLatin1Char('\\'))
- rx += QLatin1Char('\\');
+ while (i < wclen && wc[i] != u']') {
+ if (!options.testFlag(NonPathWildcardConversion)) {
+ // The '/' appearing in a character class invalidates the
+ // regular expression parsing. It also concerns '\\' on
+ // Windows OS types.
+ if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator)
+ return rx;
+ }
+ if (wc[i] == u'\\')
+ rx += u'\\';
rx += wc[i++];
}
}
@@ -2006,13 +2054,11 @@ QRegularExpression QRegularExpression::fromWildcard(QStringView pattern, Qt::Cas
return QRegularExpression(wildcardToRegularExpression(pattern, options), reOptions);
}
-#if QT_STRINGVIEW_LEVEL < 2
/*!
\fn QRegularExpression::anchoredPattern(const QString &expression)
\since 5.12
\overload
*/
-#endif // QT_STRINGVIEW_LEVEL < 2
/*!
\since 5.15
@@ -2023,9 +2069,9 @@ QRegularExpression QRegularExpression::fromWildcard(QStringView pattern, Qt::Cas
QString QRegularExpression::anchoredPattern(QStringView expression)
{
return QString()
- + QLatin1String("\\A(?:")
+ + "\\A(?:"_L1
+ expression
- + QLatin1String(")\\z");
+ + ")\\z"_L1;
}
/*!
@@ -2166,7 +2212,7 @@ QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const
If the regular expression did not match, this function returns -1.
- \sa captured(), capturedStart(), capturedEnd(), capturedLength()
+ \sa hasCaptured(), captured(), capturedStart(), capturedEnd(), capturedLength()
*/
int QRegularExpressionMatch::lastCapturedIndex() const
{
@@ -2174,6 +2220,65 @@ int QRegularExpressionMatch::lastCapturedIndex() const
}
/*!
+ \fn bool QRegularExpressionMatch::hasCaptured(QAnyStringView name) const
+ \since 6.3
+
+ Returns true if the capturing group named \a name captured something
+ in the subject string, and false otherwise (or if there is no
+ capturing group called \a name).
+
+ \note Some capturing groups in a regular expression may not have
+ captured anything even if the regular expression matched. This may
+ happen, for instance, if a conditional operator is used in the
+ pattern:
+
+ \snippet code/src_corelib_text_qregularexpression.cpp 36
+
+ Similarly, a capturing group may capture a substring of length 0;
+ this function will return \c{true} for such a capturing group.
+
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
+ \sa captured(), hasMatch()
+*/
+bool QRegularExpressionMatch::hasCaptured(QAnyStringView name) const
+{
+ const int nth = d->regularExpression.d->captureIndexForName(name);
+ return hasCaptured(nth);
+}
+
+/*!
+ \since 6.3
+
+ Returns true if the \a nth capturing group captured something
+ in the subject string, and false otherwise (or if there is no
+ such capturing group).
+
+ \note The implicit capturing group number 0 captures the substring
+ matched by the entire pattern.
+
+ \note Some capturing groups in a regular expression may not have
+ captured anything even if the regular expression matched. This may
+ happen, for instance, if a conditional operator is used in the
+ pattern:
+
+ \snippet code/src_corelib_text_qregularexpression.cpp 36
+
+ Similarly, a capturing group may capture a substring of length 0;
+ this function will return \c{true} for such a capturing group.
+
+ \sa captured(), lastCapturedIndex(), hasMatch()
+*/
+bool QRegularExpressionMatch::hasCaptured(int nth) const
+{
+ if (nth < 0 || nth > lastCapturedIndex())
+ return false;
+
+ return d->capturedOffsets.at(nth * 2) != -1;
+}
+
+/*!
Returns the substring captured by the \a nth capturing group.
If the \a nth capturing group did not capture a string, or if there is no
@@ -2206,7 +2311,7 @@ QString QRegularExpressionMatch::captured(int nth) const
*/
QStringView QRegularExpressionMatch::capturedView(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return QStringView();
qsizetype start = capturedStart(nth);
@@ -2217,19 +2322,6 @@ QStringView QRegularExpressionMatch::capturedView(int nth) const
return d->subject.mid(start, capturedLength(nth));
}
-#if QT_STRINGVIEW_LEVEL < 2
-/*! \fn QString QRegularExpressionMatch::captured(const QString &name) const
-
- Returns the substring captured by the capturing group named \a name.
-
- If the named capturing group \a name did not capture a string, or if
- there is no capturing group named \a name, returns a null QString.
-
- \sa capturedView(), capturedStart(), capturedEnd(), capturedLength(),
- QString::isNull()
-*/
-#endif // QT_STRINGVIEW_LEVEL < 2
-
/*!
\since 5.10
@@ -2238,10 +2330,13 @@ QStringView QRegularExpressionMatch::capturedView(int nth) const
If the named capturing group \a name did not capture a string, or if
there is no capturing group named \a name, returns a null QString.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa capturedView(), capturedStart(), capturedEnd(), capturedLength(),
QString::isNull()
*/
-QString QRegularExpressionMatch::captured(QStringView name) const
+QString QRegularExpressionMatch::captured(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::captured: empty capturing group name passed");
@@ -2260,10 +2355,13 @@ QString QRegularExpressionMatch::captured(QStringView name) const
If the named capturing group \a name did not capture a string, or if
there is no capturing group named \a name, returns a null QStringView.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa captured(), capturedStart(), capturedEnd(), capturedLength(),
QStringView::isNull()
*/
-QStringView QRegularExpressionMatch::capturedView(QStringView name) const
+QStringView QRegularExpressionMatch::capturedView(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::capturedView: empty capturing group name passed");
@@ -2300,7 +2398,7 @@ QStringList QRegularExpressionMatch::capturedTexts() const
*/
qsizetype QRegularExpressionMatch::capturedStart(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return -1;
return d->capturedOffsets.at(nth * 2);
@@ -2329,45 +2427,12 @@ qsizetype QRegularExpressionMatch::capturedLength(int nth) const
*/
qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return -1;
return d->capturedOffsets.at(nth * 2 + 1);
}
-#if QT_STRINGVIEW_LEVEL < 2
-/*! \fn qsizetype QRegularExpressionMatch::capturedStart(const QString &name) const
-
- Returns the offset inside the subject string corresponding to the starting
- position of the substring captured by the capturing group named \a name.
- If the capturing group named \a name did not capture a string or doesn't
- exist, returns -1.
-
- \sa capturedEnd(), capturedLength(), captured()
-*/
-
-/*! \fn qsizetype QRegularExpressionMatch::capturedLength(const QString &name) const
-
- Returns the length of the substring captured by the capturing group named
- \a name.
-
- \note This function returns 0 if the capturing group named \a name did not
- capture a string or doesn't exist.
-
- \sa capturedStart(), capturedEnd(), captured()
-*/
-
-/*! \fn qsizetype QRegularExpressionMatch::capturedEnd(const QString &name) const
-
- Returns the offset inside the subject string immediately after the ending
- position of the substring captured by the capturing group named \a name. If
- the capturing group named \a name did not capture a string or doesn't
- exist, returns -1.
-
- \sa capturedStart(), capturedLength(), captured()
-*/
-#endif // QT_STRINGVIEW_LEVEL < 2
-
/*!
\since 5.10
@@ -2376,9 +2441,12 @@ qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
If the capturing group named \a name did not capture a string or doesn't
exist, returns -1.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa capturedEnd(), capturedLength(), captured()
*/
-qsizetype QRegularExpressionMatch::capturedStart(QStringView name) const
+qsizetype QRegularExpressionMatch::capturedStart(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::capturedStart: empty capturing group name passed");
@@ -2399,9 +2467,12 @@ qsizetype QRegularExpressionMatch::capturedStart(QStringView name) const
\note This function returns 0 if the capturing group named \a name did not
capture a string or doesn't exist.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa capturedStart(), capturedEnd(), captured()
*/
-qsizetype QRegularExpressionMatch::capturedLength(QStringView name) const
+qsizetype QRegularExpressionMatch::capturedLength(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::capturedLength: empty capturing group name passed");
@@ -2421,9 +2492,12 @@ qsizetype QRegularExpressionMatch::capturedLength(QStringView name) const
the capturing group named \a name did not capture a string or doesn't
exist, returns -1.
+ \note In Qt versions prior to 6.8, this function took QString or
+ QStringView, not QAnyStringView.
+
\sa capturedStart(), capturedLength(), captured()
*/
-qsizetype QRegularExpressionMatch::capturedEnd(QStringView name) const
+qsizetype QRegularExpressionMatch::capturedEnd(QAnyStringView name) const
{
if (name.isEmpty()) {
qWarning("QRegularExpressionMatch::capturedEnd: empty capturing group name passed");
@@ -2615,7 +2689,7 @@ QRegularExpressionMatch QRegularExpressionMatchIterator::next()
}
d.detach();
- return qExchange(d->next, d->next.d.constData()->nextMatch());
+ return std::exchange(d->next, d->next.d.constData()->nextMatch());
}
/*!
@@ -3004,7 +3078,8 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"),
- QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching")
+ QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "INTERNAL ERROR: invalid substring offset")
};
#endif // #if 0