summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qregularexpression.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r--src/corelib/text/qregularexpression.cpp439
1 files changed, 268 insertions, 171 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp
index d92c658cb8..95fd0e3d9a 100644
--- a/src/corelib/text/qregularexpression.cpp
+++ b/src/corelib/text/qregularexpression.cpp
@@ -1,43 +1,7 @@
-/****************************************************************************
-**
-** Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
-** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
-** Copyright (C) 2021 The Qt Company Ltd.
-** Contact: https://www.qt.io/licensing/
-**
-** This file is part of the QtCore module of the Qt Toolkit.
-**
-** $QT_BEGIN_LICENSE:LGPL$
-** Commercial License Usage
-** Licensees holding valid commercial Qt licenses may use this file in
-** accordance with the commercial license agreement provided with the
-** Software or, alternatively, in accordance with the terms contained in
-** a written agreement between you and The Qt Company. For licensing terms
-** and conditions see https://www.qt.io/terms-conditions. For further
-** information use the contact form at https://www.qt.io/contact-us.
-**
-** GNU Lesser General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU Lesser
-** General Public License version 3 as published by the Free Software
-** Foundation and appearing in the file LICENSE.LGPL3 included in the
-** packaging of this file. Please review the following information to
-** ensure the GNU Lesser General Public License version 3 requirements
-** will be met: https://www.gnu.org/licenses/lgpl-3.0.html.
-**
-** GNU General Public License Usage
-** Alternatively, this file may be used under the terms of the GNU
-** General Public License version 2.0 or (at your option) the GNU General
-** Public license version 3 or any later version approved by the KDE Free
-** Qt Foundation. The licenses are as published by the Free Software
-** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3
-** included in the packaging of this file. Please review the following
-** information to ensure the GNU General Public License requirements will
-** be met: https://www.gnu.org/licenses/gpl-2.0.html and
-** https://www.gnu.org/licenses/gpl-3.0.html.
-**
-** $QT_END_LICENSE$
-**
-****************************************************************************/
+// Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>.
+// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com>
+// Copyright (C) 2021 The Qt Company Ltd.
+// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only
#include "qregularexpression.h"
@@ -47,17 +11,22 @@
#include <QtCore/qmutex.h>
#include <QtCore/qstringlist.h>
#include <QtCore/qdebug.h>
-#include <QtCore/qthreadstorage.h>
#include <QtCore/qglobal.h>
#include <QtCore/qatomic.h>
#include <QtCore/qdatastream.h>
+#if defined(Q_OS_MACOS)
+#include <QtCore/private/qcore_mac_p.h>
+#endif
+
#define PCRE2_CODE_UNIT_WIDTH 16
#include <pcre2.h>
QT_BEGIN_NAMESPACE
+using namespace Qt::StringLiterals;
+
/*!
\class QRegularExpression
\inmodule QtCore
@@ -822,6 +791,24 @@ struct QRegularExpressionMatchIteratorPrivate : QSharedData
/*!
\internal
+
+ Used to centralize the warning about using an invalid QRegularExpression.
+ In case the pattern is an illegal UTF-16 string, we can't print it
+ (pass it to qUtf16Printable, etc.), so we need to check for that.
+*/
+Q_DECL_COLD_FUNCTION
+void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *where)
+{
+ if (pattern.isValidUtf16()) {
+ qWarning("%s(): called on an invalid QRegularExpression object "
+ "(pattern is '%ls')", where, qUtf16Printable(pattern));
+ } else {
+ qWarning("%s(): called on an invalid QRegularExpression object", where);
+ }
+}
+
+/*!
+ \internal
*/
QRegularExpression::QRegularExpression(QRegularExpressionPrivate &dd)
: d(&dd)
@@ -906,8 +893,8 @@ void QRegularExpressionPrivate::compilePattern()
options |= PCRE2_UTF;
PCRE2_SIZE patternErrorOffset;
- compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.utf16()),
- pattern.length(),
+ compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.constData()),
+ pattern.size(),
options,
&errorCode,
&patternErrorOffset,
@@ -958,43 +945,24 @@ void QRegularExpressionPrivate::getPatternInfo()
Simple "smartpointer" wrapper around a pcre2_jit_stack_16, to be used with
QThreadStorage.
*/
-class QPcreJitStackPointer
+namespace {
+struct PcreJitStackFree
{
- Q_DISABLE_COPY(QPcreJitStackPointer)
-
-public:
- /*!
- \internal
- */
- QPcreJitStackPointer()
- {
- // The default JIT stack size in PCRE is 32K,
- // we allocate from 32K up to 512K.
- stack = pcre2_jit_stack_create_16(32 * 1024, 512 * 1024, nullptr);
- }
- /*!
- \internal
- */
- ~QPcreJitStackPointer()
+ void operator()(pcre2_jit_stack_16 *stack)
{
if (stack)
pcre2_jit_stack_free_16(stack);
}
-
- pcre2_jit_stack_16 *stack;
};
-
-Q_GLOBAL_STATIC(QThreadStorage<QPcreJitStackPointer *>, jitStacks)
+Q_CONSTINIT static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks;
+}
/*!
\internal
*/
static pcre2_jit_stack_16 *qtPcreCallback(void *)
{
- if (jitStacks()->hasLocalData())
- return jitStacks()->localData()->stack;
-
- return nullptr;
+ return jitStacks.get();
}
/*!
@@ -1011,6 +979,8 @@ static bool isJitEnabled()
#ifdef QT_DEBUG
return false;
+#elif defined(Q_OS_MACOS)
+ return !qt_mac_runningUnderRosetta();
#else
return true;
#endif
@@ -1088,9 +1058,10 @@ static int safe_pcre2_match_16(const pcre2_code_16 *code,
int result = pcre2_match_16(code, subject, length,
startOffset, options, matchData, matchContext);
- if (result == PCRE2_ERROR_JIT_STACKLIMIT && !jitStacks()->hasLocalData()) {
- QPcreJitStackPointer *p = new QPcreJitStackPointer;
- jitStacks()->setLocalData(p);
+ if (result == PCRE2_ERROR_JIT_STACKLIMIT && !jitStacks) {
+ // The default JIT stack size in PCRE is 32K,
+ // we allocate from 32K up to 512K.
+ jitStacks.reset(pcre2_jit_stack_create_16(32 * 1024, 512 * 1024, NULL));
result = pcre2_match_16(code, subject, length,
startOffset, options, matchData, matchContext);
@@ -1133,7 +1104,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
const QRegularExpressionMatchPrivate *previous) const
{
Q_ASSERT(priv);
- Q_ASSUME(priv != previous);
+ Q_ASSERT(priv != previous);
const qsizetype subjectLength = priv->subject.size();
@@ -1144,7 +1115,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
return;
if (Q_UNLIKELY(!compiledPattern)) {
- qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object");
+ qtWarnAboutInvalidRegularExpression(pattern, "QRegularExpressionPrivate::doMatch");
return;
}
@@ -1174,7 +1145,19 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
pcre2_jit_stack_assign_16(matchContext, &qtPcreCallback, nullptr);
pcre2_match_data_16 *matchData = pcre2_match_data_create_from_pattern_16(compiledPattern, nullptr);
- const char16_t * const subjectUtf16 = priv->subject.utf16();
+ // PCRE does not accept a null pointer as subject string, even if
+ // its length is zero. We however allow it in input: a QStringView
+ // subject may have data == nullptr. In this case, to keep PCRE
+ // happy, pass a pointer to a dummy character.
+ const char16_t dummySubject = 0;
+ const char16_t * const subjectUtf16 = [&]()
+ {
+ const auto subjectUtf16 = priv->subject.utf16();
+ if (subjectUtf16)
+ return subjectUtf16;
+ Q_ASSERT(subjectLength == 0);
+ return &dummySubject;
+ }();
int result;
@@ -1194,8 +1177,8 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
if (usingCrLfNewlines
&& offset < subjectLength
- && subjectUtf16[offset - 1] == QLatin1Char('\r')
- && subjectUtf16[offset] == QLatin1Char('\n')) {
+ && subjectUtf16[offset - 1] == u'\r'
+ && subjectUtf16[offset] == u'\n') {
++offset;
} else if (offset < subjectLength
&& QChar::isLowSurrogate(subjectUtf16[offset])) {
@@ -1247,6 +1230,12 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv,
PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData);
qsizetype *const capturedOffsets = priv->capturedOffsets.data();
+ // We rely on the fact that capturing groups that did not
+ // capture anything have offset -1, but PCRE technically
+ // returns "PCRE2_UNSET". Test that out, better safe than
+ // sorry...
+ static_assert(qsizetype(PCRE2_UNSET) == qsizetype(-1), "Internal error: PCRE2 changed its API");
+
for (int i = 0; i < priv->capturedCount * 2; ++i)
capturedOffsets[i] = qsizetype(ovector[i]);
@@ -1365,10 +1354,7 @@ QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions op
\sa operator=()
*/
-QRegularExpression::QRegularExpression(const QRegularExpression &re)
- : d(re.d)
-{
-}
+QRegularExpression::QRegularExpression(const QRegularExpression &re) noexcept = default;
/*!
\fn QRegularExpression::QRegularExpression(QRegularExpression &&re)
@@ -1397,11 +1383,7 @@ QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QRegularExpressionPrivate)
Assigns the regular expression \a re to this object, and returns a reference
to the copy. Both the pattern and the pattern options are copied.
*/
-QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re)
-{
- d = re.d;
- return *this;
-}
+QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re) noexcept = default;
/*!
\fn void QRegularExpression::swap(QRegularExpression &other)
@@ -1428,6 +1410,8 @@ QString QRegularExpression::pattern() const
*/
void QRegularExpression::setPattern(const QString &pattern)
{
+ if (d->pattern == pattern)
+ return;
d.detach();
d->isDirty = true;
d->pattern = pattern;
@@ -1451,6 +1435,8 @@ QRegularExpression::PatternOptions QRegularExpression::patternOptions() const
*/
void QRegularExpression::setPatternOptions(PatternOptions options)
{
+ if (d->patternOptions == options)
+ return;
d.detach();
d->isDirty = true;
d->patternOptions = options;
@@ -1522,7 +1508,7 @@ QStringList QRegularExpression::namedCaptureGroups() const
reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i;
const int index = *currentNamedCapturingTableRow;
- result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1);
+ result[index] = QStringView(currentNamedCapturingTableRow + 1).toString();
}
return result;
@@ -1554,10 +1540,10 @@ QString QRegularExpression::errorString() const
QString errorString;
int errorStringLength;
do {
- errorString.resize(errorString.length() + 64);
+ errorString.resize(errorString.size() + 64);
errorStringLength = pcre2_get_error_message_16(d->errorCode,
reinterpret_cast<ushort *>(errorString.data()),
- errorString.length());
+ errorString.size());
} while (errorStringLength < 0);
errorString.resize(errorStringLength);
@@ -1568,7 +1554,7 @@ QString QRegularExpression::errorString() const
#endif
}
#ifdef QT_NO_TRANSLATION
- return QLatin1String("no error");
+ return u"no error"_s;
#else
return QCoreApplication::translate("QRegularExpression", "no error");
#endif
@@ -1595,11 +1581,6 @@ qsizetype QRegularExpression::patternErrorOffset() const
The returned QRegularExpressionMatch object contains the results of the
match.
- \note The data referenced by \a subject should remain valid as long
- as there are QRegularExpressionMatch objects using it. At the moment
- Qt makes a (shallow) copy of the data, but this behavior may change
- in a future version of Qt.
-
\sa QRegularExpressionMatch, {normal matching}
*/
QRegularExpressionMatch QRegularExpression::match(const QString &subject,
@@ -1610,16 +1591,33 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject,
d.data()->compilePattern();
auto priv = new QRegularExpressionMatchPrivate(*this,
subject,
- qToStringViewIgnoringNull(subject),
+ QStringView(subject),
matchType,
matchOptions);
d->doMatch(priv, offset);
return QRegularExpressionMatch(*priv);
}
+#if QT_DEPRECATED_SINCE(6, 8)
/*!
\since 6.0
\overload
+ \obsolete
+
+ Use matchView() instead.
+*/
+QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ return matchView(subjectView, offset, matchType, matchOptions);
+}
+#endif // QT_DEPRECATED_SINCE(6, 8)
+
+/*!
+ \since 6.5
+ \overload
Attempts to match the regular expression against the given \a subjectView
string view, starting at the position \a offset inside the subject, using a
@@ -1633,10 +1631,10 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject,
\sa QRegularExpressionMatch, {normal matching}
*/
-QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
- qsizetype offset,
- MatchType matchType,
- MatchOptions matchOptions) const
+QRegularExpressionMatch QRegularExpression::matchView(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
{
d.data()->compilePattern();
auto priv = new QRegularExpressionMatchPrivate(*this,
@@ -1657,11 +1655,6 @@ QRegularExpressionMatch QRegularExpression::match(QStringView subjectView,
The returned QRegularExpressionMatchIterator is positioned before the
first match result (if any).
- \note The data referenced by \a subject should remain valid as long
- as there are QRegularExpressionMatch objects using it. At the moment
- Qt makes a (shallow) copy of the data, but this behavior may change
- in a future version of Qt.
-
\sa QRegularExpressionMatchIterator, {global matching}
*/
QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject,
@@ -1678,9 +1671,26 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
return QRegularExpressionMatchIterator(*priv);
}
+#if QT_DEPRECATED_SINCE(6, 8)
/*!
\since 6.0
\overload
+ \obsolete
+
+ Use globalMatchView() instead.
+*/
+QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
+{
+ return globalMatchView(subjectView, offset, matchType, matchOptions);
+}
+#endif // QT_DEPRECATED_SINCE(6, 8)
+
+/*!
+ \since 6.5
+ \overload
Attempts to perform a global match of the regular expression against the
given \a subjectView string view, starting at the position \a offset inside the
@@ -1696,16 +1706,16 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
\sa QRegularExpressionMatchIterator, {global matching}
*/
-QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView,
- qsizetype offset,
- MatchType matchType,
- MatchOptions matchOptions) const
+QRegularExpressionMatchIterator QRegularExpression::globalMatchView(QStringView subjectView,
+ qsizetype offset,
+ MatchType matchType,
+ MatchOptions matchOptions) const
{
QRegularExpressionMatchIteratorPrivate *priv =
new QRegularExpressionMatchIteratorPrivate(*this,
matchType,
matchOptions,
- match(subjectView, offset, matchType, matchOptions));
+ matchView(subjectView, offset, matchType, matchOptions));
return QRegularExpressionMatchIterator(*priv);
}
@@ -1768,12 +1778,10 @@ size_t qHash(const QRegularExpression &key, size_t seed) noexcept
return qHashMulti(seed, key.d->pattern, key.d->patternOptions);
}
-#if QT_STRINGVIEW_LEVEL < 2
/*!
\fn QString QRegularExpression::escape(const QString &str)
\overload
*/
-#endif // QT_STRINGVIEW_LEVEL < 2
/*!
\since 5.15
@@ -1810,14 +1818,13 @@ QString QRegularExpression::escape(QStringView str)
// unlike Perl, a literal NUL must be escaped with
// "\\0" (backslash + 0) and not "\\\0" (backslash + NUL),
// because pcre16_compile uses a NUL-terminated string
- result.append(QLatin1Char('\\'));
- result.append(QLatin1Char('0'));
- } else if ( (current < QLatin1Char('a') || current > QLatin1Char('z')) &&
- (current < QLatin1Char('A') || current > QLatin1Char('Z')) &&
- (current < QLatin1Char('0') || current > QLatin1Char('9')) &&
- current != QLatin1Char('_') )
- {
- result.append(QLatin1Char('\\'));
+ result.append(u'\\');
+ result.append(u'0');
+ } else if ((current < u'a' || current > u'z') &&
+ (current < u'A' || current > u'Z') &&
+ (current < u'0' || current > u'9') &&
+ current != u'_') {
+ result.append(u'\\');
result.append(current);
if (current.isHighSurrogate() && i < (count - 1))
result.append(str.at(++i));
@@ -1830,13 +1837,11 @@ QString QRegularExpression::escape(QStringView str)
return result;
}
-#if QT_STRINGVIEW_LEVEL < 2
/*!
\since 5.12
\fn QString QRegularExpression::wildcardToRegularExpression(const QString &pattern, WildcardConversionOptions options)
\overload
*/
-#endif // QT_STRINGVIEW_LEVEL < 2
/*!
\since 6.0
@@ -1851,22 +1856,30 @@ QString QRegularExpression::escape(QStringView str)
\value UnanchoredWildcardConversion
The conversion will not anchor the pattern. This allows for partial string matches of
wildcard expressions.
+
+ \value [since 6.6] NonPathWildcardConversion
+ The conversion will \e{not} interpret the pattern as filepath globbing.
+
+ \sa QRegularExpression::wildcardToRegularExpression
*/
/*!
\since 5.15
Returns a regular expression representation of the given glob \a pattern.
- The transformation is targeting file path globbing, which means in particular
- that path separators receive special treatment. This implies that it is not
- just a basic translation from "*" to ".*".
+
+ There are two transformations possible, one that targets file path
+ globbing, and another one which is more generic.
+
+ By default, the transformation is targeting file path globbing,
+ which means in particular that path separators receive special
+ treatment. This implies that it is not just a basic translation
+ from "*" to ".*" and similar.
\snippet code/src_corelib_text_qregularexpression.cpp 31
- By default, the returned regular expression is fully anchored. In other
- words, there is no need of calling anchoredPattern() again on the
- result. To get an a regular expression that is not anchored, pass
- UnanchoredWildcardConversion as the conversion \a options.
+ The more generic globbing transformation is available by passing
+ \c NonPathWildcardConversion in the conversion \a options.
This implementation follows closely the definition
of wildcard for glob patterns:
@@ -1875,10 +1888,12 @@ QString QRegularExpression::escape(QStringView str)
\li Any character represents itself apart from those mentioned
below. Thus \b{c} matches the character \e c.
\row \li \b{?}
- \li Matches any single character. It is the same as
- \b{.} in full regexps.
+ \li Matches any single character, except for a path separator
+ (in case file path globbing has been selected). It is the
+ same as \b{.} in full regexps.
\row \li \b{*}
- \li Matches zero or more of any characters. It is the
+ \li Matches zero or more of any characters, except for path
+ separators (in case file path globbing has been selected). It is the
same as \b{.*} in full regexps.
\row \li \b{[abc]}
\li Matches one character given in the bracket.
@@ -1892,9 +1907,10 @@ QString QRegularExpression::escape(QStringView str)
bracket. It is the same as \b{[^a-c]} in full regexp.
\endtable
- \note The backslash (\\) character is \e not an escape char in this context.
- In order to match one of the special characters, place it in square brackets
- (for example, \c{[?]}).
+ \note For historical reasons, a backslash (\\) character is \e not
+ an escape char in this context. In order to match one of the
+ special characters, place it in square brackets (for example,
+ \c{[?]}).
More information about the implementation can be found in:
\list
@@ -1902,6 +1918,11 @@ QString QRegularExpression::escape(QStringView str)
\li \c {man 7 glob}
\endlist
+ By default, the returned regular expression is fully anchored. In other
+ words, there is no need to call anchoredPattern() again on the
+ result. To get a regular expression that is not anchored, pass
+ UnanchoredWildcardConversion in the conversion \a options.
+
\sa escape()
*/
QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options)
@@ -1912,29 +1933,51 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
qsizetype i = 0;
const QChar *wc = pattern.data();
+ struct GlobSettings {
+ char16_t nativePathSeparator;
+ QStringView starEscape;
+ QStringView questionMarkEscape;
+ };
+
+ const GlobSettings settings = [options]() {
+ if (options.testFlag(NonPathWildcardConversion)) {
+ // using [\d\D] to mean "match everything";
+ // dot doesn't match newlines, unless in /s mode
+ return GlobSettings{ u'\0', u"[\\d\\D]*", u"[\\d\\D]" };
+ } else {
#ifdef Q_OS_WIN
- const QLatin1Char nativePathSeparator('\\');
- const QLatin1String starEscape("[^/\\\\]*");
- const QLatin1String questionMarkEscape("[^/\\\\]");
+ return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" };
#else
- const QLatin1Char nativePathSeparator('/');
- const QLatin1String starEscape("[^/]*");
- const QLatin1String questionMarkEscape("[^/]");
+ return GlobSettings{ u'/', u"[^/]*", u"[^/]" };
#endif
+ }
+ }();
while (i < wclen) {
const QChar c = wc[i++];
switch (c.unicode()) {
case '*':
- rx += starEscape;
+ rx += settings.starEscape;
break;
case '?':
- rx += questionMarkEscape;
+ rx += settings.questionMarkEscape;
break;
+ // When not using filepath globbing: \ is escaped, / is itself
+ // When using filepath globbing:
+ // * Unix: \ gets escaped. / is itself
+ // * Windows: \ and / can match each other -- they become [/\\] in regexp
case '\\':
#ifdef Q_OS_WIN
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u"\\\\";
+ else
+ rx += u"[/\\\\]";
+ break;
case '/':
- rx += QLatin1String("[/\\\\]");
+ if (options.testFlag(NonPathWildcardConversion))
+ rx += u'/';
+ else
+ rx += u"[/\\\\]";
break;
#endif
case '$':
@@ -1946,29 +1989,31 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil
case '{':
case '|':
case '}':
- rx += QLatin1Char('\\');
+ rx += u'\\';
rx += c;
break;
case '[':
rx += c;
// Support for the [!abc] or [!a-c] syntax
if (i < wclen) {
- if (wc[i] == QLatin1Char('!')) {
- rx += QLatin1Char('^');
+ if (wc[i] == u'!') {
+ rx += u'^';
++i;
}
- if (i < wclen && wc[i] == QLatin1Char(']'))
+ if (i < wclen && wc[i] == u']')
rx += wc[i++];
- while (i < wclen && wc[i] != QLatin1Char(']')) {
- // The '/' appearing in a character class invalidates the
- // regular expression parsing. It also concerns '\\' on
- // Windows OS types.
- if (wc[i] == QLatin1Char('/') || wc[i] == nativePathSeparator)
- return rx;
- if (wc[i] == QLatin1Char('\\'))
- rx += QLatin1Char('\\');
+ while (i < wclen && wc[i] != u']') {
+ if (!options.testFlag(NonPathWildcardConversion)) {
+ // The '/' appearing in a character class invalidates the
+ // regular expression parsing. It also concerns '\\' on
+ // Windows OS types.
+ if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator)
+ return rx;
+ }
+ if (wc[i] == u'\\')
+ rx += u'\\';
rx += wc[i++];
}
}
@@ -2006,13 +2051,11 @@ QRegularExpression QRegularExpression::fromWildcard(QStringView pattern, Qt::Cas
return QRegularExpression(wildcardToRegularExpression(pattern, options), reOptions);
}
-#if QT_STRINGVIEW_LEVEL < 2
/*!
\fn QRegularExpression::anchoredPattern(const QString &expression)
\since 5.12
\overload
*/
-#endif // QT_STRINGVIEW_LEVEL < 2
/*!
\since 5.15
@@ -2023,9 +2066,9 @@ QRegularExpression QRegularExpression::fromWildcard(QStringView pattern, Qt::Cas
QString QRegularExpression::anchoredPattern(QStringView expression)
{
return QString()
- + QLatin1String("\\A(?:")
+ + "\\A(?:"_L1
+ expression
- + QLatin1String(")\\z");
+ + ")\\z"_L1;
}
/*!
@@ -2166,7 +2209,7 @@ QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const
If the regular expression did not match, this function returns -1.
- \sa captured(), capturedStart(), capturedEnd(), capturedLength()
+ \sa hasCaptured(), captured(), capturedStart(), capturedEnd(), capturedLength()
*/
int QRegularExpressionMatch::lastCapturedIndex() const
{
@@ -2174,6 +2217,63 @@ int QRegularExpressionMatch::lastCapturedIndex() const
}
/*!
+ \fn bool QRegularExpressionMatch::hasCaptured(const QString &name) const
+ \fn bool QRegularExpressionMatch::hasCaptured(QStringView name) const
+ \since 6.3
+
+ Returns true if the capturing group named \a name captured something
+ in the subject string, and false otherwise (or if there is no
+ capturing group called \a name).
+
+ \note Some capturing groups in a regular expression may not have
+ captured anything even if the regular expression matched. This may
+ happen, for instance, if a conditional operator is used in the
+ pattern:
+
+ \snippet code/src_corelib_text_qregularexpression.cpp 36
+
+ Similarly, a capturing group may capture a substring of length 0;
+ this function will return \c{true} for such a capturing group.
+
+ \sa captured(), hasMatch()
+*/
+bool QRegularExpressionMatch::hasCaptured(QStringView name) const
+{
+ const int nth = d->regularExpression.d->captureIndexForName(name);
+ return hasCaptured(nth);
+}
+
+/*!
+ \since 6.3
+
+ Returns true if the \a nth capturing group captured something
+ in the subject string, and false otherwise (or if there is no
+ such capturing group).
+
+ \note The implicit capturing group number 0 captures the substring
+ matched by the entire pattern.
+
+ \note Some capturing groups in a regular expression may not have
+ captured anything even if the regular expression matched. This may
+ happen, for instance, if a conditional operator is used in the
+ pattern:
+
+ \snippet code/src_corelib_text_qregularexpression.cpp 36
+
+ Similarly, a capturing group may capture a substring of length 0;
+ this function will return \c{true} for such a capturing group.
+
+ \sa captured(), lastCapturedIndex(), hasMatch()
+*/
+bool QRegularExpressionMatch::hasCaptured(int nth) const
+{
+ if (nth < 0 || nth > lastCapturedIndex())
+ return false;
+
+ return d->capturedOffsets.at(nth * 2) != -1;
+}
+
+/*!
Returns the substring captured by the \a nth capturing group.
If the \a nth capturing group did not capture a string, or if there is no
@@ -2206,7 +2306,7 @@ QString QRegularExpressionMatch::captured(int nth) const
*/
QStringView QRegularExpressionMatch::capturedView(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return QStringView();
qsizetype start = capturedStart(nth);
@@ -2217,7 +2317,6 @@ QStringView QRegularExpressionMatch::capturedView(int nth) const
return d->subject.mid(start, capturedLength(nth));
}
-#if QT_STRINGVIEW_LEVEL < 2
/*! \fn QString QRegularExpressionMatch::captured(const QString &name) const
Returns the substring captured by the capturing group named \a name.
@@ -2228,7 +2327,6 @@ QStringView QRegularExpressionMatch::capturedView(int nth) const
\sa capturedView(), capturedStart(), capturedEnd(), capturedLength(),
QString::isNull()
*/
-#endif // QT_STRINGVIEW_LEVEL < 2
/*!
\since 5.10
@@ -2300,7 +2398,7 @@ QStringList QRegularExpressionMatch::capturedTexts() const
*/
qsizetype QRegularExpressionMatch::capturedStart(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return -1;
return d->capturedOffsets.at(nth * 2);
@@ -2329,13 +2427,12 @@ qsizetype QRegularExpressionMatch::capturedLength(int nth) const
*/
qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
{
- if (nth < 0 || nth > lastCapturedIndex())
+ if (!hasCaptured(nth))
return -1;
return d->capturedOffsets.at(nth * 2 + 1);
}
-#if QT_STRINGVIEW_LEVEL < 2
/*! \fn qsizetype QRegularExpressionMatch::capturedStart(const QString &name) const
Returns the offset inside the subject string corresponding to the starting
@@ -2366,7 +2463,6 @@ qsizetype QRegularExpressionMatch::capturedEnd(int nth) const
\sa capturedStart(), capturedLength(), captured()
*/
-#endif // QT_STRINGVIEW_LEVEL < 2
/*!
\since 5.10
@@ -2615,7 +2711,7 @@ QRegularExpressionMatch QRegularExpressionMatchIterator::next()
}
d.detach();
- return qExchange(d->next, d->next.d.constData()->nextMatch());
+ return std::exchange(d->next, d->next.d.constData()->nextMatch());
}
/*!
@@ -2676,7 +2772,7 @@ QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QReg
*/
QDataStream &operator<<(QDataStream &out, const QRegularExpression &re)
{
- out << re.pattern() << quint32(re.patternOptions());
+ out << re.pattern() << quint32(re.patternOptions().toInt());
return out;
}
@@ -2693,7 +2789,7 @@ QDataStream &operator>>(QDataStream &in, QRegularExpression &re)
quint32 patternOptions;
in >> pattern >> patternOptions;
re.setPattern(pattern);
- re.setPatternOptions(QRegularExpression::PatternOptions(patternOptions));
+ re.setPatternOptions(QRegularExpression::PatternOptions::fromInt(patternOptions));
return in;
}
#endif
@@ -3004,7 +3100,8 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"),
- QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching")
+ QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "INTERNAL ERROR: invalid substring offset")
};
#endif // #if 0