diff options
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r-- | src/corelib/text/qregularexpression.cpp | 439 |
1 files changed, 268 insertions, 171 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp index d92c658cb8..95fd0e3d9a 100644 --- a/src/corelib/text/qregularexpression.cpp +++ b/src/corelib/text/qregularexpression.cpp @@ -1,43 +1,7 @@ -/**************************************************************************** -** -** Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>. -** Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> -** Copyright (C) 2021 The Qt Company Ltd. -** Contact: https://www.qt.io/licensing/ -** -** This file is part of the QtCore module of the Qt Toolkit. -** -** $QT_BEGIN_LICENSE:LGPL$ -** Commercial License Usage -** Licensees holding valid commercial Qt licenses may use this file in -** accordance with the commercial license agreement provided with the -** Software or, alternatively, in accordance with the terms contained in -** a written agreement between you and The Qt Company. For licensing terms -** and conditions see https://www.qt.io/terms-conditions. For further -** information use the contact form at https://www.qt.io/contact-us. -** -** GNU Lesser General Public License Usage -** Alternatively, this file may be used under the terms of the GNU Lesser -** General Public License version 3 as published by the Free Software -** Foundation and appearing in the file LICENSE.LGPL3 included in the -** packaging of this file. Please review the following information to -** ensure the GNU Lesser General Public License version 3 requirements -** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. -** -** GNU General Public License Usage -** Alternatively, this file may be used under the terms of the GNU -** General Public License version 2.0 or (at your option) the GNU General -** Public license version 3 or any later version approved by the KDE Free -** Qt Foundation. 
The licenses are as published by the Free Software -** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 -** included in the packaging of this file. Please review the following -** information to ensure the GNU General Public License requirements will -** be met: https://www.gnu.org/licenses/gpl-2.0.html and -** https://www.gnu.org/licenses/gpl-3.0.html. -** -** $QT_END_LICENSE$ -** -****************************************************************************/ +// Copyright (C) 2020 Giuseppe D'Angelo <dangelog@gmail.com>. +// Copyright (C) 2020 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com, author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> +// Copyright (C) 2021 The Qt Company Ltd. +// SPDX-License-Identifier: LicenseRef-Qt-Commercial OR LGPL-3.0-only OR GPL-2.0-only OR GPL-3.0-only #include "qregularexpression.h" @@ -47,17 +11,22 @@ #include <QtCore/qmutex.h> #include <QtCore/qstringlist.h> #include <QtCore/qdebug.h> -#include <QtCore/qthreadstorage.h> #include <QtCore/qglobal.h> #include <QtCore/qatomic.h> #include <QtCore/qdatastream.h> +#if defined(Q_OS_MACOS) +#include <QtCore/private/qcore_mac_p.h> +#endif + #define PCRE2_CODE_UNIT_WIDTH 16 #include <pcre2.h> QT_BEGIN_NAMESPACE +using namespace Qt::StringLiterals; + /*! \class QRegularExpression \inmodule QtCore @@ -822,6 +791,24 @@ struct QRegularExpressionMatchIteratorPrivate : QSharedData /*! \internal + + Used to centralize the warning about using an invalid QRegularExpression. + In case the pattern is an illegal UTF-16 string, we can't print it + (pass it to qUtf16Printable, etc.), so we need to check for that. 
+*/ +Q_DECL_COLD_FUNCTION +void qtWarnAboutInvalidRegularExpression(const QString &pattern, const char *where) +{ + if (pattern.isValidUtf16()) { + qWarning("%s(): called on an invalid QRegularExpression object " + "(pattern is '%ls')", where, qUtf16Printable(pattern)); + } else { + qWarning("%s(): called on an invalid QRegularExpression object", where); + } +} + +/*! + \internal */ QRegularExpression::QRegularExpression(QRegularExpressionPrivate &dd) : d(&dd) @@ -906,8 +893,8 @@ void QRegularExpressionPrivate::compilePattern() options |= PCRE2_UTF; PCRE2_SIZE patternErrorOffset; - compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.utf16()), - pattern.length(), + compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.constData()), + pattern.size(), options, &errorCode, &patternErrorOffset, @@ -958,43 +945,24 @@ void QRegularExpressionPrivate::getPatternInfo() Simple "smartpointer" wrapper around a pcre2_jit_stack_16, to be used with QThreadStorage. */ -class QPcreJitStackPointer +namespace { +struct PcreJitStackFree { - Q_DISABLE_COPY(QPcreJitStackPointer) - -public: - /*! - \internal - */ - QPcreJitStackPointer() - { - // The default JIT stack size in PCRE is 32K, - // we allocate from 32K up to 512K. - stack = pcre2_jit_stack_create_16(32 * 1024, 512 * 1024, nullptr); - } - /*! - \internal - */ - ~QPcreJitStackPointer() + void operator()(pcre2_jit_stack_16 *stack) { if (stack) pcre2_jit_stack_free_16(stack); } - - pcre2_jit_stack_16 *stack; }; - -Q_GLOBAL_STATIC(QThreadStorage<QPcreJitStackPointer *>, jitStacks) +Q_CONSTINIT static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks; +} /*! \internal */ static pcre2_jit_stack_16 *qtPcreCallback(void *) { - if (jitStacks()->hasLocalData()) - return jitStacks()->localData()->stack; - - return nullptr; + return jitStacks.get(); } /*! 
@@ -1011,6 +979,8 @@ static bool isJitEnabled() #ifdef QT_DEBUG return false; +#elif defined(Q_OS_MACOS) + return !qt_mac_runningUnderRosetta(); #else return true; #endif @@ -1088,9 +1058,10 @@ static int safe_pcre2_match_16(const pcre2_code_16 *code, int result = pcre2_match_16(code, subject, length, startOffset, options, matchData, matchContext); - if (result == PCRE2_ERROR_JIT_STACKLIMIT && !jitStacks()->hasLocalData()) { - QPcreJitStackPointer *p = new QPcreJitStackPointer; - jitStacks()->setLocalData(p); + if (result == PCRE2_ERROR_JIT_STACKLIMIT && !jitStacks) { + // The default JIT stack size in PCRE is 32K, + // we allocate from 32K up to 512K. + jitStacks.reset(pcre2_jit_stack_create_16(32 * 1024, 512 * 1024, NULL)); result = pcre2_match_16(code, subject, length, startOffset, options, matchData, matchContext); @@ -1133,7 +1104,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, const QRegularExpressionMatchPrivate *previous) const { Q_ASSERT(priv); - Q_ASSUME(priv != previous); + Q_ASSERT(priv != previous); const qsizetype subjectLength = priv->subject.size(); @@ -1144,7 +1115,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, return; if (Q_UNLIKELY(!compiledPattern)) { - qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object"); + qtWarnAboutInvalidRegularExpression(pattern, "QRegularExpressionPrivate::doMatch"); return; } @@ -1174,7 +1145,19 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, pcre2_jit_stack_assign_16(matchContext, &qtPcreCallback, nullptr); pcre2_match_data_16 *matchData = pcre2_match_data_create_from_pattern_16(compiledPattern, nullptr); - const char16_t * const subjectUtf16 = priv->subject.utf16(); + // PCRE does not accept a null pointer as subject string, even if + // its length is zero. We however allow it in input: a QStringView + // subject may have data == nullptr. 
In this case, to keep PCRE + // happy, pass a pointer to a dummy character. + const char16_t dummySubject = 0; + const char16_t * const subjectUtf16 = [&]() + { + const auto subjectUtf16 = priv->subject.utf16(); + if (subjectUtf16) + return subjectUtf16; + Q_ASSERT(subjectLength == 0); + return &dummySubject; + }(); int result; @@ -1194,8 +1177,8 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, if (usingCrLfNewlines && offset < subjectLength - && subjectUtf16[offset - 1] == QLatin1Char('\r') - && subjectUtf16[offset] == QLatin1Char('\n')) { + && subjectUtf16[offset - 1] == u'\r' + && subjectUtf16[offset] == u'\n') { ++offset; } else if (offset < subjectLength && QChar::isLowSurrogate(subjectUtf16[offset])) { @@ -1247,6 +1230,12 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, PCRE2_SIZE *ovector = pcre2_get_ovector_pointer_16(matchData); qsizetype *const capturedOffsets = priv->capturedOffsets.data(); + // We rely on the fact that capturing groups that did not + // capture anything have offset -1, but PCRE technically + // returns "PCRE2_UNSET". Test that out, better safe than + // sorry... + static_assert(qsizetype(PCRE2_UNSET) == qsizetype(-1), "Internal error: PCRE2 changed its API"); + for (int i = 0; i < priv->capturedCount * 2; ++i) capturedOffsets[i] = qsizetype(ovector[i]); @@ -1365,10 +1354,7 @@ QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions op \sa operator=() */ -QRegularExpression::QRegularExpression(const QRegularExpression &re) - : d(re.d) -{ -} +QRegularExpression::QRegularExpression(const QRegularExpression &re) noexcept = default; /*! \fn QRegularExpression::QRegularExpression(QRegularExpression &&re) @@ -1397,11 +1383,7 @@ QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QRegularExpressionPrivate) Assigns the regular expression \a re to this object, and returns a reference to the copy. Both the pattern and the pattern options are copied. 
*/ -QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re) -{ - d = re.d; - return *this; -} +QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re) noexcept = default; /*! \fn void QRegularExpression::swap(QRegularExpression &other) @@ -1428,6 +1410,8 @@ QString QRegularExpression::pattern() const */ void QRegularExpression::setPattern(const QString &pattern) { + if (d->pattern == pattern) + return; d.detach(); d->isDirty = true; d->pattern = pattern; @@ -1451,6 +1435,8 @@ QRegularExpression::PatternOptions QRegularExpression::patternOptions() const */ void QRegularExpression::setPatternOptions(PatternOptions options) { + if (d->patternOptions == options) + return; d.detach(); d->isDirty = true; d->patternOptions = options; @@ -1522,7 +1508,7 @@ QStringList QRegularExpression::namedCaptureGroups() const reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i; const int index = *currentNamedCapturingTableRow; - result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1); + result[index] = QStringView(currentNamedCapturingTableRow + 1).toString(); } return result; @@ -1554,10 +1540,10 @@ QString QRegularExpression::errorString() const QString errorString; int errorStringLength; do { - errorString.resize(errorString.length() + 64); + errorString.resize(errorString.size() + 64); errorStringLength = pcre2_get_error_message_16(d->errorCode, reinterpret_cast<ushort *>(errorString.data()), - errorString.length()); + errorString.size()); } while (errorStringLength < 0); errorString.resize(errorStringLength); @@ -1568,7 +1554,7 @@ QString QRegularExpression::errorString() const #endif } #ifdef QT_NO_TRANSLATION - return QLatin1String("no error"); + return u"no error"_s; #else return QCoreApplication::translate("QRegularExpression", "no error"); #endif @@ -1595,11 +1581,6 @@ qsizetype QRegularExpression::patternErrorOffset() const The returned QRegularExpressionMatch object 
contains the results of the match. - \note The data referenced by \a subject should remain valid as long - as there are QRegularExpressionMatch objects using it. At the moment - Qt makes a (shallow) copy of the data, but this behavior may change - in a future version of Qt. - \sa QRegularExpressionMatch, {normal matching} */ QRegularExpressionMatch QRegularExpression::match(const QString &subject, @@ -1610,16 +1591,33 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject, d.data()->compilePattern(); auto priv = new QRegularExpressionMatchPrivate(*this, subject, - qToStringViewIgnoringNull(subject), + QStringView(subject), matchType, matchOptions); d->doMatch(priv, offset); return QRegularExpressionMatch(*priv); } +#if QT_DEPRECATED_SINCE(6, 8) /*! \since 6.0 \overload + \obsolete + + Use matchView() instead. +*/ +QRegularExpressionMatch QRegularExpression::match(QStringView subjectView, + qsizetype offset, + MatchType matchType, + MatchOptions matchOptions) const +{ + return matchView(subjectView, offset, matchType, matchOptions); +} +#endif // QT_DEPRECATED_SINCE(6, 8) + +/*! 
+ \since 6.5 + \overload Attempts to match the regular expression against the given \a subjectView string view, starting at the position \a offset inside the subject, using a @@ -1633,10 +1631,10 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject, \sa QRegularExpressionMatch, {normal matching} */ -QRegularExpressionMatch QRegularExpression::match(QStringView subjectView, - qsizetype offset, - MatchType matchType, - MatchOptions matchOptions) const +QRegularExpressionMatch QRegularExpression::matchView(QStringView subjectView, + qsizetype offset, + MatchType matchType, + MatchOptions matchOptions) const { d.data()->compilePattern(); auto priv = new QRegularExpressionMatchPrivate(*this, @@ -1657,11 +1655,6 @@ QRegularExpressionMatch QRegularExpression::match(QStringView subjectView, The returned QRegularExpressionMatchIterator is positioned before the first match result (if any). - \note The data referenced by \a subject should remain valid as long - as there are QRegularExpressionMatch objects using it. At the moment - Qt makes a (shallow) copy of the data, but this behavior may change - in a future version of Qt. - \sa QRegularExpressionMatchIterator, {global matching} */ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject, @@ -1678,9 +1671,26 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s return QRegularExpressionMatchIterator(*priv); } +#if QT_DEPRECATED_SINCE(6, 8) /*! \since 6.0 \overload + \obsolete + + Use globalMatchView() instead. +*/ +QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView, + qsizetype offset, + MatchType matchType, + MatchOptions matchOptions) const +{ + return globalMatchView(subjectView, offset, matchType, matchOptions); +} +#endif // QT_DEPRECATED_SINCE(6, 8) + +/*! 
+ \since 6.5 + \overload Attempts to perform a global match of the regular expression against the given \a subjectView string view, starting at the position \a offset inside the @@ -1696,16 +1706,16 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s \sa QRegularExpressionMatchIterator, {global matching} */ -QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView, - qsizetype offset, - MatchType matchType, - MatchOptions matchOptions) const +QRegularExpressionMatchIterator QRegularExpression::globalMatchView(QStringView subjectView, + qsizetype offset, + MatchType matchType, + MatchOptions matchOptions) const { QRegularExpressionMatchIteratorPrivate *priv = new QRegularExpressionMatchIteratorPrivate(*this, matchType, matchOptions, - match(subjectView, offset, matchType, matchOptions)); + matchView(subjectView, offset, matchType, matchOptions)); return QRegularExpressionMatchIterator(*priv); } @@ -1768,12 +1778,10 @@ size_t qHash(const QRegularExpression &key, size_t seed) noexcept return qHashMulti(seed, key.d->pattern, key.d->patternOptions); } -#if QT_STRINGVIEW_LEVEL < 2 /*! \fn QString QRegularExpression::escape(const QString &str) \overload */ -#endif // QT_STRINGVIEW_LEVEL < 2 /*! 
\since 5.15 @@ -1810,14 +1818,13 @@ QString QRegularExpression::escape(QStringView str) // unlike Perl, a literal NUL must be escaped with // "\\0" (backslash + 0) and not "\\\0" (backslash + NUL), // because pcre16_compile uses a NUL-terminated string - result.append(QLatin1Char('\\')); - result.append(QLatin1Char('0')); - } else if ( (current < QLatin1Char('a') || current > QLatin1Char('z')) && - (current < QLatin1Char('A') || current > QLatin1Char('Z')) && - (current < QLatin1Char('0') || current > QLatin1Char('9')) && - current != QLatin1Char('_') ) - { - result.append(QLatin1Char('\\')); + result.append(u'\\'); + result.append(u'0'); + } else if ((current < u'a' || current > u'z') && + (current < u'A' || current > u'Z') && + (current < u'0' || current > u'9') && + current != u'_') { + result.append(u'\\'); result.append(current); if (current.isHighSurrogate() && i < (count - 1)) result.append(str.at(++i)); @@ -1830,13 +1837,11 @@ QString QRegularExpression::escape(QStringView str) return result; } -#if QT_STRINGVIEW_LEVEL < 2 /*! \since 5.12 \fn QString QRegularExpression::wildcardToRegularExpression(const QString &pattern, WildcardConversionOptions options) \overload */ -#endif // QT_STRINGVIEW_LEVEL < 2 /*! \since 6.0 @@ -1851,22 +1856,30 @@ QString QRegularExpression::escape(QStringView str) \value UnanchoredWildcardConversion The conversion will not anchor the pattern. This allows for partial string matches of wildcard expressions. + + \value [since 6.6] NonPathWildcardConversion + The conversion will \e{not} interpret the pattern as filepath globbing. + + \sa QRegularExpression::wildcardToRegularExpression */ /*! \since 5.15 Returns a regular expression representation of the given glob \a pattern. - The transformation is targeting file path globbing, which means in particular - that path separators receive special treatment. This implies that it is not - just a basic translation from "*" to ".*". 
+ There are two transformations possible, one that targets file path + globbing, and another one which is more generic. + + By default, the transformation is targeting file path globbing, + which means in particular that path separators receive special + treatment. This implies that it is not just a basic translation + from "*" to ".*" and similar. \snippet code/src_corelib_text_qregularexpression.cpp 31 - By default, the returned regular expression is fully anchored. In other - words, there is no need of calling anchoredPattern() again on the - result. To get an a regular expression that is not anchored, pass - UnanchoredWildcardConversion as the conversion \a options. + The more generic globbing transformation is available by passing + \c NonPathWildcardConversion in the conversion \a options. This implementation follows closely the definition of wildcard for glob patterns: @@ -1875,10 +1888,12 @@ QString QRegularExpression::escape(QStringView str) \li Any character represents itself apart from those mentioned below. Thus \b{c} matches the character \e c. \row \li \b{?} - \li Matches any single character. It is the same as - \b{.} in full regexps. + \li Matches any single character, except for a path separator + (in case file path globbing has been selected). It is the + same as \b{.} in full regexps. \row \li \b{*} - \li Matches zero or more of any characters. It is the + \li Matches zero or more of any characters, except for path + separators (in case file path globbing has been selected). It is the same as \b{.*} in full regexps. \row \li \b{[abc]} \li Matches one character given in the bracket. @@ -1892,9 +1907,10 @@ QString QRegularExpression::escape(QStringView str) bracket. It is the same as \b{[^a-c]} in full regexp. \endtable - \note The backslash (\\) character is \e not an escape char in this context. - In order to match one of the special characters, place it in square brackets - (for example, \c{[?]}). 
+ \note For historical reasons, a backslash (\\) character is \e not + an escape char in this context. In order to match one of the + special characters, place it in square brackets (for example, + \c{[?]}). More information about the implementation can be found in: \list @@ -1902,6 +1918,11 @@ QString QRegularExpression::escape(QStringView str) \li \c {man 7 glob} \endlist + By default, the returned regular expression is fully anchored. In other + words, there is no need of calling anchoredPattern() again on the + result. To get a regular expression that is not anchored, pass + UnanchoredWildcardConversion in the conversion \a options. + \sa escape() */ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options) @@ -1912,29 +1933,51 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil qsizetype i = 0; const QChar *wc = pattern.data(); + struct GlobSettings { + char16_t nativePathSeparator; + QStringView starEscape; + QStringView questionMarkEscape; + }; + + const GlobSettings settings = [options]() { + if (options.testFlag(NonPathWildcardConversion)) { + // using [\d\D] to mean "match everything"; + // dot doesn't match newlines, unless in /s mode + return GlobSettings{ u'\0', u"[\\d\\D]*", u"[\\d\\D]" }; + } else { #ifdef Q_OS_WIN - const QLatin1Char nativePathSeparator('\\'); - const QLatin1String starEscape("[^/\\\\]*"); - const QLatin1String questionMarkEscape("[^/\\\\]"); + return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" }; #else - const QLatin1Char nativePathSeparator('/'); - const QLatin1String starEscape("[^/]*"); - const QLatin1String questionMarkEscape("[^/]"); + return GlobSettings{ u'/', u"[^/]*", u"[^/]" }; #endif + } + }(); while (i < wclen) { const QChar c = wc[i++]; switch (c.unicode()) { case '*': - rx += starEscape; + rx += settings.starEscape; break; case '?': - rx += questionMarkEscape; + rx += settings.questionMarkEscape; break; + // When not using 
filepath globbing: \ is escaped, / is itself + // When using filepath globbing: + // * Unix: \ gets escaped. / is itself + // * Windows: \ and / can match each other -- they become [/\\] in regexp case '\\': #ifdef Q_OS_WIN + if (options.testFlag(NonPathWildcardConversion)) + rx += u"\\\\"; + else + rx += u"[/\\\\]"; + break; case '/': - rx += QLatin1String("[/\\\\]"); + if (options.testFlag(NonPathWildcardConversion)) + rx += u'/'; + else + rx += u"[/\\\\]"; break; #endif case '$': @@ -1946,29 +1989,31 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil case '{': case '|': case '}': - rx += QLatin1Char('\\'); + rx += u'\\'; rx += c; break; case '[': rx += c; // Support for the [!abc] or [!a-c] syntax if (i < wclen) { - if (wc[i] == QLatin1Char('!')) { - rx += QLatin1Char('^'); + if (wc[i] == u'!') { + rx += u'^'; ++i; } - if (i < wclen && wc[i] == QLatin1Char(']')) + if (i < wclen && wc[i] == u']') rx += wc[i++]; - while (i < wclen && wc[i] != QLatin1Char(']')) { - // The '/' appearing in a character class invalidates the - // regular expression parsing. It also concerns '\\' on - // Windows OS types. - if (wc[i] == QLatin1Char('/') || wc[i] == nativePathSeparator) - return rx; - if (wc[i] == QLatin1Char('\\')) - rx += QLatin1Char('\\'); + while (i < wclen && wc[i] != u']') { + if (!options.testFlag(NonPathWildcardConversion)) { + // The '/' appearing in a character class invalidates the + // regular expression parsing. It also concerns '\\' on + // Windows OS types. + if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator) + return rx; + } + if (wc[i] == u'\\') + rx += u'\\'; rx += wc[i++]; } } @@ -2006,13 +2051,11 @@ QRegularExpression QRegularExpression::fromWildcard(QStringView pattern, Qt::Cas return QRegularExpression(wildcardToRegularExpression(pattern, options), reOptions); } -#if QT_STRINGVIEW_LEVEL < 2 /*! 
\fn QRegularExpression::anchoredPattern(const QString &expression) \since 5.12 \overload */ -#endif // QT_STRINGVIEW_LEVEL < 2 /*! \since 5.15 @@ -2023,9 +2066,9 @@ QRegularExpression QRegularExpression::fromWildcard(QStringView pattern, Qt::Cas QString QRegularExpression::anchoredPattern(QStringView expression) { return QString() - + QLatin1String("\\A(?:") + + "\\A(?:"_L1 + expression - + QLatin1String(")\\z"); + + ")\\z"_L1; } /*! @@ -2166,7 +2209,7 @@ QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const If the regular expression did not match, this function returns -1. - \sa captured(), capturedStart(), capturedEnd(), capturedLength() + \sa hasCaptured(), captured(), capturedStart(), capturedEnd(), capturedLength() */ int QRegularExpressionMatch::lastCapturedIndex() const { @@ -2174,6 +2217,63 @@ int QRegularExpressionMatch::lastCapturedIndex() const } /*! + \fn bool QRegularExpressionMatch::hasCaptured(const QString &name) const + \fn bool QRegularExpressionMatch::hasCaptured(QStringView name) const + \since 6.3 + + Returns true if the capturing group named \a name captured something + in the subject string, and false otherwise (or if there is no + capturing group called \a name). + + \note Some capturing groups in a regular expression may not have + captured anything even if the regular expression matched. This may + happen, for instance, if a conditional operator is used in the + pattern: + + \snippet code/src_corelib_text_qregularexpression.cpp 36 + + Similarly, a capturing group may capture a substring of length 0; + this function will return \c{true} for such a capturing group. + + \sa captured(), hasMatch() +*/ +bool QRegularExpressionMatch::hasCaptured(QStringView name) const +{ + const int nth = d->regularExpression.d->captureIndexForName(name); + return hasCaptured(nth); +} + +/*! 
+ \since 6.3 + + Returns true if the \a nth capturing group captured something + in the subject string, and false otherwise (or if there is no + such capturing group). + + \note The implicit capturing group number 0 captures the substring + matched by the entire pattern. + + \note Some capturing groups in a regular expression may not have + captured anything even if the regular expression matched. This may + happen, for instance, if a conditional operator is used in the + pattern: + + \snippet code/src_corelib_text_qregularexpression.cpp 36 + + Similarly, a capturing group may capture a substring of length 0; + this function will return \c{true} for such a capturing group. + + \sa captured(), lastCapturedIndex(), hasMatch() +*/ +bool QRegularExpressionMatch::hasCaptured(int nth) const +{ + if (nth < 0 || nth > lastCapturedIndex()) + return false; + + return d->capturedOffsets.at(nth * 2) != -1; +} + +/*! Returns the substring captured by the \a nth capturing group. If the \a nth capturing group did not capture a string, or if there is no @@ -2206,7 +2306,7 @@ QString QRegularExpressionMatch::captured(int nth) const */ QStringView QRegularExpressionMatch::capturedView(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return QStringView(); qsizetype start = capturedStart(nth); @@ -2217,7 +2317,6 @@ QStringView QRegularExpressionMatch::capturedView(int nth) const return d->subject.mid(start, capturedLength(nth)); } -#if QT_STRINGVIEW_LEVEL < 2 /*! \fn QString QRegularExpressionMatch::captured(const QString &name) const Returns the substring captured by the capturing group named \a name. @@ -2228,7 +2327,6 @@ QStringView QRegularExpressionMatch::capturedView(int nth) const \sa capturedView(), capturedStart(), capturedEnd(), capturedLength(), QString::isNull() */ -#endif // QT_STRINGVIEW_LEVEL < 2 /*! 
\since 5.10 @@ -2300,7 +2398,7 @@ QStringList QRegularExpressionMatch::capturedTexts() const */ qsizetype QRegularExpressionMatch::capturedStart(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return -1; return d->capturedOffsets.at(nth * 2); @@ -2329,13 +2427,12 @@ qsizetype QRegularExpressionMatch::capturedLength(int nth) const */ qsizetype QRegularExpressionMatch::capturedEnd(int nth) const { - if (nth < 0 || nth > lastCapturedIndex()) + if (!hasCaptured(nth)) return -1; return d->capturedOffsets.at(nth * 2 + 1); } -#if QT_STRINGVIEW_LEVEL < 2 /*! \fn qsizetype QRegularExpressionMatch::capturedStart(const QString &name) const Returns the offset inside the subject string corresponding to the starting @@ -2366,7 +2463,6 @@ qsizetype QRegularExpressionMatch::capturedEnd(int nth) const \sa capturedStart(), capturedLength(), captured() */ -#endif // QT_STRINGVIEW_LEVEL < 2 /*! \since 5.10 @@ -2615,7 +2711,7 @@ QRegularExpressionMatch QRegularExpressionMatchIterator::next() } d.detach(); - return qExchange(d->next, d->next.d.constData()->nextMatch()); + return std::exchange(d->next, d->next.d.constData()->nextMatch()); } /*! 
@@ -2676,7 +2772,7 @@ QtPrivate::QRegularExpressionMatchIteratorRangeBasedForIterator begin(const QReg */ QDataStream &operator<<(QDataStream &out, const QRegularExpression &re) { - out << re.pattern() << quint32(re.patternOptions()); + out << re.pattern() << quint32(re.patternOptions().toInt()); return out; } @@ -2693,7 +2789,7 @@ QDataStream &operator>>(QDataStream &in, QRegularExpression &re) quint32 patternOptions; in >> pattern >> patternOptions; re.setPattern(pattern); - re.setPatternOptions(QRegularExpression::PatternOptions(patternOptions)); + re.setPatternOptions(QRegularExpression::PatternOptions::fromInt(patternOptions)); return in; } #endif @@ -3004,7 +3100,8 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"), QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"), - QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching") + QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching"), + QT_TRANSLATE_NOOP("QRegularExpression", "INTERNAL ERROR: invalid substring offset") }; #endif // #if 0 |