diff options
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r-- | src/corelib/text/qregularexpression.cpp | 184 |
1 files changed, 121 insertions, 63 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp index b4b0a3d47d..95fd0e3d9a 100644 --- a/src/corelib/text/qregularexpression.cpp +++ b/src/corelib/text/qregularexpression.cpp @@ -894,7 +894,7 @@ void QRegularExpressionPrivate::compilePattern() PCRE2_SIZE patternErrorOffset; compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.constData()), - pattern.length(), + pattern.size(), options, &errorCode, &patternErrorOffset, @@ -954,7 +954,7 @@ struct PcreJitStackFree pcre2_jit_stack_free_16(stack); } }; -static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks; +Q_CONSTINIT static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks; } /*! @@ -1104,7 +1104,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, const QRegularExpressionMatchPrivate *previous) const { Q_ASSERT(priv); - Q_ASSUME(priv != previous); + Q_ASSERT(priv != previous); const qsizetype subjectLength = priv->subject.size(); @@ -1354,10 +1354,7 @@ QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions op \sa operator=() */ -QRegularExpression::QRegularExpression(const QRegularExpression &re) - : d(re.d) -{ -} +QRegularExpression::QRegularExpression(const QRegularExpression &re) noexcept = default; /*! \fn QRegularExpression::QRegularExpression(QRegularExpression &&re) @@ -1386,11 +1383,7 @@ QT_DEFINE_QESDP_SPECIALIZATION_DTOR(QRegularExpressionPrivate) Assigns the regular expression \a re to this object, and returns a reference to the copy. Both the pattern and the pattern options are copied. */ -QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re) -{ - d = re.d; - return *this; -} +QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re) noexcept = default; /*! 
\fn void QRegularExpression::swap(QRegularExpression &other) @@ -1515,7 +1508,7 @@ QStringList QRegularExpression::namedCaptureGroups() const reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i; const int index = *currentNamedCapturingTableRow; - result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1); + result[index] = QStringView(currentNamedCapturingTableRow + 1).toString(); } return result; @@ -1547,10 +1540,10 @@ QString QRegularExpression::errorString() const QString errorString; int errorStringLength; do { - errorString.resize(errorString.length() + 64); + errorString.resize(errorString.size() + 64); errorStringLength = pcre2_get_error_message_16(d->errorCode, reinterpret_cast<ushort *>(errorString.data()), - errorString.length()); + errorString.size()); } while (errorStringLength < 0); errorString.resize(errorStringLength); @@ -1588,11 +1581,6 @@ qsizetype QRegularExpression::patternErrorOffset() const The returned QRegularExpressionMatch object contains the results of the match. - \note The data referenced by \a subject should remain valid as long - as there are QRegularExpressionMatch objects using it. At the moment - Qt makes a (shallow) copy of the data, but this behavior may change - in a future version of Qt. - \sa QRegularExpressionMatch, {normal matching} */ QRegularExpressionMatch QRegularExpression::match(const QString &subject, @@ -1610,9 +1598,26 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject, return QRegularExpressionMatch(*priv); } +#if QT_DEPRECATED_SINCE(6, 8) /*! \since 6.0 \overload + \obsolete + + Use matchView() instead. +*/ +QRegularExpressionMatch QRegularExpression::match(QStringView subjectView, + qsizetype offset, + MatchType matchType, + MatchOptions matchOptions) const +{ + return matchView(subjectView, offset, matchType, matchOptions); +} +#endif // QT_DEPRECATED_SINCE(6, 8) + +/*! 
+ \since 6.5 + \overload Attempts to match the regular expression against the given \a subjectView string view, starting at the position \a offset inside the subject, using a @@ -1626,10 +1631,10 @@ QRegularExpressionMatch QRegularExpression::match(const QString &subject, \sa QRegularExpressionMatch, {normal matching} */ -QRegularExpressionMatch QRegularExpression::match(QStringView subjectView, - qsizetype offset, - MatchType matchType, - MatchOptions matchOptions) const +QRegularExpressionMatch QRegularExpression::matchView(QStringView subjectView, + qsizetype offset, + MatchType matchType, + MatchOptions matchOptions) const { d.data()->compilePattern(); auto priv = new QRegularExpressionMatchPrivate(*this, @@ -1650,11 +1655,6 @@ QRegularExpressionMatch QRegularExpression::match(QStringView subjectView, The returned QRegularExpressionMatchIterator is positioned before the first match result (if any). - \note The data referenced by \a subject should remain valid as long - as there are QRegularExpressionMatch objects using it. At the moment - Qt makes a (shallow) copy of the data, but this behavior may change - in a future version of Qt. - \sa QRegularExpressionMatchIterator, {global matching} */ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject, @@ -1671,9 +1671,26 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s return QRegularExpressionMatchIterator(*priv); } +#if QT_DEPRECATED_SINCE(6, 8) /*! \since 6.0 \overload + \obsolete + + Use globalMatchView() instead. +*/ +QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView, + qsizetype offset, + MatchType matchType, + MatchOptions matchOptions) const +{ + return globalMatchView(subjectView, offset, matchType, matchOptions); +} +#endif // QT_DEPRECATED_SINCE(6, 8) + +/*! 
+ \since 6.5 + \overload Attempts to perform a global match of the regular expression against the given \a subjectView string view, starting at the position \a offset inside the @@ -1689,16 +1706,16 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s \sa QRegularExpressionMatchIterator, {global matching} */ -QRegularExpressionMatchIterator QRegularExpression::globalMatch(QStringView subjectView, - qsizetype offset, - MatchType matchType, - MatchOptions matchOptions) const +QRegularExpressionMatchIterator QRegularExpression::globalMatchView(QStringView subjectView, + qsizetype offset, + MatchType matchType, + MatchOptions matchOptions) const { QRegularExpressionMatchIteratorPrivate *priv = new QRegularExpressionMatchIteratorPrivate(*this, matchType, matchOptions, - match(subjectView, offset, matchType, matchOptions)); + matchView(subjectView, offset, matchType, matchOptions)); return QRegularExpressionMatchIterator(*priv); } @@ -1839,22 +1856,30 @@ QString QRegularExpression::escape(QStringView str) \value UnanchoredWildcardConversion The conversion will not anchor the pattern. This allows for partial string matches of wildcard expressions. + + \value [since 6.6] NonPathWildcardConversion + The conversion will \e{not} interpret the pattern as filepath globbing. + + \sa QRegularExpression::wildcardToRegularExpression */ /*! \since 5.15 Returns a regular expression representation of the given glob \a pattern. - The transformation is targeting file path globbing, which means in particular - that path separators receive special treatment. This implies that it is not - just a basic translation from "*" to ".*". + + There are two transformations possible, one that targets file path + globbing, and another one which is more generic. + + By default, the transformation is targeting file path globbing, + which means in particular that path separators receive special + treatment. 
This implies that it is not just a basic translation + from "*" to ".*" and similar. \snippet code/src_corelib_text_qregularexpression.cpp 31 - By default, the returned regular expression is fully anchored. In other - words, there is no need of calling anchoredPattern() again on the - result. To get an a regular expression that is not anchored, pass - UnanchoredWildcardConversion as the conversion \a options. + The more generic globbing transformation is available by passing + \c NonPathWildcardConversion in the conversion \a options. This implementation follows closely the definition of wildcard for glob patterns: @@ -1863,10 +1888,12 @@ QString QRegularExpression::escape(QStringView str) \li Any character represents itself apart from those mentioned below. Thus \b{c} matches the character \e c. \row \li \b{?} - \li Matches any single character. It is the same as - \b{.} in full regexps. + \li Matches any single character, except for a path separator + (in case file path globbing has been selected). It is the + same as \b{.} in full regexps. \row \li \b{*} - \li Matches zero or more of any characters. It is the + \li Matches zero or more of any characters, except for path + separators (in case file path globbing has been selected). It is the same as \b{.*} in full regexps. \row \li \b{[abc]} \li Matches one character given in the bracket. @@ -1880,9 +1907,10 @@ QString QRegularExpression::escape(QStringView str) bracket. It is the same as \b{[^a-c]} in full regexp. \endtable - \note The backslash (\\) character is \e not an escape char in this context. - In order to match one of the special characters, place it in square brackets - (for example, \c{[?]}). + \note For historical reasons, a backslash (\\) character is \e not + an escape char in this context. In order to match one of the + special characters, place it in square brackets (for example, + \c{[?]}).
More information about the implementation can be found in: \list @@ -1890,6 +1918,11 @@ QString QRegularExpression::escape(QStringView str) \li \c {man 7 glob} \endlist + By default, the returned regular expression is fully anchored. In other + words, there is no need of calling anchoredPattern() again on the + result. To get a regular expression that is not anchored, pass + UnanchoredWildcardConversion in the conversion \a options. + \sa escape() */ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options) @@ -1900,29 +1933,51 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil qsizetype i = 0; const QChar *wc = pattern.data(); + struct GlobSettings { + char16_t nativePathSeparator; + QStringView starEscape; + QStringView questionMarkEscape; + }; + + const GlobSettings settings = [options]() { + if (options.testFlag(NonPathWildcardConversion)) { + // using [\d\D] to mean "match everything"; + // dot doesn't match newlines, unless in /s mode + return GlobSettings{ u'\0', u"[\\d\\D]*", u"[\\d\\D]" }; + } else { #ifdef Q_OS_WIN - const char16_t nativePathSeparator = u'\\'; - const auto starEscape = "[^/\\\\]*"_L1; - const auto questionMarkEscape = "[^/\\\\]"_L1; + return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" }; #else - const char16_t nativePathSeparator = u'/'; - const auto starEscape = "[^/]*"_L1; - const auto questionMarkEscape = "[^/]"_L1; + return GlobSettings{ u'/', u"[^/]*", u"[^/]" }; #endif + } + }(); while (i < wclen) { const QChar c = wc[i++]; switch (c.unicode()) { case '*': - rx += starEscape; + rx += settings.starEscape; break; case '?': - rx += questionMarkEscape; + rx += settings.questionMarkEscape; break; + // When not using filepath globbing: \ is escaped, / is itself + // When using filepath globbing: + // * Unix: \ gets escaped. 
/ is itself + // * Windows: \ and / can match each other -- they become [/\\] in regexp case '\\': #ifdef Q_OS_WIN + if (options.testFlag(NonPathWildcardConversion)) + rx += u"\\\\"; + else + rx += u"[/\\\\]"; + break; case '/': - rx += "[/\\\\]"_L1; + if (options.testFlag(NonPathWildcardConversion)) + rx += u'/'; + else + rx += u"[/\\\\]"; break; #endif case '$': @@ -1950,11 +2005,13 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil rx += wc[i++]; while (i < wclen && wc[i] != u']') { - // The '/' appearing in a character class invalidates the - // regular expression parsing. It also concerns '\\' on - // Windows OS types. - if (wc[i] == u'/' || wc[i] == nativePathSeparator) - return rx; + if (!options.testFlag(NonPathWildcardConversion)) { + // The '/' appearing in a character class invalidates the + // regular expression parsing. It also concerns '\\' on + // Windows OS types. + if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator) + return rx; + } if (wc[i] == u'\\') rx += u'\\'; rx += wc[i++]; @@ -2654,7 +2711,7 @@ QRegularExpressionMatch QRegularExpressionMatchIterator::next() } d.detach(); - return qExchange(d->next, d->next.d.constData()->nextMatch()); + return std::exchange(d->next, d->next.d.constData()->nextMatch()); } /*! @@ -3043,7 +3100,8 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"), QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"), - QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching") + QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching"), + QT_TRANSLATE_NOOP("QRegularExpression", "INTERNAL ERROR: invalid substring offset") }; #endif // #if 0 |