diff options
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r-- | src/corelib/text/qregularexpression.cpp | 111 |
1 files changed, 76 insertions, 35 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp index 2e5eabbbc8..95fd0e3d9a 100644 --- a/src/corelib/text/qregularexpression.cpp +++ b/src/corelib/text/qregularexpression.cpp @@ -894,7 +894,7 @@ void QRegularExpressionPrivate::compilePattern() PCRE2_SIZE patternErrorOffset; compiledPattern = pcre2_compile_16(reinterpret_cast<PCRE2_SPTR16>(pattern.constData()), - pattern.length(), + pattern.size(), options, &errorCode, &patternErrorOffset, @@ -954,7 +954,7 @@ struct PcreJitStackFree pcre2_jit_stack_free_16(stack); } }; -static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks; +Q_CONSTINIT static thread_local std::unique_ptr<pcre2_jit_stack_16, PcreJitStackFree> jitStacks; } /*! @@ -1104,7 +1104,7 @@ void QRegularExpressionPrivate::doMatch(QRegularExpressionMatchPrivate *priv, const QRegularExpressionMatchPrivate *previous) const { Q_ASSERT(priv); - Q_ASSUME(priv != previous); + Q_ASSERT(priv != previous); const qsizetype subjectLength = priv->subject.size(); @@ -1508,7 +1508,7 @@ QStringList QRegularExpression::namedCaptureGroups() const reinterpret_cast<const char16_t *>(namedCapturingTable) + namedCapturingTableEntrySize * i; const int index = *currentNamedCapturingTableRow; - result[index] = QString::fromUtf16(currentNamedCapturingTableRow + 1); + result[index] = QStringView(currentNamedCapturingTableRow + 1).toString(); } return result; @@ -1540,10 +1540,10 @@ QString QRegularExpression::errorString() const QString errorString; int errorStringLength; do { - errorString.resize(errorString.length() + 64); + errorString.resize(errorString.size() + 64); errorStringLength = pcre2_get_error_message_16(d->errorCode, reinterpret_cast<ushort *>(errorString.data()), - errorString.length()); + errorString.size()); } while (errorStringLength < 0); errorString.resize(errorStringLength); @@ -1856,22 +1856,30 @@ QString QRegularExpression::escape(QStringView str) \value UnanchoredWildcardConversion The 
conversion will not anchor the pattern. This allows for partial string matches of wildcard expressions. + + \value [since 6.6] NonPathWildcardConversion + The conversion will \e{not} interpret the pattern as filepath globbing. + + \sa QRegularExpression::wildcardToRegularExpression */ /*! \since 5.15 Returns a regular expression representation of the given glob \a pattern. - The transformation is targeting file path globbing, which means in particular - that path separators receive special treatment. This implies that it is not - just a basic translation from "*" to ".*". + + There are two transformations possible, one that targets file path + globbing, and another one which is more generic. + + By default, the transformation is targeting file path globbing, + which means in particular that path separators receive special + treatment. This implies that it is not just a basic translation + from "*" to ".*" and similar. \snippet code/src_corelib_text_qregularexpression.cpp 31 - By default, the returned regular expression is fully anchored. In other - words, there is no need of calling anchoredPattern() again on the - result. To get a regular expression that is not anchored, pass - UnanchoredWildcardConversion as the conversion \a options. + The more generic globbing transformation is available by passing + \c NonPathWildcardConversion in the conversion \a options. This implementation follows closely the definition of wildcard for glob patterns: @@ -1880,10 +1888,12 @@ QString QRegularExpression::escape(QStringView str) \li Any character represents itself apart from those mentioned below. Thus \b{c} matches the character \e c. \row \li \b{?} - \li Matches any single character. It is the same as - \b{.} in full regexps. + \li Matches any single character, except for a path separator + (in case file path globbing has been selected). It is the + same as \b{.} in full regexps. \row \li \b{*} - \li Matches zero or more of any characters. 
It is the + \li Matches zero or more of any characters, except for path + separators (in case file path globbing has been selected). It is the same as \b{.*} in full regexps. \row \li \b{[abc]} \li Matches one character given in the bracket. @@ -1897,9 +1907,10 @@ QString QRegularExpression::escape(QStringView str) bracket. It is the same as \b{[^a-c]} in full regexp. \endtable - \note The backslash (\\) character is \e not an escape char in this context. - In order to match one of the special characters, place it in square brackets - (for example, \c{[?]}). + \note For historical reasons, a backslash (\\) character is \e not + an escape char in this context. In order to match one of the + special characters, place it in square brackets (for example, + \c{[?]}). More information about the implementation can be found in: \list @@ -1907,6 +1918,11 @@ QString QRegularExpression::escape(QStringView str) \li \c {man 7 glob} \endlist + By default, the returned regular expression is fully anchored. In other + words, there is no need of calling anchoredPattern() again on the + result. To get a regular expression that is not anchored, pass + UnanchoredWildcardConversion in the conversion \a options. 
+ \sa escape() */ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, WildcardConversionOptions options) @@ -1917,29 +1933,51 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil qsizetype i = 0; const QChar *wc = pattern.data(); + struct GlobSettings { + char16_t nativePathSeparator; + QStringView starEscape; + QStringView questionMarkEscape; + }; + + const GlobSettings settings = [options]() { + if (options.testFlag(NonPathWildcardConversion)) { + // using [\d\D] to mean "match everything"; + // dot doesn't match newlines, unless in /s mode + return GlobSettings{ u'\0', u"[\\d\\D]*", u"[\\d\\D]" }; + } else { #ifdef Q_OS_WIN - const char16_t nativePathSeparator = u'\\'; - const auto starEscape = "[^/\\\\]*"_L1; - const auto questionMarkEscape = "[^/\\\\]"_L1; + return GlobSettings{ u'\\', u"[^/\\\\]*", u"[^/\\\\]" }; #else - const char16_t nativePathSeparator = u'/'; - const auto starEscape = "[^/]*"_L1; - const auto questionMarkEscape = "[^/]"_L1; + return GlobSettings{ u'/', u"[^/]*", u"[^/]" }; #endif + } + }(); while (i < wclen) { const QChar c = wc[i++]; switch (c.unicode()) { case '*': - rx += starEscape; + rx += settings.starEscape; break; case '?': - rx += questionMarkEscape; + rx += settings.questionMarkEscape; break; + // When not using filepath globbing: \ is escaped, / is itself + // When using filepath globbing: + // * Unix: \ gets escaped. 
/ is itself + // * Windows: \ and / can match each other -- they become [/\\] in regexp case '\\': #ifdef Q_OS_WIN + if (options.testFlag(NonPathWildcardConversion)) + rx += u"\\\\"; + else + rx += u"[/\\\\]"; + break; case '/': - rx += "[/\\\\]"_L1; + if (options.testFlag(NonPathWildcardConversion)) + rx += u'/'; + else + rx += u"[/\\\\]"; break; #endif case '$': @@ -1967,11 +2005,13 @@ QString QRegularExpression::wildcardToRegularExpression(QStringView pattern, Wil rx += wc[i++]; while (i < wclen && wc[i] != u']') { - // The '/' appearing in a character class invalidates the - // regular expression parsing. It also concerns '\\' on - // Windows OS types. - if (wc[i] == u'/' || wc[i] == nativePathSeparator) - return rx; + if (!options.testFlag(NonPathWildcardConversion)) { + // The '/' appearing in a character class invalidates the + // regular expression parsing. It also concerns '\\' on + // Windows OS types. + if (wc[i] == u'/' || wc[i] == settings.nativePathSeparator) + return rx; + } if (wc[i] == u'\\') rx += u'\\'; rx += wc[i++]; @@ -2671,7 +2711,7 @@ QRegularExpressionMatch QRegularExpressionMatchIterator::next() } d.detach(); - return qExchange(d->next, d->next.d.constData()->nextMatch()); + return std::exchange(d->next, d->next.d.constData()->nextMatch()); } /*! @@ -3060,7 +3100,8 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"), QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"), - QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching") + QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching"), + QT_TRANSLATE_NOOP("QRegularExpression", "INTERNAL ERROR: invalid substring offset") }; #endif // #if 0 |