diff options
Diffstat (limited to 'src/corelib/tools/qregularexpression.cpp')
-rw-r--r-- | src/corelib/tools/qregularexpression.cpp | 256 |
1 files changed, 172 insertions, 84 deletions
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp index 13eff07c04..1026d4ab28 100644 --- a/src/corelib/tools/qregularexpression.cpp +++ b/src/corelib/tools/qregularexpression.cpp @@ -43,7 +43,7 @@ #include <QtCore/qcoreapplication.h> #include <QtCore/qhashfunctions.h> -#include <QtCore/qreadwritelock.h> +#include <QtCore/qmutex.h> #include <QtCore/qvector.h> #include <QtCore/qstringlist.h> #include <QtCore/qdebug.h> @@ -483,6 +483,11 @@ QT_BEGIN_NAMESPACE Note the usage of the non-capturing group in order to preserve the meaning of the branch operator inside the pattern. + The QRegularExpression::anchoredPattern() helper method does exactly that for + you. + + \sa anchoredPattern + \section3 Porting from QRegExp's Partial Matching When using QRegExp::exactMatch(), if an exact match was not found, one @@ -516,10 +521,10 @@ QT_BEGIN_NAMESPACE \section2 Wildcard matching - There is no equivalent of wildcard matching in QRegularExpression. - Nevertheless, rewriting a regular expression in wildcard syntax to a - Perl-compatible regular expression is a very easy task, given the fact - that wildcard syntax supported by QRegExp is very simple. + There is no direct way to do wildcard matching in QRegularExpression. + However, the wildcardToRegularExpression method is provided to translate + glob patterns into a Perl-compatible regular expression that can be used + for that purpose. \section2 Other pattern syntaxes @@ -720,21 +725,14 @@ QT_BEGIN_NAMESPACE to the \c{/u} modifier in Perl regular expressions. \value OptimizeOnFirstUsageOption - The regular expression will be optimized (and possibly - JIT-compiled) on its first usage, instead of after a certain (undefined) - number of usages. See also \l{QRegularExpression::}{optimize()}. - This enum value has been introduced in Qt 5.4. + This option is ignored. A regular expression is automatically optimized + (including JIT compiling) the first time it is used. This enum value + was introduced in Qt 5.4. \value DontAutomaticallyOptimizeOption - Regular expressions are automatically optimized after a - certain number of usages; setting this option prevents such - optimizations, therefore avoiding possible unpredictable spikes in - CPU and memory usage. If both this option and the - \c{OptimizeOnFirstUsageOption} option are set, then this option takes - precedence. Note: this option will still let the regular expression - to be optimized by manually calling - \l{QRegularExpression::}{optimize()}. This enum value has been - introduced in Qt 5.4. + This option is ignored. A regular expression is automatically optimized + (including JIT compiling) the first time it is used. This enum value + was introduced in Qt 5.4. */ /*! @@ -791,13 +789,6 @@ QT_BEGIN_NAMESPACE Qt 5.4. */ -// after how many usages we optimize the regexp -#ifdef QT_BUILD_INTERNAL -Q_AUTOTEST_EXPORT unsigned int qt_qregularexpression_optimize_after_use_count = 10; -#else -static const unsigned int qt_qregularexpression_optimize_after_use_count = 10; -#endif // QT_BUILD_INTERNAL - /*! \internal */ @@ -847,13 +838,7 @@ struct QRegularExpressionPrivate : QSharedData void cleanCompiledPattern(); void compilePattern(); void getPatternInfo(); - - enum OptimizePatternOption { - LazyOptimizeOption, - ImmediateOptimizeOption - }; - - void optimizePattern(OptimizePatternOption option); + void optimizePattern(); enum CheckSubjectStringOption { CheckSubjectString, @@ -878,16 +863,15 @@ struct QRegularExpressionPrivate : QSharedData // *All* of the following members are managed while holding this mutex, // except for isDirty which is set to true by QRegularExpression setters // (right after a detach happened). - mutable QReadWriteLock mutex; + mutable QMutex mutex; // The PCRE code pointer is reference-counted by the QRegularExpressionPrivate // objects themselves; when the private is copied (i.e. a detach happened) - // they are set to 0 + // it is set to nullptr pcre2_code_16 *compiledPattern; int errorCode; int errorOffset; int capturingCount; - unsigned int usedCount; bool usingCrLfNewlines; bool isDirty; }; @@ -952,11 +936,10 @@ QRegularExpressionPrivate::QRegularExpressionPrivate() patternOptions(0), pattern(), mutex(), - compiledPattern(0), + compiledPattern(nullptr), errorCode(0), errorOffset(-1), capturingCount(0), - usedCount(0), usingCrLfNewlines(false), isDirty(true) { @@ -974,8 +957,8 @@ QRegularExpressionPrivate::~QRegularExpressionPrivate() \internal Copies the private, which means copying only the pattern and the pattern - options. The compiledPattern and the studyData pointers are NOT copied (we - do not own them any more), and in general all the members set when + options. The compiledPattern pointer is NOT copied (we + do not own it any more), and in general all the members set when compiling a pattern are set to default values. isDirty is set back to true so that the pattern has to be recompiled again. */ @@ -984,11 +967,10 @@ QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPri patternOptions(other.patternOptions), pattern(other.pattern), mutex(), - compiledPattern(0), + compiledPattern(nullptr), errorCode(0), errorOffset(-1), capturingCount(0), - usedCount(0), usingCrLfNewlines(false), isDirty(true) { @@ -1000,11 +982,10 @@ QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPri void QRegularExpressionPrivate::cleanCompiledPattern() { pcre2_code_free_16(compiledPattern); - compiledPattern = 0; + compiledPattern = nullptr; errorCode = 0; errorOffset = -1; capturingCount = 0; - usedCount = 0; usingCrLfNewlines = false; } @@ -1013,7 +994,7 @@ void QRegularExpressionPrivate::cleanCompiledPattern() */ void QRegularExpressionPrivate::compilePattern() { - const QWriteLocker lock(&mutex); + const QMutexLocker lock(&mutex); if (!isDirty) return; @@ -1040,6 +1021,7 @@ void QRegularExpressionPrivate::compilePattern() errorCode = 0; } + optimizePattern(); getPatternInfo(); } @@ -1140,15 +1122,10 @@ static bool isJitEnabled() The purpose of the function is to call pcre2_jit_compile_16, which JIT-compiles the pattern. - It gets called by doMatch() every time a match is performed. - - As of now, the optimizations on the pattern are performed after a certain - number of usages (i.e. the qt_qregularexpression_optimize_after_use_count - constant) unless the DontAutomaticallyOptimizeOption option is set on the - QRegularExpression object, or anyhow by calling optimize() (which will pass - ImmediateOptimizeOption). + It gets called when a pattern is recompiled by us (in compilePattern()), + under mutex protection. */ -void QRegularExpressionPrivate::optimizePattern(OptimizePatternOption option) +void QRegularExpressionPrivate::optimizePattern() { Q_ASSERT(compiledPattern); @@ -1157,11 +1134,6 @@ void QRegularExpressionPrivate::optimizePattern(OptimizePatternOption option) if (!enableJit) return; - const QWriteLocker lock(&mutex); - - if ((option == LazyOptimizeOption) && (++usedCount != qt_qregularexpression_optimize_after_use_count)) - return; - pcre2_jit_compile_16(compiledPattern, PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); } @@ -1267,22 +1239,12 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString return priv; } - // skip optimizing and doing the actual matching if NoMatch type was requested + // skip doing the actual matching if NoMatch type was requested if (matchType == QRegularExpression::NoMatch) { priv->isValid = true; return priv; } - if (!(patternOptions & QRegularExpression::DontAutomaticallyOptimizeOption)) { - const OptimizePatternOption optimizePatternOption = - (patternOptions & QRegularExpression::OptimizeOnFirstUsageOption) - ? ImmediateOptimizeOption - : LazyOptimizeOption; - - // this is mutex protected - const_cast<QRegularExpressionPrivate *>(this)->optimizePattern(optimizePatternOption); - } - int pcreOptions = convertToPcreOptions(matchOptions); if (matchType == QRegularExpression::PartialPreferCompleteMatch) @@ -1307,8 +1269,6 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString int result; - QReadLocker lock(&mutex); - if (!previousMatchWasEmpty) { result = safe_pcre2_match_16(compiledPattern, subjectUtf16, subjectLength, @@ -1340,8 +1300,6 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString } } - lock.unlock(); - #ifdef QREGULAREXPRESSION_DEBUG qDebug() << "Matching" << pattern << "against" << subject << "starting at" << subjectStart << "len" << subjectLength @@ -1810,22 +1768,14 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QStringRef /*! \since 5.4 - Forces an immediate optimization of the pattern, including - JIT-compiling it (if the JIT compiler is enabled). - - Patterns are normally optimized only after a certain number of usages. - If you can predict that this QRegularExpression object is going to be - used for several matches, it may be convenient to optimize it in - advance by calling this function. + Compiles the pattern immediately, including JIT compiling it (if + the JIT is enabled) for optimization. - \sa QRegularExpression::OptimizeOnFirstUsageOption + \sa isValid(), {Debugging Code that Uses QRegularExpression} */ void QRegularExpression::optimize() const { - if (!isValid()) // will compile the pattern - return; - - d->optimizePattern(QRegularExpressionPrivate::ImmediateOptimizeOption); + d.data()->compilePattern(); } /*! @@ -1926,6 +1876,141 @@ QString QRegularExpression::escape(const QString &str) } /*! + \since 5.12 + + Returns a regular expression representation of the given glob \a pattern. + The transformation is targeting file path globbing, which means in particular + that path separators receive special treatment. This implies that it is not + just a basic translation from "*" to ".*". + + \snippet code/src_corelib_tools_qregularexpression.cpp 31 + + \warning Unlike QRegExp, this implementation follows closely the definition + of wildcard for glob patterns: + \table + \row \li \b{c} + \li Any character represents itself apart from those mentioned + below. Thus \b{c} matches the character \e c. + \row \li \b{?} + \li Matches any single character. It is the same as + \b{.} in full regexps. + \row \li \b{*} + \li Matches zero or more of any characters. It is the + same as \b{.*} in full regexps. + \row \li \b{[abc]} + \li Matches one character given in the bracket. + \row \li \b{[a-c]} + \li Matches one character from the range given in the bracket. + \row \li \b{[!abc]} + \li Matches one character that is not given in the bracket. It is the + same as \b{[^abc]} in full regexp. + \row \li \b{[!a-c]} + \li Matches one character that is not from the range given in the + bracket. It is the same as \b{[^a-c]} in full regexp. + \endtable + + \note The backslash (\\) character is \e not an escape char in this context. + In order to match one of the special characters, place it in square brackets + (for example, "[?]"). + + More information about the implementation can be found in: + \list + \li \l {https://en.wikipedia.org/wiki/Glob_(programming)} {The Wikipedia Glob article} + \li \c man 7 glob + \endlist + + \sa escape() +*/ +QString QRegularExpression::wildcardToRegularExpression(const QString &pattern) +{ + const int wclen = pattern.length(); + QString rx; + rx.reserve(wclen + wclen / 16); + int i = 0; + const QChar *wc = pattern.unicode(); + +#ifdef Q_OS_WIN + const QLatin1Char nativePathSeparator('\\'); + const QLatin1String starEscape("[^/\\\\]*"); + const QLatin1String questionMarkEscape("[^/\\\\]"); +#else + const QLatin1Char nativePathSeparator('/'); + const QLatin1String starEscape("[^/]*"); + const QLatin1String questionMarkEscape("[^/]"); +#endif + + while (i < wclen) { + const QChar c = wc[i++]; + switch (c.unicode()) { + case '*': + rx += starEscape; + break; + case '?': + rx += questionMarkEscape; + break; + case '\\': +#ifdef Q_OS_WIN + case '/': + rx += QLatin1String("[/\\\\]"); + break; +#endif + case '$': + case '(': + case ')': + case '+': + case '.': + case '^': + case '{': + case '|': + case '}': + rx += QLatin1Char('\\'); + rx += c; + break; + case '[': + rx += c; + // Support for the [!abc] or [!a-c] syntax + if (i < wclen) { + if (wc[i] == QLatin1Char('!')) { + rx += QLatin1Char('^'); + ++i; + } + + if (i < wclen && wc[i] == QLatin1Char(']')) + rx += wc[i++]; + + while (i < wclen && wc[i] != QLatin1Char(']')) { + // The '/' appearing in a character class invalidates the + // regular expression parsing. It also concerns '\\' on + // Windows OS types. + if (wc[i] == QLatin1Char('/') || wc[i] == nativePathSeparator) + return rx; + if (wc[i] == QLatin1Char('\\')) + rx += QLatin1Char('\\'); + rx += wc[i++]; + } + } + break; + default: + rx += c; + break; + } + } + + return rx; +} + +/*! + \fn QRegularExpression::anchoredPattern(const QString &expression) + + \since 5.12 + + Returns the expression wrapped between the \c{\A} and \c{\z} anchors to be + used for exact matching. + + \sa {Porting from QRegExp's Exact Matching} +*/ + +/*! \since 5.1 Constructs a valid, empty QRegularExpressionMatch object. The regular @@ -2657,10 +2742,13 @@ QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOption flags.append("DontCaptureOption|"); if (patternOptions & QRegularExpression::UseUnicodePropertiesOption) flags.append("UseUnicodePropertiesOption|"); +QT_WARNING_PUSH +QT_WARNING_DISABLE_DEPRECATED if (patternOptions & QRegularExpression::OptimizeOnFirstUsageOption) flags.append("OptimizeOnFirstUsageOption|"); if (patternOptions & QRegularExpression::DontAutomaticallyOptimizeOption) flags.append("DontAutomaticallyOptimizeOption|"); +QT_WARNING_POP flags.chop(1); } |