summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qregularexpression.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qregularexpression.cpp')
-rw-r--r--src/corelib/tools/qregularexpression.cpp256
1 files changed, 172 insertions, 84 deletions
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp
index 13eff07c04..1026d4ab28 100644
--- a/src/corelib/tools/qregularexpression.cpp
+++ b/src/corelib/tools/qregularexpression.cpp
@@ -43,7 +43,7 @@
#include <QtCore/qcoreapplication.h>
#include <QtCore/qhashfunctions.h>
-#include <QtCore/qreadwritelock.h>
+#include <QtCore/qmutex.h>
#include <QtCore/qvector.h>
#include <QtCore/qstringlist.h>
#include <QtCore/qdebug.h>
@@ -483,6 +483,11 @@ QT_BEGIN_NAMESPACE
Note the usage of the non-capturing group in order to preserve the meaning
of the branch operator inside the pattern.
+ The QRegularExpression::anchoredPattern() helper method does exactly that for
+ you.
+
+ \sa anchoredPattern
+
\section3 Porting from QRegExp's Partial Matching
When using QRegExp::exactMatch(), if an exact match was not found, one
@@ -516,10 +521,10 @@ QT_BEGIN_NAMESPACE
\section2 Wildcard matching
- There is no equivalent of wildcard matching in QRegularExpression.
- Nevertheless, rewriting a regular expression in wildcard syntax to a
- Perl-compatible regular expression is a very easy task, given the fact
- that wildcard syntax supported by QRegExp is very simple.
+ There is no direct way to do wildcard matching in QRegularExpression.
+ However, the wildcardToRegularExpression method is provided to translate
+ glob patterns into a Perl-compatible regular expression that can be used
+ for that purpose.
\section2 Other pattern syntaxes
@@ -720,21 +725,14 @@ QT_BEGIN_NAMESPACE
to the \c{/u} modifier in Perl regular expressions.
\value OptimizeOnFirstUsageOption
- The regular expression will be optimized (and possibly
- JIT-compiled) on its first usage, instead of after a certain (undefined)
- number of usages. See also \l{QRegularExpression::}{optimize()}.
- This enum value has been introduced in Qt 5.4.
+ This option is ignored. A regular expression is automatically optimized
+ (including JIT compiling) the first time it is used. This enum value
+ was introduced in Qt 5.4.
\value DontAutomaticallyOptimizeOption
- Regular expressions are automatically optimized after a
- certain number of usages; setting this option prevents such
- optimizations, therefore avoiding possible unpredictable spikes in
- CPU and memory usage. If both this option and the
- \c{OptimizeOnFirstUsageOption} option are set, then this option takes
- precedence. Note: this option will still let the regular expression
- to be optimized by manually calling
- \l{QRegularExpression::}{optimize()}. This enum value has been
- introduced in Qt 5.4.
+ This option is ignored. A regular expression is automatically optimized
+ (including JIT compiling) the first time it is used. This enum value
+ was introduced in Qt 5.4.
*/
/*!
@@ -791,13 +789,6 @@ QT_BEGIN_NAMESPACE
Qt 5.4.
*/
-// after how many usages we optimize the regexp
-#ifdef QT_BUILD_INTERNAL
-Q_AUTOTEST_EXPORT unsigned int qt_qregularexpression_optimize_after_use_count = 10;
-#else
-static const unsigned int qt_qregularexpression_optimize_after_use_count = 10;
-#endif // QT_BUILD_INTERNAL
-
/*!
\internal
*/
@@ -847,13 +838,7 @@ struct QRegularExpressionPrivate : QSharedData
void cleanCompiledPattern();
void compilePattern();
void getPatternInfo();
-
- enum OptimizePatternOption {
- LazyOptimizeOption,
- ImmediateOptimizeOption
- };
-
- void optimizePattern(OptimizePatternOption option);
+ void optimizePattern();
enum CheckSubjectStringOption {
CheckSubjectString,
@@ -878,16 +863,15 @@ struct QRegularExpressionPrivate : QSharedData
// *All* of the following members are managed while holding this mutex,
// except for isDirty which is set to true by QRegularExpression setters
// (right after a detach happened).
- mutable QReadWriteLock mutex;
+ mutable QMutex mutex;
// The PCRE code pointer is reference-counted by the QRegularExpressionPrivate
// objects themselves; when the private is copied (i.e. a detach happened)
- // they are set to 0
+ // it is set to nullptr
pcre2_code_16 *compiledPattern;
int errorCode;
int errorOffset;
int capturingCount;
- unsigned int usedCount;
bool usingCrLfNewlines;
bool isDirty;
};
@@ -952,11 +936,10 @@ QRegularExpressionPrivate::QRegularExpressionPrivate()
patternOptions(0),
pattern(),
mutex(),
- compiledPattern(0),
+ compiledPattern(nullptr),
errorCode(0),
errorOffset(-1),
capturingCount(0),
- usedCount(0),
usingCrLfNewlines(false),
isDirty(true)
{
@@ -974,8 +957,8 @@ QRegularExpressionPrivate::~QRegularExpressionPrivate()
\internal
Copies the private, which means copying only the pattern and the pattern
- options. The compiledPattern and the studyData pointers are NOT copied (we
- do not own them any more), and in general all the members set when
+ options. The compiledPattern pointer is NOT copied (we
+ do not own it any more), and in general all the members set when
compiling a pattern are set to default values. isDirty is set back to true
so that the pattern has to be recompiled again.
*/
@@ -984,11 +967,10 @@ QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPri
patternOptions(other.patternOptions),
pattern(other.pattern),
mutex(),
- compiledPattern(0),
+ compiledPattern(nullptr),
errorCode(0),
errorOffset(-1),
capturingCount(0),
- usedCount(0),
usingCrLfNewlines(false),
isDirty(true)
{
@@ -1000,11 +982,10 @@ QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPri
void QRegularExpressionPrivate::cleanCompiledPattern()
{
pcre2_code_free_16(compiledPattern);
- compiledPattern = 0;
+ compiledPattern = nullptr;
errorCode = 0;
errorOffset = -1;
capturingCount = 0;
- usedCount = 0;
usingCrLfNewlines = false;
}
@@ -1013,7 +994,7 @@ void QRegularExpressionPrivate::cleanCompiledPattern()
*/
void QRegularExpressionPrivate::compilePattern()
{
- const QWriteLocker lock(&mutex);
+ const QMutexLocker lock(&mutex);
if (!isDirty)
return;
@@ -1040,6 +1021,7 @@ void QRegularExpressionPrivate::compilePattern()
errorCode = 0;
}
+ optimizePattern();
getPatternInfo();
}
@@ -1140,15 +1122,10 @@ static bool isJitEnabled()
The purpose of the function is to call pcre2_jit_compile_16, which
JIT-compiles the pattern.
- It gets called by doMatch() every time a match is performed.
-
- As of now, the optimizations on the pattern are performed after a certain
- number of usages (i.e. the qt_qregularexpression_optimize_after_use_count
- constant) unless the DontAutomaticallyOptimizeOption option is set on the
- QRegularExpression object, or anyhow by calling optimize() (which will pass
- ImmediateOptimizeOption).
+ It gets called when a pattern is recompiled by us (in compilePattern()),
+ under mutex protection.
*/
-void QRegularExpressionPrivate::optimizePattern(OptimizePatternOption option)
+void QRegularExpressionPrivate::optimizePattern()
{
Q_ASSERT(compiledPattern);
@@ -1157,11 +1134,6 @@ void QRegularExpressionPrivate::optimizePattern(OptimizePatternOption option)
if (!enableJit)
return;
- const QWriteLocker lock(&mutex);
-
- if ((option == LazyOptimizeOption) && (++usedCount != qt_qregularexpression_optimize_after_use_count))
- return;
-
pcre2_jit_compile_16(compiledPattern, PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
}
@@ -1267,22 +1239,12 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
return priv;
}
- // skip optimizing and doing the actual matching if NoMatch type was requested
+ // skip doing the actual matching if NoMatch type was requested
if (matchType == QRegularExpression::NoMatch) {
priv->isValid = true;
return priv;
}
- if (!(patternOptions & QRegularExpression::DontAutomaticallyOptimizeOption)) {
- const OptimizePatternOption optimizePatternOption =
- (patternOptions & QRegularExpression::OptimizeOnFirstUsageOption)
- ? ImmediateOptimizeOption
- : LazyOptimizeOption;
-
- // this is mutex protected
- const_cast<QRegularExpressionPrivate *>(this)->optimizePattern(optimizePatternOption);
- }
-
int pcreOptions = convertToPcreOptions(matchOptions);
if (matchType == QRegularExpression::PartialPreferCompleteMatch)
@@ -1307,8 +1269,6 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
int result;
- QReadLocker lock(&mutex);
-
if (!previousMatchWasEmpty) {
result = safe_pcre2_match_16(compiledPattern,
subjectUtf16, subjectLength,
@@ -1340,8 +1300,6 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
}
}
- lock.unlock();
-
#ifdef QREGULAREXPRESSION_DEBUG
qDebug() << "Matching" << pattern << "against" << subject
<< "starting at" << subjectStart << "len" << subjectLength
@@ -1810,22 +1768,14 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QStringRef
/*!
\since 5.4
- Forces an immediate optimization of the pattern, including
- JIT-compiling it (if the JIT compiler is enabled).
-
- Patterns are normally optimized only after a certain number of usages.
- If you can predict that this QRegularExpression object is going to be
- used for several matches, it may be convenient to optimize it in
- advance by calling this function.
+ Compiles the pattern immediately, including JIT compiling it (if
+ the JIT is enabled) for optimization.
- \sa QRegularExpression::OptimizeOnFirstUsageOption
+ \sa isValid(), {Debugging Code that Uses QRegularExpression}
*/
void QRegularExpression::optimize() const
{
- if (!isValid()) // will compile the pattern
- return;
-
- d->optimizePattern(QRegularExpressionPrivate::ImmediateOptimizeOption);
+ d.data()->compilePattern();
}
/*!
@@ -1926,6 +1876,141 @@ QString QRegularExpression::escape(const QString &str)
}
/*!
+ \since 5.12
+
+ Returns a regular expression representation of the given glob \a pattern.
+ The transformation is targeting file path globbing, which means in particular
+ that path separators receive special treatment. This implies that it is not
+ just a basic translation from "*" to ".*".
+
+ \snippet code/src_corelib_tools_qregularexpression.cpp 31
+
+ \warning Unlike QRegExp, this implementation follows closely the definition
+ of wildcard for glob patterns:
+ \table
+ \row \li \b{c}
+ \li Any character represents itself apart from those mentioned
+ below. Thus \b{c} matches the character \e c.
+ \row \li \b{?}
+ \li Matches any single character. It is the same as
+ \b{.} in full regexps.
+ \row \li \b{*}
+ \li Matches zero or more of any characters. It is the
+ same as \b{.*} in full regexps.
+ \row \li \b{[abc]}
+ \li Matches one character given in the bracket.
+ \row \li \b{[a-c]}
+ \li Matches one character from the range given in the bracket.
+ \row \li \b{[!abc]}
+ \li Matches one character that is not given in the bracket. It is the
+ same as \b{[^abc]} in full regexp.
+ \row \li \b{[!a-c]}
+ \li Matches one character that is not from the range given in the
+ bracket. It is the same as \b{[^a-c]} in full regexp.
+ \endtable
+
+ \note The backslash (\\) character is \e not an escape char in this context.
+ In order to match one of the special characters, place it in square brackets
+ (for example, "[?]").
+
+ More information about the implementation can be found in:
+ \list
+ \li \l {https://en.wikipedia.org/wiki/Glob_(programming)} {The Wikipedia Glob article}
+ \li \c man 7 glob
+ \endlist
+
+ \sa escape()
+*/
+QString QRegularExpression::wildcardToRegularExpression(const QString &pattern)
+{
+ const int wclen = pattern.length();
+ QString rx;
+ rx.reserve(wclen + wclen / 16);
+ int i = 0;
+ const QChar *wc = pattern.unicode();
+
+#ifdef Q_OS_WIN
+ const QLatin1Char nativePathSeparator('\\');
+ const QLatin1String starEscape("[^/\\\\]*");
+ const QLatin1String questionMarkEscape("[^/\\\\]");
+#else
+ const QLatin1Char nativePathSeparator('/');
+ const QLatin1String starEscape("[^/]*");
+ const QLatin1String questionMarkEscape("[^/]");
+#endif
+
+ while (i < wclen) {
+ const QChar c = wc[i++];
+ switch (c.unicode()) {
+ case '*':
+ rx += starEscape;
+ break;
+ case '?':
+ rx += questionMarkEscape;
+ break;
+ case '\\':
+#ifdef Q_OS_WIN
+ case '/':
+ rx += QLatin1String("[/\\\\]");
+ break;
+#endif
+ case '$':
+ case '(':
+ case ')':
+ case '+':
+ case '.':
+ case '^':
+ case '{':
+ case '|':
+ case '}':
+ rx += QLatin1Char('\\');
+ rx += c;
+ break;
+ case '[':
+ rx += c;
+ // Support for the [!abc] or [!a-c] syntax
+ if (i < wclen) {
+ if (wc[i] == QLatin1Char('!')) {
+ rx += QLatin1Char('^');
+ ++i;
+ }
+
+ if (i < wclen && wc[i] == QLatin1Char(']'))
+ rx += wc[i++];
+
+ while (i < wclen && wc[i] != QLatin1Char(']')) {
+ // The '/' appearing in a character class invalidates the
+ // regular expression parsing. It also concerns '\\' on
+ // Windows OS types.
+ if (wc[i] == QLatin1Char('/') || wc[i] == nativePathSeparator)
+ return rx;
+ if (wc[i] == QLatin1Char('\\'))
+ rx += QLatin1Char('\\');
+ rx += wc[i++];
+ }
+ }
+ break;
+ default:
+ rx += c;
+ break;
+ }
+ }
+
+ return rx;
+}
+
+/*!
+ \fn QRegularExpression::anchoredPattern(const QString &expression)
+
+ \since 5.12
+
+ Returns the expression wrapped between the \c{\A} and \c{\z} anchors to be
+ used for exact matching.
+
+ \sa {Porting from QRegExp's Exact Matching}
+*/
+
+/*!
\since 5.1
Constructs a valid, empty QRegularExpressionMatch object. The regular
@@ -2657,10 +2742,13 @@ QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOption
flags.append("DontCaptureOption|");
if (patternOptions & QRegularExpression::UseUnicodePropertiesOption)
flags.append("UseUnicodePropertiesOption|");
+QT_WARNING_PUSH
+QT_WARNING_DISABLE_DEPRECATED
if (patternOptions & QRegularExpression::OptimizeOnFirstUsageOption)
flags.append("OptimizeOnFirstUsageOption|");
if (patternOptions & QRegularExpression::DontAutomaticallyOptimizeOption)
flags.append("DontAutomaticallyOptimizeOption|");
+QT_WARNING_POP
flags.chop(1);
}