summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qregularexpression.cpp
diff options
context:
space:
mode:
author Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> 2018-06-19 21:25:02 +0200
committer Giuseppe D'Angelo <giuseppe.dangelo@kdab.com> 2018-06-22 19:43:54 +0000
commit 06af9a1e3891751d89a64e9013e204176c28ebdc (patch)
tree 6043152468ef3a8319a4d15d5f5cb621481ad13c /src/corelib/tools/qregularexpression.cpp
parent 305f57411d450c9b19330cd56f64702163bdcb34 (diff)
QRegularExpression: refactor pattern optimization
After the move to PCRE2, optimizing patterns has been a thorn in the side due to the fact that PCRE2's JIT compiler modifies the pattern object itself (instead of returning a new set of data, like PCRE1 did). To make this fit with the existing behavior, a read/write lock was introduced, with the read part locking when matching and the write when compiling (or JIT-compiling) the pattern. This locking strategy, however, introduced a performance issue, as we needed: * to acquire a write lock to compile/optimize the pattern (incl. the common case where the pattern was already compiled, so bailing out immediately); * to acquire a read lock during the actual match, to prevent some other thread from optimizing the pattern under our nose. This was due to the "lazy" optimization policy of QRegularExpression -- optimize a pattern after a certain number of usages. The excessive amount of locking effectively limited scalability. Simplify the code, and drop that policy altogether: since JIT compiling in PCRE2 is faster and pretty much "always recommended", just always do it for any pattern (unless it gets disabled via env variables) when compiling it. This allows going back to a plain QMutex, and now the actual matching doesn't require acquiring any locks any longer. Of course, there is still a mutex acquired just before matching for checking whether the pattern needs recompiling in the first place; this can probably be further optimized via double-checked locking (using atomics), but that is not being done right now. This shift makes a couple of pattern options controlling optimization useless, and allows centralizing the 3 QRegularExpression tests (which were actually the very same test, just setting slightly different optimization strategies). While at it, install a stress-test for threading, with the idea of running it under TSAN or helgrind to catch bugs in QRegularExpression's locking.
[ChangeLog][Important Behavior Changes][QRegularExpression] Regular expressions are now automatically optimized (including JIT compiling) on their first usage. The pattern options OptimizeOnFirstUsageOption and DontAutomaticallyOptimizeOption no longer have any effect, and will get removed in a future version of Qt. QRegularExpression::optimize() can still be used to compile and optimize the regular expression in advance (before any match), if needed. Task-number: QTBUG-66781 Change-Id: Ia0e97208ae78255fe811b78029ed01c204e47bd2 Reviewed-by: David Faure <david.faure@kdab.com>
Diffstat (limited to 'src/corelib/tools/qregularexpression.cpp')
-rw-r--r-- src/corelib/tools/qregularexpression.cpp 94
1 files changed, 19 insertions, 75 deletions
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp
index 29ad578013..4388fe5712 100644
--- a/src/corelib/tools/qregularexpression.cpp
+++ b/src/corelib/tools/qregularexpression.cpp
@@ -43,7 +43,7 @@
#include <QtCore/qcoreapplication.h>
#include <QtCore/qhashfunctions.h>
-#include <QtCore/qreadwritelock.h>
+#include <QtCore/qmutex.h>
#include <QtCore/qvector.h>
#include <QtCore/qstringlist.h>
#include <QtCore/qdebug.h>
@@ -720,21 +720,14 @@ QT_BEGIN_NAMESPACE
to the \c{/u} modifier in Perl regular expressions.
\value OptimizeOnFirstUsageOption
- The regular expression will be optimized (and possibly
- JIT-compiled) on its first usage, instead of after a certain (undefined)
- number of usages. See also \l{QRegularExpression::}{optimize()}.
- This enum value has been introduced in Qt 5.4.
+ This option is ignored. A regular expression is automatically optimized
+ (including JIT compiling) the first time it is used. This enum value
+ was introduced in Qt 5.4.
\value DontAutomaticallyOptimizeOption
- Regular expressions are automatically optimized after a
- certain number of usages; setting this option prevents such
- optimizations, therefore avoiding possible unpredictable spikes in
- CPU and memory usage. If both this option and the
- \c{OptimizeOnFirstUsageOption} option are set, then this option takes
- precedence. Note: this option will still let the regular expression
- to be optimized by manually calling
- \l{QRegularExpression::}{optimize()}. This enum value has been
- introduced in Qt 5.4.
+ This option is ignored. A regular expression is automatically optimized
+ (including JIT compiling) the first time it is used. This enum value
+ was introduced in Qt 5.4.
*/
/*!
@@ -791,14 +784,6 @@ QT_BEGIN_NAMESPACE
Qt 5.4.
*/
-// after how many usages we optimize the regexp
-#ifdef QT_BUILD_INTERNAL
-Q_AUTOTEST_EXPORT unsigned int qt_qregularexpression_optimize_after_use_count = 10;
-#else
-static const unsigned int qt_qregularexpression_optimize_after_use_count = 10;
-#endif // QT_BUILD_INTERNAL
-
-
namespace QtPrivate {
/*!
internal
@@ -924,13 +909,7 @@ struct QRegularExpressionPrivate : QSharedData
void cleanCompiledPattern();
void compilePattern();
void getPatternInfo();
-
- enum OptimizePatternOption {
- LazyOptimizeOption,
- ImmediateOptimizeOption
- };
-
- void optimizePattern(OptimizePatternOption option);
+ void optimizePattern();
enum CheckSubjectStringOption {
CheckSubjectString,
@@ -955,7 +934,7 @@ struct QRegularExpressionPrivate : QSharedData
// *All* of the following members are managed while holding this mutex,
// except for isDirty which is set to true by QRegularExpression setters
// (right after a detach happened).
- mutable QReadWriteLock mutex;
+ mutable QMutex mutex;
// The PCRE code pointer is reference-counted by the QRegularExpressionPrivate
// objects themselves; when the private is copied (i.e. a detach happened)
@@ -964,7 +943,6 @@ struct QRegularExpressionPrivate : QSharedData
int errorCode;
int errorOffset;
int capturingCount;
- unsigned int usedCount;
bool usingCrLfNewlines;
bool isDirty;
};
@@ -1033,7 +1011,6 @@ QRegularExpressionPrivate::QRegularExpressionPrivate()
errorCode(0),
errorOffset(-1),
capturingCount(0),
- usedCount(0),
usingCrLfNewlines(false),
isDirty(true)
{
@@ -1065,7 +1042,6 @@ QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPri
errorCode(0),
errorOffset(-1),
capturingCount(0),
- usedCount(0),
usingCrLfNewlines(false),
isDirty(true)
{
@@ -1081,7 +1057,6 @@ void QRegularExpressionPrivate::cleanCompiledPattern()
errorCode = 0;
errorOffset = -1;
capturingCount = 0;
- usedCount = 0;
usingCrLfNewlines = false;
}
@@ -1090,7 +1065,7 @@ void QRegularExpressionPrivate::cleanCompiledPattern()
*/
void QRegularExpressionPrivate::compilePattern()
{
- const QWriteLocker lock(&mutex);
+ const QMutexLocker lock(&mutex);
if (!isDirty)
return;
@@ -1117,6 +1092,7 @@ void QRegularExpressionPrivate::compilePattern()
errorCode = 0;
}
+ optimizePattern();
getPatternInfo();
}
@@ -1217,15 +1193,10 @@ static bool isJitEnabled()
The purpose of the function is to call pcre2_jit_compile_16, which
JIT-compiles the pattern.
- It gets called by doMatch() every time a match is performed.
-
- As of now, the optimizations on the pattern are performed after a certain
- number of usages (i.e. the qt_qregularexpression_optimize_after_use_count
- constant) unless the DontAutomaticallyOptimizeOption option is set on the
- QRegularExpression object, or anyhow by calling optimize() (which will pass
- ImmediateOptimizeOption).
+ It gets called when a pattern is recompiled by us (in compilePattern()),
+ under mutex protection.
*/
-void QRegularExpressionPrivate::optimizePattern(OptimizePatternOption option)
+void QRegularExpressionPrivate::optimizePattern()
{
Q_ASSERT(compiledPattern);
@@ -1234,11 +1205,6 @@ void QRegularExpressionPrivate::optimizePattern(OptimizePatternOption option)
if (!enableJit)
return;
- const QWriteLocker lock(&mutex);
-
- if ((option == LazyOptimizeOption) && (++usedCount != qt_qregularexpression_optimize_after_use_count))
- return;
-
pcre2_jit_compile_16(compiledPattern, PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD);
}
@@ -1344,22 +1310,12 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
return priv;
}
- // skip optimizing and doing the actual matching if NoMatch type was requested
+ // skip doing the actual matching if NoMatch type was requested
if (matchType == QRegularExpression::NoMatch) {
priv->isValid = true;
return priv;
}
- if (!(patternOptions & QRegularExpression::DontAutomaticallyOptimizeOption)) {
- const OptimizePatternOption optimizePatternOption =
- (patternOptions & QRegularExpression::OptimizeOnFirstUsageOption)
- ? ImmediateOptimizeOption
- : LazyOptimizeOption;
-
- // this is mutex protected
- const_cast<QRegularExpressionPrivate *>(this)->optimizePattern(optimizePatternOption);
- }
-
int pcreOptions = convertToPcreOptions(matchOptions);
if (matchType == QRegularExpression::PartialPreferCompleteMatch)
@@ -1384,8 +1340,6 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
int result;
- QReadLocker lock(&mutex);
-
if (!previousMatchWasEmpty) {
result = safe_pcre2_match_16(compiledPattern,
subjectUtf16, subjectLength,
@@ -1417,8 +1371,6 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
}
}
- lock.unlock();
-
#ifdef QREGULAREXPRESSION_DEBUG
qDebug() << "Matching" << pattern << "against" << subject
<< "starting at" << subjectStart << "len" << subjectLength
@@ -1928,22 +1880,14 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QStringRef
/*!
\since 5.4
- Forces an immediate optimization of the pattern, including
- JIT-compiling it (if the JIT compiler is enabled).
+ Compiles the pattern immediately, including JIT compiling it (if
+ the JIT is enabled) for optimization.
- Patterns are normally optimized only after a certain number of usages.
- If you can predict that this QRegularExpression object is going to be
- used for several matches, it may be convenient to optimize it in
- advance by calling this function.
-
- \sa QRegularExpression::OptimizeOnFirstUsageOption
+ \sa isValid(), {Debugging Code that Uses QRegularExpression}
*/
void QRegularExpression::optimize() const
{
- if (!isValid()) // will compile the pattern
- return;
-
- d->optimizePattern(QRegularExpressionPrivate::ImmediateOptimizeOption);
+ d.data()->compilePattern();
}
/*!