summaryrefslogtreecommitdiffstats
path: root/src/corelib/tools/qregularexpression.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/tools/qregularexpression.cpp')
-rw-r--r--src/corelib/tools/qregularexpression.cpp115
1 files changed, 101 insertions, 14 deletions
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp
index 257141efdc..cafa1a1070 100644
--- a/src/corelib/tools/qregularexpression.cpp
+++ b/src/corelib/tools/qregularexpression.cpp
@@ -53,6 +53,7 @@
#include <QtCore/qthreadstorage.h>
#include <QtCore/qglobal.h>
#include <QtCore/qatomic.h>
+#include <QtCore/qdatastream.h>
#include <pcre.h>
@@ -701,6 +702,23 @@ QT_BEGIN_NAMESPACE
\c{\w} to match any character with either the Unicode L (letter) or N
(digit) property, plus underscore, and so on. This option corresponds
to the \c{/u} modifier in Perl regular expressions.
+
+ \value OptimizeOnFirstUsageOption
+ The regular expression will be optimized (and possibly
+ JIT-compiled) on its first usage, instead of after a certain (undefined)
+ number of usages. See also \l{QRegularExpression::}{optimize()}.
+ This enum value has been introduced in Qt 5.4.
+
+ \value DontAutomaticallyOptimizeOption
+ Regular expressions are automatically optimized after a
+ certain number of usages; setting this option prevents such
+ optimizations, therefore avoiding possible unpredictable spikes in
+ CPU and memory usage. If both this option and the
+ \c{OptimizeOnFirstUsageOption} option are set, then this option takes
+ precedence. Note: this option will still let the regular expression
+ to be optimized by manually calling
+ \l{QRegularExpression::}{optimize()}. This enum value has been
+ introduced in Qt 5.4.
*/
/*!
@@ -748,6 +766,13 @@ QT_BEGIN_NAMESPACE
The match is constrained to start exactly at the offset passed to
match() in order to be successful, even if the pattern string does not
contain any metacharacter that anchors the match at that point.
+
+ \value DontCheckSubjectStringMatchOption
+ The subject string is not checked for UTF-16 validity before
+ attempting a match. Use this option with extreme caution, as
+ attempting to match an invalid string may crash the program and/or
+ constitute a security issue. This enum value has been introduced in
+ Qt 5.4.
*/
// after how many usages we optimize the regexp
@@ -804,13 +829,24 @@ struct QRegularExpressionPrivate : QSharedData
void cleanCompiledPattern();
void compilePattern();
void getPatternInfo();
- void optimizePattern();
+
+ enum OptimizePatternOption {
+ LazyOptimizeOption,
+ ImmediateOptimizeOption
+ };
+
+ void optimizePattern(OptimizePatternOption option);
+
+ enum CheckSubjectStringOption {
+ CheckSubjectString,
+ DontCheckSubjectString
+ };
QRegularExpressionMatchPrivate *doMatch(const QString &subject,
int offset,
QRegularExpression::MatchType matchType,
QRegularExpression::MatchOptions matchOptions,
- bool checkSubjectString = true,
+ CheckSubjectStringOption checkSubjectStringOption = CheckSubjectString,
const QRegularExpressionMatchPrivate *previous = 0) const;
int captureIndexForName(const QString &name) const;
@@ -1098,7 +1134,10 @@ static bool isJitEnabled()
setting the studyData member variable to the result of the study. It gets
called by doMatch() every time a match is performed. As of now, the
optimizations on the pattern are performed after a certain number of usages
- (i.e. the qt_qregularexpression_optimize_after_use_count constant).
+ (i.e. the qt_qregularexpression_optimize_after_use_count constant) unless
+ the DontAutomaticallyOptimizeOption option is set on the QRegularExpression
+ object, or anyhow by calling optimize() (which will pass
+ ImmediateOptimizeOption).
Notice that although the method is protected by a mutex, one thread may
invoke this function and return immediately (i.e. not study the pattern,
@@ -1109,20 +1148,23 @@ static bool isJitEnabled()
(localStudyData) before using storeRelease on studyData. In doMatch there's
the corresponding loadAcquire.
*/
-void QRegularExpressionPrivate::optimizePattern()
+void QRegularExpressionPrivate::optimizePattern(OptimizePatternOption option)
{
Q_ASSERT(compiledPattern);
QMutexLocker lock(&mutex);
- if (studyData.load() || (++usedCount != qt_qregularexpression_optimize_after_use_count))
+ if (studyData.load()) // already optimized
+ return;
+
+ if ((option == LazyOptimizeOption) && (++usedCount != qt_qregularexpression_optimize_after_use_count))
return;
static const bool enableJit = isJitEnabled();
int studyOptions = 0;
if (enableJit)
- studyOptions |= PCRE_STUDY_JIT_COMPILE;
+ studyOptions |= (PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE);
const char *err;
pcre16_extra * const localStudyData = pcre16_study(compiledPattern, studyOptions, &err);
@@ -1189,7 +1231,9 @@ static int pcre16SafeExec(const pcre16 *code, const pcre16_extra *extra,
options \a matchOptions and returns the QRegularExpressionMatchPrivate of
the result. It also advances a match if a previous result is given as \a
previous. The \a subject string goes a Unicode validity check if
- \a checkSubjectString is true (PCRE doesn't like illegal UTF-16 sequences).
+ \a checkSubjectString is CheckSubjectString and the match options don't
+ include DontCheckSubjectStringMatchOption (PCRE doesn't like illegal
+ UTF-16 sequences).
Advancing a match is a tricky algorithm. If the previous match matched a
non-empty string, we just do an ordinary match at the offset position.
@@ -1206,7 +1250,7 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
int offset,
QRegularExpression::MatchType matchType,
QRegularExpression::MatchOptions matchOptions,
- bool checkSubjectString,
+ CheckSubjectStringOption checkSubjectStringOption,
const QRegularExpressionMatchPrivate *previous) const
{
if (offset < 0)
@@ -1235,8 +1279,15 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
matchType, matchOptions,
capturingCount + 1);
- // this is mutex protected
- const_cast<QRegularExpressionPrivate *>(this)->optimizePattern();
+ if (!(patternOptions & QRegularExpression::DontAutomaticallyOptimizeOption)) {
+ const OptimizePatternOption optimizePatternOption =
+ (patternOptions & QRegularExpression::OptimizeOnFirstUsageOption)
+ ? ImmediateOptimizeOption
+ : LazyOptimizeOption;
+
+ // this is mutex protected
+ const_cast<QRegularExpressionPrivate *>(this)->optimizePattern(optimizePatternOption);
+ }
// work with a local copy of the study data, as we are running pcre_exec
// potentially more than once, and we don't want to run call it
@@ -1250,8 +1301,10 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
else if (matchType == QRegularExpression::PartialPreferFirstMatch)
pcreOptions |= PCRE_PARTIAL_HARD;
- if (!checkSubjectString)
+ if (checkSubjectStringOption == DontCheckSubjectString
+ || matchOptions & QRegularExpression::DontCheckSubjectStringMatchOption) {
pcreOptions |= PCRE_NO_UTF16_CHECK;
+ }
bool previousMatchWasEmpty = false;
if (previous && previous->hasMatch &&
@@ -1362,7 +1415,7 @@ QRegularExpressionMatch QRegularExpressionMatchPrivate::nextMatch() const
Q_ASSERT(isValid);
Q_ASSERT(hasMatch || hasPartialMatch);
- // Note the "false" passed for the check of the subject string:
+ // Note the DontCheckSubjectString passed for the check of the subject string:
// if we're advancing a match on the same subject,
// then that subject was already checked at least once (when this object
// was created, or when the object that created this one was created, etc.)
@@ -1370,7 +1423,7 @@ QRegularExpressionMatch QRegularExpressionMatchPrivate::nextMatch() const
capturedOffsets.at(1),
matchType,
matchOptions,
- false,
+ QRegularExpressionPrivate::DontCheckSubjectString,
this);
return QRegularExpressionMatch(*nextPrivate);
}
@@ -1669,6 +1722,27 @@ QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &s
}
/*!
+ \since 5.4
+
+ Forces an immediate optimization of the pattern, including
+ JIT-compiling it (if the JIT compiler is enabled).
+
+ Patterns are normally optimized only after a certain number of usages.
+ If you can predict that this QRegularExpression object is going to be
+ used for several matches, it may be convenient to optimize it in
+ advance by calling this function.
+
+ \sa QRegularExpression::OptimizeOnFirstUsageOption
+*/
+void QRegularExpression::optimize() const
+{
+ if (!isValid()) // will compile the pattern
+ return;
+
+ d->optimizePattern(QRegularExpressionPrivate::ImmediateOptimizeOption);
+}
+
+/*!
Returns \c true if the regular expression is equal to \a re, or false
otherwise. Two QRegularExpression objects are equal if they have
the same pattern string and the same pattern options.
@@ -2329,6 +2403,10 @@ QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOption
flags.append("DontCaptureOption|");
if (patternOptions & QRegularExpression::UseUnicodePropertiesOption)
flags.append("UseUnicodePropertiesOption|");
+ if (patternOptions & QRegularExpression::OptimizeOnFirstUsageOption)
+ flags.append("OptimizeOnFirstUsageOption|");
+ if (patternOptions & QRegularExpression::DontAutomaticallyOptimizeOption)
+ flags.append("DontAutomaticallyOptimizeOption|");
flags.chop(1);
}
@@ -2492,7 +2570,16 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid UTF-16 string"),
QT_TRANSLATE_NOOP("QRegularExpression", "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"),
- QT_TRANSLATE_NOOP("QRegularExpression", "character value in \\u.... sequence is too large")
+ QT_TRANSLATE_NOOP("QRegularExpression", "character value in \\u.... sequence is too large"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "invalid UTF-32 string"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "setting UTF is disabled by the application"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "non-hex character in \\x{} (closing brace missing?)"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "non-octal character in \\o{} (closing brace missing?)"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "missing opening brace after \\o"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "parentheses are too deeply nested"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "invalid range in character class"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "group name must start with a non-digit"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "parentheses are too deeply nested (stack check)")
};
#endif // #if 0