path: root/src/corelib/tools
diff options
authorGiuseppe D'Angelo <>2014-05-11 11:48:47 +0200
committerThe Qt Project <>2014-05-12 09:47:19 +0200
commitfd80cad07e9ab98ebb8cd1b056aeabc0aed336ea (patch)
tree9b4b46381a51918b2d10b9f098a14877af7ed5f1 /src/corelib/tools
parent45326692857c558bea4b263899bcae7ec88535aa (diff)
QRegularExpression: allow users to skip the UTF-16 check of the subject string
PCRE does not handle invalid UTF-16 sequences. For this reason we always check a subject string's UTF-16 validity before attempting any match over it (actually we let PCRE do that). The only exception so far has been global matching -- once the first match was done, we skipped re-doing the check over and over again the same string (PCRE actually checks the /entire/ string, not only the part it uses for matching). Still, users had no way to skip this check if they were 100% sure the string was a valid UTF-16 string. This commit introduces a way for them to skip the check. Change-Id: Iea352c06f531aa2153863b3a1681acaab7ac375c Reviewed-by: Thiago Macieira <>
Diffstat (limited to 'src/corelib/tools')
2 files changed, 14 insertions, 3 deletions
diff --git a/src/corelib/tools/qregularexpression.cpp b/src/corelib/tools/qregularexpression.cpp
index 2552a04e55..4babfe1e19 100644
--- a/src/corelib/tools/qregularexpression.cpp
+++ b/src/corelib/tools/qregularexpression.cpp
@@ -763,6 +763,13 @@ QT_BEGIN_NAMESPACE
The match is constrained to start exactly at the offset passed to
match() in order to be successful, even if the pattern string does not
contain any metacharacter that anchors the match at that point.
+ \value DontCheckSubjectStringMatchOption
+ The subject string is not checked for UTF-16 validity before
+ attempting a match. Use this option with extreme caution, as
+ attempting to match an invalid string may crash the program and/or
+ constitute a security issue. This enum value has been introduced in
+ Qt 5.4.
// after how many usages we optimize the regexp
@@ -1221,7 +1228,8 @@ static int pcre16SafeExec(const pcre16 *code, const pcre16_extra *extra,
options \a matchOptions and returns the QRegularExpressionMatchPrivate of
the result. It also advances a match if a previous result is given as \a
previous. The \a subject string goes a Unicode validity check if
- \a checkSubjectString is CheckSubjectString (PCRE doesn't like illegal
+ \a checkSubjectString is CheckSubjectString and the match options don't
+ include DontCheckSubjectStringMatchOption (PCRE doesn't like illegal
UTF-16 sequences).
Advancing a match is a tricky algorithm. If the previous match matched a
@@ -1290,8 +1298,10 @@ QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString
else if (matchType == QRegularExpression::PartialPreferFirstMatch)
pcreOptions |= PCRE_PARTIAL_HARD;
- if (checkSubjectStringOption == DontCheckSubjectString)
+ if (checkSubjectStringOption == DontCheckSubjectString
+ || matchOptions & QRegularExpression::DontCheckSubjectStringMatchOption) {
pcreOptions |= PCRE_NO_UTF16_CHECK;
+ }
bool previousMatchWasEmpty = false;
if (previous && previous->hasMatch &&
diff --git a/src/corelib/tools/qregularexpression.h b/src/corelib/tools/qregularexpression.h
index bf75d12a05..3352e2b3d5 100644
--- a/src/corelib/tools/qregularexpression.h
+++ b/src/corelib/tools/qregularexpression.h
@@ -110,7 +110,8 @@ public:
enum MatchOption {
NoMatchOption = 0x0000,
- AnchoredMatchOption = 0x0001
+ AnchoredMatchOption = 0x0001,
+ DontCheckSubjectStringMatchOption = 0x0002
Q_DECLARE_FLAGS(MatchOptions, MatchOption)