summaryrefslogtreecommitdiffstats
path: root/src/corelib/text/qregularexpression.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r--src/corelib/text/qregularexpression.cpp97
1 files changed, 72 insertions, 25 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp
index e05bef450b..068c960910 100644
--- a/src/corelib/text/qregularexpression.cpp
+++ b/src/corelib/text/qregularexpression.cpp
@@ -443,6 +443,38 @@ QT_BEGIN_NAMESPACE
Other differences are outlined below.
+ \section2 Different pattern syntax
+
+ Porting a regular expression from QRegExp to QRegularExpression may require
+ changes to the pattern itself.
+
+ In certain scenarios, QRegExp was too lenient and accepted patterns that
+ are simply invalid when using QRegularExpression. These are somewhat easy
+ to detect, because the QRegularExpression objects built with these patterns
+ are not valid (cf. isValid()).
+
+ In other cases, a pattern ported from QRegExp to QRegularExpression may
+ silently change semantics. Therefore, it is necessary to review the
+ patterns used. The most notable cases of silent incompatibility are:
+
+ \list
+
+ \li Curly braces are needed in order to use a hexadecimal escape like
+ \c{\xHHHH} with more than 2 digits. A pattern like \c{\x2022} needs to
+ be ported to \c{\x{2022}}, or it will match a space (\c{0x20}) followed
+ by the string \c{"22"}. In general, it is highly recommended to always use
+ curly braces with the \c{\\x} escape, no matter the amount of digits
+ specified.
+
+ \li A 0-to-n quantification like \c{{,n}} needs to be ported to \c{{0,n}} to
+ preserve semantics. Otherwise, a pattern such as \c{\d{,3}} would
+ actually match a digit followed by the exact string \c{"{,3}"}.
+
+ \li QRegExp by default does Unicode-aware matching, while
+ QRegularExpression requires a separate option; see below for more details.
+
+ \endlist
+
\section2 Porting from QRegExp::exactMatch()
QRegExp::exactMatch() in Qt 4 served two purposes: it exactly matched
@@ -2835,7 +2867,7 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "numbers out of order in {} quantifier"),
QT_TRANSLATE_NOOP("QRegularExpression", "number too big in {} quantifier"),
QT_TRANSLATE_NOOP("QRegularExpression", "missing terminating ] for character class"),
- QT_TRANSLATE_NOOP("QRegularExpression", "invalid escape sequence in character class"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "escape sequence is invalid in character class"),
QT_TRANSLATE_NOOP("QRegularExpression", "range out of order in character class"),
QT_TRANSLATE_NOOP("QRegularExpression", "quantifier does not follow a repeatable item"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unexpected repeat"),
@@ -2852,46 +2884,46 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "failed to allocate heap memory"),
QT_TRANSLATE_NOOP("QRegularExpression", "unmatched closing parenthesis"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error: code overflow"),
- QT_TRANSLATE_NOOP("QRegularExpression", "letter or underscore expected after (?< or (?'"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "missing closing parenthesis for condition"),
QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind assertion is not fixed length"),
- QT_TRANSLATE_NOOP("QRegularExpression", "malformed number or name after (?("),
- QT_TRANSLATE_NOOP("QRegularExpression", "conditional group contains more than two branches"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "a relative value of zero is not allowed"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "conditional subpattern contains more than two branches"),
QT_TRANSLATE_NOOP("QRegularExpression", "assertion expected after (?( or (?(?C)"),
- QT_TRANSLATE_NOOP("QRegularExpression", "(?R or (?[+-]digits must be followed by )"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "digit expected after (?+ or (?-"),
QT_TRANSLATE_NOOP("QRegularExpression", "unknown POSIX class name"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error in pcre2_study(): should not occur"),
QT_TRANSLATE_NOOP("QRegularExpression", "this version of PCRE2 does not have Unicode support"),
QT_TRANSLATE_NOOP("QRegularExpression", "parentheses are too deeply nested (stack check)"),
QT_TRANSLATE_NOOP("QRegularExpression", "character code point value in \\x{} or \\o{} is too large"),
- QT_TRANSLATE_NOOP("QRegularExpression", "invalid condition (?(0)"),
- QT_TRANSLATE_NOOP("QRegularExpression", "\\C is not allowed in a lookbehind assertion"),
- QT_TRANSLATE_NOOP("QRegularExpression", "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind is too complicated"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "\\C is not allowed in a lookbehind assertion in UTF-" "16" " mode"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u"),
QT_TRANSLATE_NOOP("QRegularExpression", "number after (?C is greater than 255"),
QT_TRANSLATE_NOOP("QRegularExpression", "closing parenthesis for (?C expected"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid escape sequence in (*VERB) name"),
QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character after (?P"),
- QT_TRANSLATE_NOOP("QRegularExpression", "syntax error in subpattern name (missing terminator)"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "syntax error in subpattern name (missing terminator?)"),
QT_TRANSLATE_NOOP("QRegularExpression", "two named subpatterns have the same name (PCRE2_DUPNAMES not set)"),
- QT_TRANSLATE_NOOP("QRegularExpression", "group name must start with a non-digit"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name must start with a non-digit"),
QT_TRANSLATE_NOOP("QRegularExpression", "this version of PCRE2 does not have support for \\P, \\p, or \\X"),
QT_TRANSLATE_NOOP("QRegularExpression", "malformed \\P or \\p sequence"),
QT_TRANSLATE_NOOP("QRegularExpression", "unknown property name after \\P or \\p"),
- QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name is too long (maximum " "10000" " characters)"),
- QT_TRANSLATE_NOOP("QRegularExpression", "too many named subpatterns (maximum " "256" ")"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name is too long (maximum " "32" " code units)"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "too many named subpatterns (maximum " "10000" ")"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid range in character class"),
QT_TRANSLATE_NOOP("QRegularExpression", "octal value is greater than \\377 in 8-bit non-UTF-8 mode"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error: overran compiling workspace"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error: previously-checked referenced subpattern not found"),
- QT_TRANSLATE_NOOP("QRegularExpression", "DEFINE group contains more than one branch"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "DEFINE subpattern contains more than one branch"),
QT_TRANSLATE_NOOP("QRegularExpression", "missing opening brace after \\o"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown newline setting"),
QT_TRANSLATE_NOOP("QRegularExpression", "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number"),
- QT_TRANSLATE_NOOP("QRegularExpression", "a numbered reference must not be zero"),
- QT_TRANSLATE_NOOP("QRegularExpression", "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "(?R (recursive pattern call) must be followed by a closing parenthesis"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "obsolete error (should not occur)"),
QT_TRANSLATE_NOOP("QRegularExpression", "(*VERB) not recognized or malformed"),
- QT_TRANSLATE_NOOP("QRegularExpression", "number is too big"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "subpattern number is too big"),
QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name expected"),
- QT_TRANSLATE_NOOP("QRegularExpression", "digit expected after (?+"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "internal error: parsed pattern overflow"),
QT_TRANSLATE_NOOP("QRegularExpression", "non-octal character in \\o{} (closing brace missing?)"),
QT_TRANSLATE_NOOP("QRegularExpression", "different names for subpatterns of the same number are not allowed"),
QT_TRANSLATE_NOOP("QRegularExpression", "(*MARK) must have an argument"),
@@ -2899,16 +2931,16 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "\\c must be followed by a printable ASCII character"),
QT_TRANSLATE_NOOP("QRegularExpression", "\\c must be followed by a letter or one of [\\]^_?"),
QT_TRANSLATE_NOOP("QRegularExpression", "\\k is not followed by a braced, angle-bracketed, or quoted name"),
- QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown opcode in find_fixedlength()"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown meta code in check_lookbehinds()"),
QT_TRANSLATE_NOOP("QRegularExpression", "\\N is not supported in a class"),
- QT_TRANSLATE_NOOP("QRegularExpression", "SPARE ERROR"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "callout string is too long"),
QT_TRANSLATE_NOOP("QRegularExpression", "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)"),
QT_TRANSLATE_NOOP("QRegularExpression", "using UTF is disabled by the application"),
QT_TRANSLATE_NOOP("QRegularExpression", "using UCP is disabled by the application"),
QT_TRANSLATE_NOOP("QRegularExpression", "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"),
QT_TRANSLATE_NOOP("QRegularExpression", "character code point value in \\u.... sequence is too large"),
- QT_TRANSLATE_NOOP("QRegularExpression", "digits missing in \\x{} or \\o{}"),
- QT_TRANSLATE_NOOP("QRegularExpression", "syntax error in (?(VERSION condition"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "digits missing in \\x{} or \\o{} or \\N{U+}"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "syntax error or number too big in (?(VERSION condition"),
QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown opcode in auto_possessify()"),
QT_TRANSLATE_NOOP("QRegularExpression", "missing terminating delimiter for callout with string argument"),
QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized string delimiter follows (?C"),
@@ -2918,6 +2950,16 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "regular expression is too complicated"),
QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind assertion is too long"),
QT_TRANSLATE_NOOP("QRegularExpression", "pattern string is longer than the limit set by the application"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown code in parsed pattern"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "internal error: bad code value in parsed_skip()"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "invalid option bits with PCRE2_LITERAL"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "\\N{U+dddd} is supported only in Unicode (UTF) mode"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "invalid hyphen in option setting"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "(*alpha_assertion) not recognized"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "script runs require Unicode support, which this version of PCRE2 does not have"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "too many capturing groups (maximum 65535)"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "atomic assertion expected after (?( or (?(?C)"),
QT_TRANSLATE_NOOP("QRegularExpression", "no error"),
QT_TRANSLATE_NOOP("QRegularExpression", "no match"),
QT_TRANSLATE_NOOP("QRegularExpression", "partial match"),
@@ -2955,7 +2997,7 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "bad option value"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid replacement string"),
QT_TRANSLATE_NOOP("QRegularExpression", "bad offset into UTF string"),
- QT_TRANSLATE_NOOP("QRegularExpression", "callout error code"), /* Never returned by PCRE2 itself */
+ QT_TRANSLATE_NOOP("QRegularExpression", "callout error code"),
QT_TRANSLATE_NOOP("QRegularExpression", "invalid data in workspace for DFA restart"),
QT_TRANSLATE_NOOP("QRegularExpression", "too much recursion for DFA matching"),
QT_TRANSLATE_NOOP("QRegularExpression", "backreference condition or recursion test is not supported for DFA matching"),
@@ -2971,15 +3013,20 @@ static const char *pcreCompileErrorCodes[] =
QT_TRANSLATE_NOOP("QRegularExpression", "non-unique substring name"),
QT_TRANSLATE_NOOP("QRegularExpression", "NULL argument passed"),
QT_TRANSLATE_NOOP("QRegularExpression", "nested recursion at the same subject position"),
- QT_TRANSLATE_NOOP("QRegularExpression", "recursion limit exceeded"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "matching depth limit exceeded"),
QT_TRANSLATE_NOOP("QRegularExpression", "requested value is not available"),
QT_TRANSLATE_NOOP("QRegularExpression", "requested value is not set"),
QT_TRANSLATE_NOOP("QRegularExpression", "offset limit set without PCRE2_USE_OFFSET_LIMIT"),
QT_TRANSLATE_NOOP("QRegularExpression", "bad escape sequence in replacement string"),
QT_TRANSLATE_NOOP("QRegularExpression", "expected closing curly bracket in replacement string"),
QT_TRANSLATE_NOOP("QRegularExpression", "bad substitution in replacement string"),
- QT_TRANSLATE_NOOP("QRegularExpression", "match with end before start is not supported"),
- QT_TRANSLATE_NOOP("QRegularExpression", "too many replacements (more than INT_MAX)")
+ QT_TRANSLATE_NOOP("QRegularExpression", "match with end before start or start moved backwards is not supported"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "too many replacements (more than INT_MAX)"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "bad serialized data"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"),
+ QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching")
};
#endif // #if 0