diff options
Diffstat (limited to 'src/corelib/text/qregularexpression.cpp')
-rw-r--r-- | src/corelib/text/qregularexpression.cpp | 97 |
1 files changed, 72 insertions, 25 deletions
diff --git a/src/corelib/text/qregularexpression.cpp b/src/corelib/text/qregularexpression.cpp index e05bef450b..068c960910 100644 --- a/src/corelib/text/qregularexpression.cpp +++ b/src/corelib/text/qregularexpression.cpp @@ -443,6 +443,38 @@ QT_BEGIN_NAMESPACE Other differences are outlined below. + \section2 Different pattern syntax + + Porting a regular expression from QRegExp to QRegularExpression may require + changes to the pattern itself. + + In certain scenarios, QRegExp was too lenient and accepted patterns that + are simply invalid when using QRegularExpression. These are somewhat easy + to detect, because the QRegularExpression objects built with these patterns + are not valid (cf. isValid()). + + In other cases, a pattern ported from QRegExp to QRegularExpression may + silently change semantics. Therefore, it is necessary to review the + patterns used. The most notable cases of silent incompatibility are: + + \list + + \li Curly braces are needed in order to use a hexadecimal escape like + \c{\xHHHH} with more than 2 digits. A pattern like \c{\x2022} needs to + be ported to \c{\x{2022}}, or it will match a space (\c{0x20}) followed + by the string \c{"22"}. In general, it is highly recommended to always use + curly braces with the \c{\\x} escape, no matter the number of digits + specified. + + \li A 0-to-n quantification like \c{{,n}} needs to be ported to \c{{0,n}} to + preserve semantics. Otherwise, a pattern such as \c{\d{,3}} would + actually match a digit followed by the exact string \c{"{,3}"}. + + \li QRegExp by default does Unicode-aware matching, while + QRegularExpression requires a separate option; see below for more details. 
+ + \endlist + \section2 Porting from QRegExp::exactMatch() QRegExp::exactMatch() in Qt 4 served two purposes: it exactly matched @@ -2835,7 +2867,7 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "numbers out of order in {} quantifier"), QT_TRANSLATE_NOOP("QRegularExpression", "number too big in {} quantifier"), QT_TRANSLATE_NOOP("QRegularExpression", "missing terminating ] for character class"), - QT_TRANSLATE_NOOP("QRegularExpression", "invalid escape sequence in character class"), + QT_TRANSLATE_NOOP("QRegularExpression", "escape sequence is invalid in character class"), QT_TRANSLATE_NOOP("QRegularExpression", "range out of order in character class"), QT_TRANSLATE_NOOP("QRegularExpression", "quantifier does not follow a repeatable item"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unexpected repeat"), @@ -2852,46 +2884,46 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "failed to allocate heap memory"), QT_TRANSLATE_NOOP("QRegularExpression", "unmatched closing parenthesis"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error: code overflow"), - QT_TRANSLATE_NOOP("QRegularExpression", "letter or underscore expected after (?< or (?'"), + QT_TRANSLATE_NOOP("QRegularExpression", "missing closing parenthesis for condition"), QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind assertion is not fixed length"), - QT_TRANSLATE_NOOP("QRegularExpression", "malformed number or name after (?("), - QT_TRANSLATE_NOOP("QRegularExpression", "conditional group contains more than two branches"), + QT_TRANSLATE_NOOP("QRegularExpression", "a relative value of zero is not allowed"), + QT_TRANSLATE_NOOP("QRegularExpression", "conditional subpattern contains more than two branches"), QT_TRANSLATE_NOOP("QRegularExpression", "assertion expected after (?( or (?(?C)"), - QT_TRANSLATE_NOOP("QRegularExpression", "(?R or (?[+-]digits must be followed by )"), + 
QT_TRANSLATE_NOOP("QRegularExpression", "digit expected after (?+ or (?-"), QT_TRANSLATE_NOOP("QRegularExpression", "unknown POSIX class name"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error in pcre2_study(): should not occur"), QT_TRANSLATE_NOOP("QRegularExpression", "this version of PCRE2 does not have Unicode support"), QT_TRANSLATE_NOOP("QRegularExpression", "parentheses are too deeply nested (stack check)"), QT_TRANSLATE_NOOP("QRegularExpression", "character code point value in \\x{} or \\o{} is too large"), - QT_TRANSLATE_NOOP("QRegularExpression", "invalid condition (?(0)"), - QT_TRANSLATE_NOOP("QRegularExpression", "\\C is not allowed in a lookbehind assertion"), - QT_TRANSLATE_NOOP("QRegularExpression", "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u"), + QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind is too complicated"), + QT_TRANSLATE_NOOP("QRegularExpression", "\\C is not allowed in a lookbehind assertion in UTF-" "16" " mode"), + QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u"), QT_TRANSLATE_NOOP("QRegularExpression", "number after (?C is greater than 255"), QT_TRANSLATE_NOOP("QRegularExpression", "closing parenthesis for (?C expected"), QT_TRANSLATE_NOOP("QRegularExpression", "invalid escape sequence in (*VERB) name"), QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character after (?P"), - QT_TRANSLATE_NOOP("QRegularExpression", "syntax error in subpattern name (missing terminator)"), + QT_TRANSLATE_NOOP("QRegularExpression", "syntax error in subpattern name (missing terminator?)"), QT_TRANSLATE_NOOP("QRegularExpression", "two named subpatterns have the same name (PCRE2_DUPNAMES not set)"), - QT_TRANSLATE_NOOP("QRegularExpression", "group name must start with a non-digit"), + QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name must start with a non-digit"), QT_TRANSLATE_NOOP("QRegularExpression", "this version of PCRE2 does not have support for \\P, \\p, or 
\\X"), QT_TRANSLATE_NOOP("QRegularExpression", "malformed \\P or \\p sequence"), QT_TRANSLATE_NOOP("QRegularExpression", "unknown property name after \\P or \\p"), - QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name is too long (maximum " "10000" " characters)"), - QT_TRANSLATE_NOOP("QRegularExpression", "too many named subpatterns (maximum " "256" ")"), + QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name is too long (maximum " "32" " code units)"), + QT_TRANSLATE_NOOP("QRegularExpression", "too many named subpatterns (maximum " "10000" ")"), QT_TRANSLATE_NOOP("QRegularExpression", "invalid range in character class"), QT_TRANSLATE_NOOP("QRegularExpression", "octal value is greater than \\377 in 8-bit non-UTF-8 mode"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error: overran compiling workspace"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error: previously-checked referenced subpattern not found"), - QT_TRANSLATE_NOOP("QRegularExpression", "DEFINE group contains more than one branch"), + QT_TRANSLATE_NOOP("QRegularExpression", "DEFINE subpattern contains more than one branch"), QT_TRANSLATE_NOOP("QRegularExpression", "missing opening brace after \\o"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown newline setting"), QT_TRANSLATE_NOOP("QRegularExpression", "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number"), - QT_TRANSLATE_NOOP("QRegularExpression", "a numbered reference must not be zero"), - QT_TRANSLATE_NOOP("QRegularExpression", "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"), + QT_TRANSLATE_NOOP("QRegularExpression", "(?R (recursive pattern call) must be followed by a closing parenthesis"), + QT_TRANSLATE_NOOP("QRegularExpression", "obsolete error (should not occur)"), QT_TRANSLATE_NOOP("QRegularExpression", "(*VERB) not recognized or malformed"), - QT_TRANSLATE_NOOP("QRegularExpression", "number is too big"), + QT_TRANSLATE_NOOP("QRegularExpression", 
"subpattern number is too big"), QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name expected"), - QT_TRANSLATE_NOOP("QRegularExpression", "digit expected after (?+"), + QT_TRANSLATE_NOOP("QRegularExpression", "internal error: parsed pattern overflow"), QT_TRANSLATE_NOOP("QRegularExpression", "non-octal character in \\o{} (closing brace missing?)"), QT_TRANSLATE_NOOP("QRegularExpression", "different names for subpatterns of the same number are not allowed"), QT_TRANSLATE_NOOP("QRegularExpression", "(*MARK) must have an argument"), @@ -2899,16 +2931,16 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "\\c must be followed by a printable ASCII character"), QT_TRANSLATE_NOOP("QRegularExpression", "\\c must be followed by a letter or one of [\\]^_?"), QT_TRANSLATE_NOOP("QRegularExpression", "\\k is not followed by a braced, angle-bracketed, or quoted name"), - QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown opcode in find_fixedlength()"), + QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown meta code in check_lookbehinds()"), QT_TRANSLATE_NOOP("QRegularExpression", "\\N is not supported in a class"), - QT_TRANSLATE_NOOP("QRegularExpression", "SPARE ERROR"), + QT_TRANSLATE_NOOP("QRegularExpression", "callout string is too long"), QT_TRANSLATE_NOOP("QRegularExpression", "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)"), QT_TRANSLATE_NOOP("QRegularExpression", "using UTF is disabled by the application"), QT_TRANSLATE_NOOP("QRegularExpression", "using UCP is disabled by the application"), QT_TRANSLATE_NOOP("QRegularExpression", "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"), QT_TRANSLATE_NOOP("QRegularExpression", "character code point value in \\u.... 
sequence is too large"), - QT_TRANSLATE_NOOP("QRegularExpression", "digits missing in \\x{} or \\o{}"), - QT_TRANSLATE_NOOP("QRegularExpression", "syntax error in (?(VERSION condition"), + QT_TRANSLATE_NOOP("QRegularExpression", "digits missing in \\x{} or \\o{} or \\N{U+}"), + QT_TRANSLATE_NOOP("QRegularExpression", "syntax error or number too big in (?(VERSION condition"), QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown opcode in auto_possessify()"), QT_TRANSLATE_NOOP("QRegularExpression", "missing terminating delimiter for callout with string argument"), QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized string delimiter follows (?C"), @@ -2918,6 +2950,16 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "regular expression is too complicated"), QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind assertion is too long"), QT_TRANSLATE_NOOP("QRegularExpression", "pattern string is longer than the limit set by the application"), + QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown code in parsed pattern"), + QT_TRANSLATE_NOOP("QRegularExpression", "internal error: bad code value in parsed_skip()"), + QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode"), + QT_TRANSLATE_NOOP("QRegularExpression", "invalid option bits with PCRE2_LITERAL"), + QT_TRANSLATE_NOOP("QRegularExpression", "\\N{U+dddd} is supported only in Unicode (UTF) mode"), + QT_TRANSLATE_NOOP("QRegularExpression", "invalid hyphen in option setting"), + QT_TRANSLATE_NOOP("QRegularExpression", "(*alpha_assertion) not recognized"), + QT_TRANSLATE_NOOP("QRegularExpression", "script runs require Unicode support, which this version of PCRE2 does not have"), + QT_TRANSLATE_NOOP("QRegularExpression", "too many capturing groups (maximum 65535)"), + QT_TRANSLATE_NOOP("QRegularExpression", "atomic assertion expected after (?( or (?(?C)"), QT_TRANSLATE_NOOP("QRegularExpression", "no error"), 
QT_TRANSLATE_NOOP("QRegularExpression", "no match"), QT_TRANSLATE_NOOP("QRegularExpression", "partial match"), @@ -2955,7 +2997,7 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "bad option value"), QT_TRANSLATE_NOOP("QRegularExpression", "invalid replacement string"), QT_TRANSLATE_NOOP("QRegularExpression", "bad offset into UTF string"), - QT_TRANSLATE_NOOP("QRegularExpression", "callout error code"), /* Never returned by PCRE2 itself */ + QT_TRANSLATE_NOOP("QRegularExpression", "callout error code"), QT_TRANSLATE_NOOP("QRegularExpression", "invalid data in workspace for DFA restart"), QT_TRANSLATE_NOOP("QRegularExpression", "too much recursion for DFA matching"), QT_TRANSLATE_NOOP("QRegularExpression", "backreference condition or recursion test is not supported for DFA matching"), @@ -2971,15 +3013,20 @@ static const char *pcreCompileErrorCodes[] = QT_TRANSLATE_NOOP("QRegularExpression", "non-unique substring name"), QT_TRANSLATE_NOOP("QRegularExpression", "NULL argument passed"), QT_TRANSLATE_NOOP("QRegularExpression", "nested recursion at the same subject position"), - QT_TRANSLATE_NOOP("QRegularExpression", "recursion limit exceeded"), + QT_TRANSLATE_NOOP("QRegularExpression", "matching depth limit exceeded"), QT_TRANSLATE_NOOP("QRegularExpression", "requested value is not available"), QT_TRANSLATE_NOOP("QRegularExpression", "requested value is not set"), QT_TRANSLATE_NOOP("QRegularExpression", "offset limit set without PCRE2_USE_OFFSET_LIMIT"), QT_TRANSLATE_NOOP("QRegularExpression", "bad escape sequence in replacement string"), QT_TRANSLATE_NOOP("QRegularExpression", "expected closing curly bracket in replacement string"), QT_TRANSLATE_NOOP("QRegularExpression", "bad substitution in replacement string"), - QT_TRANSLATE_NOOP("QRegularExpression", "match with end before start is not supported"), - QT_TRANSLATE_NOOP("QRegularExpression", "too many replacements (more than INT_MAX)") + 
QT_TRANSLATE_NOOP("QRegularExpression", "match with end before start or start moved backwards is not supported"), + QT_TRANSLATE_NOOP("QRegularExpression", "too many replacements (more than INT_MAX)"), + QT_TRANSLATE_NOOP("QRegularExpression", "bad serialized data"), + QT_TRANSLATE_NOOP("QRegularExpression", "heap limit exceeded"), + QT_TRANSLATE_NOOP("QRegularExpression", "invalid syntax"), + QT_TRANSLATE_NOOP("QRegularExpression", "internal error - duplicate substitution match"), + QT_TRANSLATE_NOOP("QRegularExpression", "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching") }; #endif // #if 0 |