summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/pcre2/src/pcre2_study.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_study.c')
-rw-r--r--src/3rdparty/pcre2/src/pcre2_study.c106
1 files changed, 98 insertions, 8 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_study.c b/src/3rdparty/pcre2/src/pcre2_study.c
index 9bbb37570f..792e696dad 100644
--- a/src/3rdparty/pcre2/src/pcre2_study.c
+++ b/src/3rdparty/pcre2/src/pcre2_study.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2020 University of Cambridge
+ New API code Copyright (c) 2016-2023 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -256,6 +256,7 @@ for (;;)
/* Skip over things that don't match chars */
case OP_REVERSE:
+ case OP_VREVERSE:
case OP_CREF:
case OP_DNCREF:
case OP_RREF:
@@ -273,6 +274,8 @@ for (;;)
case OP_DOLLM:
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
cc += PRIV(OP_lengths)[*cc];
break;
@@ -908,7 +911,7 @@ set_nottype_bits(pcre2_real_code *re, int cbit_type, unsigned int table_limit)
{
uint32_t c;
for (c = 0; c < table_limit; c++)
- re->start_bitmap[c] |= ~(re->tables[c+cbits_offset+cbit_type]);
+ re->start_bitmap[c] |= (uint8_t)(~(re->tables[c+cbits_offset+cbit_type]));
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
if (table_limit != 32) for (c = 24; c < 32; c++) re->start_bitmap[c] = 0xff;
#endif
@@ -976,6 +979,7 @@ do
while (try_next) /* Loop for items in this branch */
{
int rc;
+ PCRE2_SPTR ncode;
uint8_t *classmap = NULL;
#ifdef SUPPORT_WIDE_CHARS
PCRE2_UCHAR xclassflags;
@@ -1054,6 +1058,7 @@ do
case OP_REF:
case OP_REFI:
case OP_REVERSE:
+ case OP_VREVERSE:
case OP_RREF:
case OP_SCOND:
case OP_SET_SOM:
@@ -1101,13 +1106,100 @@ do
case OP_WORD_BOUNDARY:
case OP_NOT_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
+ case OP_NOT_UCP_WORD_BOUNDARY:
tcode++;
break;
- /* If we hit a bracket or a positive lookahead assertion, recurse to set
- bits from within the subpattern. If it can't find anything, we have to
- give up. If it finds some mandatory character(s), we are done for this
- branch. Otherwise, carry on scanning after the subpattern. */
+ /* For a positive lookahead assertion, inspect what immediately follows,
+ ignoring intermediate assertions and callouts. If the next item is one
+ that sets a mandatory character, skip this assertion. Otherwise, treat it
+ the same as other bracket groups. */
+
+ case OP_ASSERT:
+ case OP_ASSERT_NA:
+ ncode = tcode + GET(tcode, 1);
+ while (*ncode == OP_ALT) ncode += GET(ncode, 1);
+ ncode += 1 + LINK_SIZE;
+
+ /* Skip irrelevant items */
+
+ for (BOOL done = FALSE; !done;)
+ {
+ switch (*ncode)
+ {
+ case OP_ASSERT:
+ case OP_ASSERT_NOT:
+ case OP_ASSERTBACK:
+ case OP_ASSERTBACK_NOT:
+ case OP_ASSERT_NA:
+ case OP_ASSERTBACK_NA:
+ ncode += GET(ncode, 1);
+ while (*ncode == OP_ALT) ncode += GET(ncode, 1);
+ ncode += 1 + LINK_SIZE;
+ break;
+
+ case OP_WORD_BOUNDARY:
+ case OP_NOT_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ ncode++;
+ break;
+
+ case OP_CALLOUT:
+ ncode += PRIV(OP_lengths)[OP_CALLOUT];
+ break;
+
+ case OP_CALLOUT_STR:
+ ncode += GET(ncode, 1 + 2*LINK_SIZE);
+ break;
+
+ default:
+ done = TRUE;
+ break;
+ }
+ }
+
+ /* Now check the next significant item. */
+
+ switch(*ncode)
+ {
+ default:
+ break;
+
+ case OP_PROP:
+ if (ncode[1] != PT_CLIST) break;
+ /* Fall through */
+ case OP_ANYNL:
+ case OP_CHAR:
+ case OP_CHARI:
+ case OP_EXACT:
+ case OP_EXACTI:
+ case OP_HSPACE:
+ case OP_MINPLUS:
+ case OP_MINPLUSI:
+ case OP_PLUS:
+ case OP_PLUSI:
+ case OP_POSPLUS:
+ case OP_POSPLUSI:
+ case OP_VSPACE:
+ /* Note that these types will only be present in non-UCP mode. */
+ case OP_DIGIT:
+ case OP_NOT_DIGIT:
+ case OP_WORDCHAR:
+ case OP_NOT_WORDCHAR:
+ case OP_WHITESPACE:
+ case OP_NOT_WHITESPACE:
+ tcode = ncode;
+ continue; /* With the following significant opcode */
+ }
+ /* Fall through */
+
+ /* For a group bracket or a positive assertion without an immediately
+ following mandatory setting, recurse to set bits from within the
+ subpattern. If it can't find anything, we have to give up. If it finds
+ some mandatory character(s), we are done for this branch. Otherwise,
+ carry on scanning after the subpattern. */
case OP_BRA:
case OP_SBRA:
@@ -1119,8 +1211,6 @@ do
case OP_SCBRAPOS:
case OP_ONCE:
case OP_SCRIPT_RUN:
- case OP_ASSERT:
- case OP_ASSERT_NA:
rc = set_start_bits(re, tcode, utf, ucp, depthptr);
if (rc == SSB_DONE)
{