summary refs log tree commit diff stats
path: root/src/3rdparty/pcre2/src/pcre2_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_compile.c')
-rw-r--r-- src/3rdparty/pcre2/src/pcre2_compile.c 349
1 files changed, 241 insertions, 108 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_compile.c b/src/3rdparty/pcre2/src/pcre2_compile.c
index 62393bea74..edf7e82e6e 100644
--- a/src/3rdparty/pcre2/src/pcre2_compile.c
+++ b/src/3rdparty/pcre2/src/pcre2_compile.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2020 University of Cambridge
+ New API code Copyright (c) 2016-2022 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -124,7 +124,7 @@ static unsigned int
static int
compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
- uint32_t *, int32_t *, uint32_t *, int32_t *, branch_chain *,
+ uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
compile_block *, PCRE2_SIZE *);
static int
@@ -137,7 +137,7 @@ static BOOL
static int
check_lookbehinds(uint32_t *, uint32_t **, parsed_recurse_check *,
- compile_block *);
+ compile_block *, int *);
/*************************************************
@@ -385,13 +385,15 @@ compiler is clever with identical subexpressions. */
#define SETBIT(a,b) a[(b)/8] = (uint8_t)(a[(b)/8] | (1u << ((b)&7)))
-/* Private flags added to firstcu and reqcu. */
+/* Values and flags for the unsigned xxcuflags variables that accompany xxcu
+variables, which are concerned with first and required code units. A value
+greater than or equal to REQ_NONE means "no code unit set"; otherwise the
+matching xxcu variable is set, and the low valued bits are relevant. */
-#define REQ_CASELESS (1u << 0) /* Indicates caselessness */
-#define REQ_VARY (1u << 1) /* reqcu followed non-literal item */
-/* Negative values for the firstcu and reqcu flags */
-#define REQ_UNSET (-2) /* Not yet found anything */
-#define REQ_NONE (-1) /* Found not fixed char */
+#define REQ_UNSET 0xffffffffu /* Not yet found anything */
+#define REQ_NONE 0xfffffffeu /* Found not fixed character */
+#define REQ_CASELESS 0x00000001u /* Code unit in xxcu is caseless */
+#define REQ_VARY 0x00000002u /* Code unit is followed by non-literal */
/* These flags are used in the groupinfo vector. */
@@ -782,12 +784,15 @@ are allowed. */
#define PUBLIC_COMPILE_EXTRA_OPTIONS \
(PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \
- PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX)
+ PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \
+ PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
/* Compile time error code numbers. They are given names so that they can more
easily be tracked. When a new number is added, the tables called eint1 and
eint2 in pcre2posix.c may need to be updated, and a new error text must be
-added to compile_error_texts in pcre2_error.c. */
+added to compile_error_texts in pcre2_error.c. Also, the error codes in
+pcre2.h.in must be updated - their values are exactly 100 greater than these
+values. */
enum { ERR0 = COMPILE_ERROR_BASE,
ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10,
@@ -799,7 +804,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, ERR70,
ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERR77, ERR78, ERR79, ERR80,
ERR81, ERR82, ERR83, ERR84, ERR85, ERR86, ERR87, ERR88, ERR89, ERR90,
- ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98 };
+ ERR91, ERR92, ERR93, ERR94, ERR95, ERR96, ERR97, ERR98, ERR99 };
/* This is a table of start-of-pattern options such as (*UTF) and settings such
as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -1261,8 +1266,10 @@ PCRE2_SIZE* ref_count;
if (code != NULL)
{
+#ifdef SUPPORT_JIT
if (code->executable_jit != NULL)
PRIV(jit_free)(code->executable_jit, &code->memctl);
+#endif
if ((code->flags & PCRE2_DEREF_TABLES) != 0)
{
@@ -1398,32 +1405,47 @@ static BOOL
read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp,
uint32_t *maxp, int *errorcodeptr)
{
-PCRE2_SPTR p = *ptrptr;
+PCRE2_SPTR p;
BOOL yield = FALSE;
+BOOL had_comma = FALSE;
int32_t min = 0;
int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */
-/* NB read_number() initializes the error code to zero. The only error is for a
-number that is too big. */
+/* Check the syntax */
+
+*errorcodeptr = 0;
+for (p = *ptrptr;; p++)
+ {
+ uint32_t c;
+ if (p >= ptrend) return FALSE;
+ c = *p;
+ if (IS_DIGIT(c)) continue;
+ if (c == CHAR_RIGHT_CURLY_BRACKET) break;
+ if (c == CHAR_COMMA)
+ {
+ if (had_comma) return FALSE;
+ had_comma = TRUE;
+ }
+ else return FALSE;
+ }
+
+/* The only error from read_number() is for a number that is too big. */
+p = *ptrptr;
if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr))
goto EXIT;
-if (p >= ptrend) goto EXIT;
-
if (*p == CHAR_RIGHT_CURLY_BRACKET)
{
p++;
max = min;
}
-
else
{
- if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT;
- if (*p != CHAR_RIGHT_CURLY_BRACKET)
+ if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
{
if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max,
- errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET)
+ errorcodeptr))
goto EXIT;
if (max < min)
{
@@ -1438,11 +1460,10 @@ yield = TRUE;
if (minp != NULL) *minp = (uint32_t)min;
if (maxp != NULL) *maxp = (uint32_t)max;
-/* Update the pattern pointer on success, or after an error, but not when
-the result is "not a repeat quantifier". */
+/* Update the pattern pointer */
EXIT:
-if (yield || *errorcodeptr != 0) *ptrptr = p;
+*ptrptr = p;
return yield;
}
@@ -1776,19 +1797,23 @@ else
{
oldptr = ptr;
ptr--; /* Back to the digit */
- if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s,
- errorcodeptr))
- break;
- /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
+ /* As we know we are at a digit, the only possible error from
+ read_number() is a number that is too large to be a group number. In this
+ case we fall through handle this as not a group reference. If we have
+ read a small enough number, check for a back reference.
+
+ \1 to \9 are always back references. \8x and \9x are too; \1x to \7x
are octal escapes if there are not that many previous captures. */
- if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)
+ if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) &&
+ (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount))
{
if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61;
else escape = -s; /* Indicates a back reference */
break;
}
+
ptr = oldptr; /* Put the pointer back and fall through */
}
@@ -2067,7 +2092,9 @@ get_ucp(PCRE2_SPTR *ptrptr, BOOL *negptr, uint16_t *ptypeptr,
PCRE2_UCHAR c;
PCRE2_SIZE i, bot, top;
PCRE2_SPTR ptr = *ptrptr;
-PCRE2_UCHAR name[32];
+PCRE2_UCHAR name[50];
+PCRE2_UCHAR *vptr = NULL;
+uint16_t ptscript = PT_NOTSCRIPT;
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
c = *ptr++;
@@ -2079,36 +2106,95 @@ negation. */
if (c == CHAR_LEFT_CURLY_BRACKET)
{
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+
if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
{
*negptr = TRUE;
ptr++;
}
+
for (i = 0; i < (int)(sizeof(name) / sizeof(PCRE2_UCHAR)) - 1; i++)
{
if (ptr >= cb->end_pattern) goto ERROR_RETURN;
c = *ptr++;
+ while (c == '_' || c == '-' || isspace(c))
+ {
+ if (ptr >= cb->end_pattern) goto ERROR_RETURN;
+ c = *ptr++;
+ }
if (c == CHAR_NUL) goto ERROR_RETURN;
if (c == CHAR_RIGHT_CURLY_BRACKET) break;
- name[i] = c;
+ name[i] = tolower(c);
+ if ((c == ':' || c == '=') && vptr == NULL) vptr = name + i;
}
+
if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
name[i] = 0;
}
-/* Otherwise there is just one following character, which must be an ASCII
-letter. */
+/* If { doesn't follow \p or \P there is just one following character, which
+must be an ASCII letter. */
else if (MAX_255(c) && (cb->ctypes[c] & ctype_letter) != 0)
{
- name[0] = c;
+ name[0] = tolower(c);
name[1] = 0;
}
else goto ERROR_RETURN;
*ptrptr = ptr;
-/* Search for a recognized property name using binary chop. */
+/* If the property contains ':' or '=' we have class name and value separately
+specified. The following are supported:
+
+ . Bidi_Class (synonym bc), for which the property names are "bidi<name>".
+ . Script (synonym sc) for which the property name is the script name
+ . Script_Extensions (synonym scx), ditto
+
+As this is a small number, we currently just check the names directly. If this
+grows, a sorted table and a switch will be neater.
+
+For both the script properties, set a PT_xxx value so that (1) they can be
+distinguished and (2) invalid script names that happen to be the name of
+another property can be diagnosed. */
+
+if (vptr != NULL)
+ {
+ int offset = 0;
+ PCRE2_UCHAR sname[8];
+
+ *vptr = 0; /* Terminate property name */
+ if (PRIV(strcmp_c8)(name, STRING_bidiclass) == 0 ||
+ PRIV(strcmp_c8)(name, STRING_bc) == 0)
+ {
+ offset = 4;
+ sname[0] = CHAR_b;
+ sname[1] = CHAR_i; /* There is no strcpy_c8 function */
+ sname[2] = CHAR_d;
+ sname[3] = CHAR_i;
+ }
+
+ else if (PRIV(strcmp_c8)(name, STRING_script) == 0 ||
+ PRIV(strcmp_c8)(name, STRING_sc) == 0)
+ ptscript = PT_SC;
+
+ else if (PRIV(strcmp_c8)(name, STRING_scriptextensions) == 0 ||
+ PRIV(strcmp_c8)(name, STRING_scx) == 0)
+ ptscript = PT_SCX;
+
+ else
+ {
+ *errorcodeptr = ERR47;
+ return FALSE;
+ }
+
+ /* Adjust the string in name[] as needed */
+
+ memmove(name + offset, vptr + 1, (name + i - vptr)*sizeof(PCRE2_UCHAR));
+ if (offset != 0) memmove(name, sname, offset*sizeof(PCRE2_UCHAR));
+ }
+
+/* Search for a recognized property using binary chop. */
bot = 0;
top = PRIV(utt_size);
@@ -2118,15 +2204,37 @@ while (bot < top)
int r;
i = (bot + top) >> 1;
r = PRIV(strcmp_c8)(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
+
+ /* When a matching property is found, some extra checking is needed when the
+ \p{xx:yy} syntax is used and xx is either sc or scx. */
+
if (r == 0)
{
- *ptypeptr = PRIV(utt)[i].type;
*pdataptr = PRIV(utt)[i].value;
- return TRUE;
+ if (vptr == NULL || ptscript == PT_NOTSCRIPT)
+ {
+ *ptypeptr = PRIV(utt)[i].type;
+ return TRUE;
+ }
+
+ switch (PRIV(utt)[i].type)
+ {
+ case PT_SC:
+ *ptypeptr = PT_SC;
+ return TRUE;
+
+ case PT_SCX:
+ *ptypeptr = ptscript;
+ return TRUE;
+ }
+
+ break; /* Non-script found */
}
+
if (r > 0) bot = i + 1; else top = i;
}
-*errorcodeptr = ERR47; /* Unrecognized name */
+
+*errorcodeptr = ERR47; /* Unrecognized property */
return FALSE;
ERROR_RETURN: /* Malformed \P or \p */
@@ -2344,7 +2452,7 @@ if (ptr > *nameptr + MAX_NAME_SIZE)
*errorcodeptr = ERR48;
goto FAILED;
}
-*namelenptr = ptr - *nameptr;
+*namelenptr = (uint32_t)(ptr - *nameptr);
/* Subpattern names must not be empty, and their terminator is checked here.
(What follows a verb or alpha assertion name is checked separately.) */
@@ -2581,7 +2689,7 @@ if ((options & PCRE2_EXTENDED_MORE) != 0) options |= PCRE2_EXTENDED;
while (ptr < ptrend)
{
int prev_expect_cond_assert;
- uint32_t min_repeat, max_repeat;
+ uint32_t min_repeat = 0, max_repeat = 0;
uint32_t set, unset, *optset;
uint32_t terminator;
uint32_t prev_meta_quantifier;
@@ -4331,6 +4439,7 @@ while (ptr < ptrend)
{
if (++ptr >= ptrend || !IS_DIGIT(*ptr)) goto BAD_VERSION_CONDITION;
minor = (*ptr++ - CHAR_0) * 10;
+ if (ptr >= ptrend) goto BAD_VERSION_CONDITION;
if (IS_DIGIT(*ptr)) minor += *ptr++ - CHAR_0;
if (ptr >= ptrend || *ptr != CHAR_RIGHT_PARENTHESIS)
goto BAD_VERSION_CONDITION;
@@ -5263,9 +5372,9 @@ Arguments:
pptrptr points to the current parsed pattern pointer
errorcodeptr points to error code variable
firstcuptr place to put the first required code unit
- firstcuflagsptr place to put the first code unit flags, or a negative number
+ firstcuflagsptr place to put the first code unit flags
reqcuptr place to put the last required code unit
- reqcuflagsptr place to put the last required code unit flags, or a negative number
+ reqcuflagsptr place to put the last required code unit flags
bcptr points to current branch chain
cb contains pointers to tables etc.
lengthptr NULL during the real compile phase
@@ -5278,8 +5387,8 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero
static int
compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
- int *errorcodeptr, uint32_t *firstcuptr, int32_t *firstcuflagsptr,
- uint32_t *reqcuptr, int32_t *reqcuflagsptr, branch_chain *bcptr,
+ int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr,
+ uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr,
compile_block *cb, PCRE2_SIZE *lengthptr)
{
int bravalue = 0;
@@ -5294,9 +5403,9 @@ uint32_t zeroreqcu, zerofirstcu;
uint32_t escape;
uint32_t *pptr = *pptrptr;
uint32_t meta, meta_arg;
-int32_t firstcuflags, reqcuflags;
-int32_t zeroreqcuflags, zerofirstcuflags;
-int32_t req_caseopt, reqvary, tempreqvary;
+uint32_t firstcuflags, reqcuflags;
+uint32_t zeroreqcuflags, zerofirstcuflags;
+uint32_t req_caseopt, reqvary, tempreqvary;
PCRE2_SIZE offset = 0;
PCRE2_SIZE length_prevgroup = 0;
PCRE2_UCHAR *code = *codeptr;
@@ -5352,13 +5461,13 @@ item types that can be repeated set these backoff variables appropriately. */
firstcu = reqcu = zerofirstcu = zeroreqcu = 0;
firstcuflags = reqcuflags = zerofirstcuflags = zeroreqcuflags = REQ_UNSET;
-/* The variable req_caseopt contains either the REQ_CASELESS value or zero,
+/* The variable req_caseopt contains either the REQ_CASELESS bit or zero,
according to the current setting of the caseless flag. The REQ_CASELESS value
leaves the lower 28 bit empty. It is added into the firstcu or reqcu variables
to record the case status of the value. This is used only for ASCII characters.
*/
-req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS:0;
+req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0;
/* Switch on next META item until the end of the branch */
@@ -5373,13 +5482,12 @@ for (;; pptr++)
BOOL possessive_quantifier;
BOOL note_group_empty;
int class_has_8bitchar;
- int i;
uint32_t mclength;
uint32_t skipunits;
uint32_t subreqcu, subfirstcu;
uint32_t groupnumber;
uint32_t verbarglen, verbculen;
- int32_t subreqcuflags, subfirstcuflags; /* Must be signed */
+ uint32_t subreqcuflags, subfirstcuflags;
open_capitem *oc;
PCRE2_UCHAR mcbuffer[8];
@@ -5748,9 +5856,9 @@ for (;; pptr++)
if (taboffset >= 0)
{
if (tabopt >= 0)
- for (i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
+ for (int i = 0; i < 32; i++) pbits[i] |= cbits[(int)i + taboffset];
else
- for (i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
+ for (int i = 0; i < 32; i++) pbits[i] &= ~cbits[(int)i + taboffset];
}
/* Now see if we need to remove any special characters. An option
@@ -5764,9 +5872,9 @@ for (;; pptr++)
being built and we are done. */
if (local_negate)
- for (i = 0; i < 32; i++) classbits[i] |= ~pbits[i];
+ for (int i = 0; i < 32; i++) classbits[i] |= (uint8_t)(~pbits[i]);
else
- for (i = 0; i < 32; i++) classbits[i] |= pbits[i];
+ for (int i = 0; i < 32; i++) classbits[i] |= pbits[i];
/* Every class contains at least one < 256 character. */
@@ -5805,21 +5913,23 @@ for (;; pptr++)
switch(escape)
{
case ESC_d:
- for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
+ for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_digit];
break;
case ESC_D:
should_flip_negation = TRUE;
- for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_digit];
+ for (int i = 0; i < 32; i++)
+ classbits[i] |= (uint8_t)(~cbits[i+cbit_digit]);
break;
case ESC_w:
- for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
+ for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_word];
break;
case ESC_W:
should_flip_negation = TRUE;
- for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_word];
+ for (int i = 0; i < 32; i++)
+ classbits[i] |= (uint8_t)(~cbits[i+cbit_word]);
break;
/* Perl 5.004 onwards omitted VT from \s, but restored it at Perl
@@ -5830,12 +5940,13 @@ for (;; pptr++)
longer treat \s and \S specially. */
case ESC_s:
- for (i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
+ for (int i = 0; i < 32; i++) classbits[i] |= cbits[i+cbit_space];
break;
case ESC_S:
should_flip_negation = TRUE;
- for (i = 0; i < 32; i++) classbits[i] |= ~cbits[i+cbit_space];
+ for (int i = 0; i < 32; i++)
+ classbits[i] |= (uint8_t)(~cbits[i+cbit_space]);
break;
/* When adding the horizontal or vertical space lists to a class, or
@@ -6076,7 +6187,7 @@ for (;; pptr++)
if (negate_class && !xclass_has_prop)
{
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
- for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
+ for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
}
memcpy(code, classbits, 32);
code = class_uchardata + (32 / sizeof(PCRE2_UCHAR));
@@ -6102,7 +6213,7 @@ for (;; pptr++)
if (negate_class)
{
/* Using 255 ^ instead of ~ avoids clang sanitize warning. */
- for (i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
+ for (int i = 0; i < 32; i++) classbits[i] = 255 ^ classbits[i];
}
memcpy(code, classbits, 32);
}
@@ -6176,7 +6287,7 @@ for (;; pptr++)
verbarglen = *(++pptr);
verbculen = 0;
tempcode = code++;
- for (i = 0; i < (int)verbarglen; i++)
+ for (int i = 0; i < (int)verbarglen; i++)
{
meta = *(++pptr);
#ifdef SUPPORT_UNICODE
@@ -6225,6 +6336,7 @@ for (;; pptr++)
bravalue = OP_COND;
{
int count, index;
+ unsigned int i;
PCRE2_SPTR name;
named_group *ng = cb->named_groups;
uint32_t length = *(++pptr);
@@ -6264,7 +6376,7 @@ for (;; pptr++)
groupnumber = 0;
if (meta == META_COND_RNUMBER)
{
- for (i = 1; i < (int)length; i++)
+ for (i = 1; i < length; i++)
{
groupnumber = groupnumber * 10 + name[i] - CHAR_0;
if (groupnumber > MAX_GROUP_NUMBER)
@@ -6586,7 +6698,7 @@ for (;; pptr++)
if (firstcuflags == REQ_UNSET && subfirstcuflags != REQ_UNSET)
{
- if (subfirstcuflags >= 0)
+ if (subfirstcuflags < REQ_NONE)
{
firstcu = subfirstcu;
firstcuflags = subfirstcuflags;
@@ -6600,7 +6712,7 @@ for (;; pptr++)
into reqcu if there wasn't one, using the vary flag that was in
existence beforehand. */
- else if (subfirstcuflags >= 0 && subreqcuflags < 0)
+ else if (subfirstcuflags < REQ_NONE && subreqcuflags >= REQ_NONE)
{
subreqcu = subfirstcu;
subreqcuflags = subfirstcuflags | tempreqvary;
@@ -6609,7 +6721,7 @@ for (;; pptr++)
/* If the subpattern set a required code unit (or set a first code unit
that isn't really the first code unit - see above), set it. */
- if (subreqcuflags >= 0)
+ if (subreqcuflags < REQ_NONE)
{
reqcu = subreqcu;
reqcuflags = subreqcuflags;
@@ -6628,7 +6740,7 @@ for (;; pptr++)
in that example, 'X' ends up set for both. */
else if ((bravalue == OP_ASSERT || bravalue == OP_ASSERT_NA) &&
- subreqcuflags >= 0 && subfirstcuflags >= 0)
+ subreqcuflags < REQ_NONE && subfirstcuflags < REQ_NONE)
{
reqcu = subreqcu;
reqcuflags = subreqcuflags;
@@ -6658,7 +6770,7 @@ for (;; pptr++)
this name is duplicated. */
groupnumber = 0;
- for (i = 0; i < cb->names_found; i++, ng++)
+ for (unsigned int i = 0; i < cb->names_found; i++, ng++)
{
if (length == ng->length &&
PRIV(strncmp)(name, ng->name, length) == 0)
@@ -6913,14 +7025,19 @@ for (;; pptr++)
#endif /* MAYBE_UTF_MULTI */
/* Handle the case of a single code unit - either with no UTF support, or
- with UTF disabled, or for a single-code-unit UTF character. */
+ with UTF disabled, or for a single-code-unit UTF character. In the latter
+ case, for a repeated positive match, get the caseless flag for the
+ required code unit from the previous character, because a class like [Aa]
+ sets a caseless A but by now the req_caseopt flag has been reset. */
+
{
mcbuffer[0] = code[-1];
mclength = 1;
if (op_previous <= OP_CHARI && repeat_min > 1)
{
reqcu = mcbuffer[0];
- reqcuflags = req_caseopt | cb->req_varyopt;
+ reqcuflags = cb->req_varyopt;
+ if (op_previous == OP_CHARI) reqcuflags |= REQ_CASELESS;
}
}
goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */
@@ -7012,7 +7129,7 @@ for (;; pptr++)
*lengthptr += delta;
}
- else for (i = 0; i < replicate; i++)
+ else for (int i = 0; i < replicate; i++)
{
memcpy(code, previous, CU2BYTES(1 + LINK_SIZE));
previous = code;
@@ -7188,12 +7305,12 @@ for (;; pptr++)
else
{
- if (groupsetfirstcu && reqcuflags < 0)
+ if (groupsetfirstcu && reqcuflags >= REQ_NONE)
{
reqcu = firstcu;
reqcuflags = firstcuflags;
}
- for (i = 1; (uint32_t)i < repeat_min; i++)
+ for (uint32_t i = 1; i < repeat_min; i++)
{
memcpy(code, previous, CU2BYTES(len));
code += len;
@@ -7237,14 +7354,14 @@ for (;; pptr++)
/* This is compiling for real */
- else for (i = repeat_max - 1; i >= 0; i--)
+ else for (uint32_t i = repeat_max; i >= 1; i--)
{
*code++ = OP_BRAZERO + repeat_type;
/* All but the final copy start a new nesting, maintaining the
chain of brackets outstanding. */
- if (i != 0)
+ if (i != 1)
{
int linkoffset;
*code++ = OP_BRA;
@@ -7780,6 +7897,16 @@ for (;; pptr++)
}
#endif
+ /* \K is forbidden in lookarounds since 10.38 because that's what Perl has
+ done. However, there's an option, in case anyone was relying on it. */
+
+ if (cb->assert_depth > 0 && meta_arg == ESC_K &&
+ (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0)
+ {
+ *errorcodeptr = ERR99;
+ return 0;
+ }
+
/* For the rest (including \X when Unicode is supported - if not it's
faulted at parse time), the OP value is the escape value when PCRE2_UCP is
not set; if it is set, these escapes do not show up here because they are
@@ -7953,9 +8080,9 @@ Arguments:
errorcodeptr -> pointer to error code variable
skipunits skip this many code units at start (for brackets and OP_COND)
firstcuptr place to put the first required code unit
- firstcuflagsptr place to put the first code unit flags, or a negative number
+ firstcuflagsptr place to put the first code unit flags
reqcuptr place to put the last required code unit
- reqcuflagsptr place to put the last required code unit flags, or a negative number
+ reqcuflagsptr place to put the last required code unit flags
bcptr pointer to the chain of currently open branches
cb points to the data block with tables pointers etc.
lengthptr NULL during the real compile phase
@@ -7969,7 +8096,7 @@ Returns: 0 There has been an error
static int
compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
- int32_t *firstcuflagsptr, uint32_t *reqcuptr,int32_t *reqcuflagsptr,
+ uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr,
branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
{
PCRE2_UCHAR *code = *codeptr;
@@ -7982,9 +8109,9 @@ int okreturn = 1;
uint32_t *pptr = *pptrptr;
uint32_t firstcu, reqcu;
uint32_t lookbehindlength;
-int32_t firstcuflags, reqcuflags;
+uint32_t firstcuflags, reqcuflags;
uint32_t branchfirstcu, branchreqcu;
-int32_t branchfirstcuflags, branchreqcuflags;
+uint32_t branchfirstcuflags, branchreqcuflags;
PCRE2_SIZE length;
branch_chain bc;
@@ -8103,9 +8230,9 @@ for (;;)
if (firstcuflags != branchfirstcuflags || firstcu != branchfirstcu)
{
- if (firstcuflags >= 0)
+ if (firstcuflags < REQ_NONE)
{
- if (reqcuflags < 0)
+ if (reqcuflags >= REQ_NONE)
{
reqcu = firstcu;
reqcuflags = firstcuflags;
@@ -8117,8 +8244,8 @@ for (;;)
/* If we (now or from before) have no firstcu, a firstcu from the
branch becomes a reqcu if there isn't a branch reqcu. */
- if (firstcuflags < 0 && branchfirstcuflags >= 0 &&
- branchreqcuflags < 0)
+ if (firstcuflags >= REQ_NONE && branchfirstcuflags < REQ_NONE &&
+ branchreqcuflags >= REQ_NONE)
{
branchreqcu = branchfirstcu;
branchreqcuflags = branchfirstcuflags;
@@ -8266,7 +8393,7 @@ Returns: TRUE or FALSE
*/
static BOOL
-is_anchored(PCRE2_SPTR code, unsigned int bracket_map, compile_block *cb,
+is_anchored(PCRE2_SPTR code, uint32_t bracket_map, compile_block *cb,
int atomcount, BOOL inassert)
{
do {
@@ -8289,7 +8416,7 @@ do {
op == OP_SCBRA || op == OP_SCBRAPOS)
{
int n = GET2(scode, 1+LINK_SIZE);
- int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
+ uint32_t new_map = bracket_map | ((n < 32)? (1u << n) : 1);
if (!is_anchored(scode, new_map, cb, atomcount, inassert)) return FALSE;
}
@@ -8427,7 +8554,7 @@ do {
op == OP_SCBRA || op == OP_SCBRAPOS)
{
int n = GET2(scode, 1+LINK_SIZE);
- int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
+ unsigned int new_map = bracket_map | ((n < 32)? (1u << n) : 1);
if (!is_startline(scode, new_map, cb, atomcount, inassert)) return FALSE;
}
@@ -8649,15 +8776,15 @@ Returns: the fixed first code unit, or 0 with REQ_NONE in flags
*/
static uint32_t
-find_firstassertedcu(PCRE2_SPTR code, int32_t *flags, uint32_t inassert)
+find_firstassertedcu(PCRE2_SPTR code, uint32_t *flags, uint32_t inassert)
{
uint32_t c = 0;
-int cflags = REQ_NONE;
+uint32_t cflags = REQ_NONE;
*flags = REQ_NONE;
do {
uint32_t d;
- int dflags;
+ uint32_t dflags;
int xl = (*code == OP_CBRA || *code == OP_SCBRA ||
*code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
PCRE2_SPTR scode = first_significant_code(code + 1+LINK_SIZE + xl, TRUE);
@@ -8680,9 +8807,8 @@ do {
case OP_SCRIPT_RUN:
d = find_firstassertedcu(scode, &dflags, inassert +
((op == OP_ASSERT || op == OP_ASSERT_NA)?1:0));
- if (dflags < 0)
- return 0;
- if (cflags < 0) { c = d; cflags = dflags; }
+ if (dflags >= REQ_NONE) return 0;
+ if (cflags >= REQ_NONE) { c = d; cflags = dflags; }
else if (c != d || cflags != dflags) return 0;
break;
@@ -8695,7 +8821,7 @@ do {
case OP_MINPLUS:
case OP_POSPLUS:
if (inassert == 0) return 0;
- if (cflags < 0) { c = scode[1]; cflags = 0; }
+ if (cflags >= REQ_NONE) { c = scode[1]; cflags = 0; }
else if (c != scode[1]) return 0;
break;
@@ -8721,7 +8847,7 @@ do {
#endif
#endif
- if (cflags < 0) { c = scode[1]; cflags = REQ_CASELESS; }
+ if (cflags >= REQ_NONE) { c = scode[1]; cflags = REQ_CASELESS; }
else if (c != scode[1]) return 0;
break;
}
@@ -9129,7 +9255,7 @@ for (;; pptr++)
case META_LOOKAHEAD:
case META_LOOKAHEADNOT:
case META_LOOKAHEAD_NA:
- *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb);
+ *errcodeptr = check_lookbehinds(pptr + 1, &pptr, recurses, cb, lcptr);
if (*errcodeptr != 0) return -1;
/* Ignore any qualifiers that follow a lookahead assertion. */
@@ -9469,16 +9595,16 @@ Arguments
retptr if not NULL, return the ket pointer here
recurses chain of recurse_check to catch mutual recursion
cb points to the compile block
+ lcptr points to loop counter
Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
*/
static int
check_lookbehinds(uint32_t *pptr, uint32_t **retptr,
- parsed_recurse_check *recurses, compile_block *cb)
+ parsed_recurse_check *recurses, compile_block *cb, int *lcptr)
{
int errorcode = 0;
-int loopcount = 0;
int nestlevel = 0;
cb->erroroffset = PCRE2_UNSET;
@@ -9604,7 +9730,7 @@ for (; *pptr != META_END; pptr++)
case META_LOOKBEHIND:
case META_LOOKBEHINDNOT:
case META_LOOKBEHIND_NA:
- if (!set_lookbehind_lengths(&pptr, &errorcode, &loopcount, recurses, cb))
+ if (!set_lookbehind_lengths(&pptr, &errorcode, lcptr, recurses, cb))
return errorcode;
break;
}
@@ -9657,7 +9783,7 @@ PCRE2_SIZE re_blocksize; /* Size of memory block */
PCRE2_SIZE big32count = 0; /* 32-bit literals >= 0x80000000 */
PCRE2_SIZE parsed_size_needed; /* Needed for parsed pattern */
-int32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
+uint32_t firstcuflags, reqcuflags; /* Type of first/req code unit */
uint32_t firstcu, reqcu; /* Value of first/req code unit */
uint32_t setflags = 0; /* NL and BSR set flags */
@@ -10059,7 +10185,8 @@ lengths. */
if (has_lookbehind)
{
- errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb);
+ int loopcount = 0;
+ errorcode = check_lookbehinds(cb.parsed_pattern, NULL, NULL, &cb, &loopcount);
if (errorcode != 0) goto HAD_CB_ERROR;
}
@@ -10336,13 +10463,13 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
(these are not saved during the compile because they can cause conflicts with
actual literals that follow). */
- if (firstcuflags < 0)
+ if (firstcuflags >= REQ_NONE)
firstcu = find_firstassertedcu(codestart, &firstcuflags, 0);
/* Save the data for a first code unit. The existence of one means the
minimum length must be at least 1. */
- if (firstcuflags >= 0)
+ if (firstcuflags < REQ_NONE)
{
re->first_codeunit = firstcu;
re->flags |= PCRE2_FIRSTSET;
@@ -10389,16 +10516,16 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0)
different character and not a non-starting code unit of the first character,
because the minimum length count is in characters, not code units. */
- if (reqcuflags >= 0)
+ if (reqcuflags < REQ_NONE)
{
#if PCRE2_CODE_UNIT_WIDTH == 16
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
- firstcuflags < 0 || /* First not set */
+ firstcuflags >= REQ_NONE || /* First not set */
(firstcu & 0xf800) != 0xd800 || /* First not surrogate */
(reqcu & 0xfc00) != 0xdc00) /* Req not low surrogate */
#elif PCRE2_CODE_UNIT_WIDTH == 8
if ((re->overall_options & PCRE2_UTF) == 0 || /* Not UTF */
- firstcuflags < 0 || /* First not set */
+ firstcuflags >= REQ_NONE || /* First not set */
(firstcu & 0x80) == 0 || /* First is ASCII */
(reqcu & 0x80) == 0) /* Req is ASCII */
#endif
@@ -10495,4 +10622,10 @@ re = NULL;
goto EXIT;
}
+/* These #undefs are here to enable unity builds with CMake. */
+
+#undef NLBLOCK /* Block containing newline information */
+#undef PSSTART /* Field containing processed string start */
+#undef PSEND /* Field containing processed string end */
+
/* End of pcre2_compile.c */