summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/pcre2/src/pcre2_match.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_match.c')
-rw-r--r--src/3rdparty/pcre2/src/pcre2_match.c599
1 files changed, 420 insertions, 179 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_match.c b/src/3rdparty/pcre2/src/pcre2_match.c
index 6354e1bb9e..b4a970313d 100644
--- a/src/3rdparty/pcre2/src/pcre2_match.c
+++ b/src/3rdparty/pcre2/src/pcre2_match.c
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2015-2022 University of Cambridge
+ New API code Copyright (c) 2015-2024 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
+#include "pcre2_internal.h"
+
/* These defines enable debugging code */
/* #define DEBUG_FRAMES_DISPLAY */
@@ -53,6 +55,10 @@ POSSIBILITY OF SUCH DAMAGE.
#include <stdarg.h>
#endif
+#ifdef DEBUG_SHOW_OPS
+static const char *OP_names[] = { OP_NAME_LIST };
+#endif
+
/* These defines identify the name of the block containing "static"
information, and fields within it. */
@@ -60,8 +66,6 @@ information, and fields within it. */
#define PSSTART start_subject /* Field containing processed string start */
#define PSEND end_subject /* Field containing processed string end */
-#include "pcre2_internal.h"
-
#define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */
/* Masks for identifying the public options that are permitted at match time. */
@@ -69,7 +73,8 @@ information, and fields within it. */
#define PUBLIC_MATCH_OPTIONS \
(PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \
PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \
- PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT)
+ PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \
+ PCRE2_DISABLE_RECURSELOOP_CHECK)
#define PUBLIC_JIT_MATCH_OPTIONS \
(PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\
@@ -150,7 +155,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync. */
enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10,
RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20,
RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30,
- RM31, RM32, RM33, RM34, RM35, RM36 };
+ RM31, RM32, RM33, RM34, RM35, RM36, RM37 };
#ifdef SUPPORT_WIDE_CHARS
enum { RM100=100, RM101 };
@@ -204,6 +209,7 @@ Arguments:
P a previous frame of interest
frame_size the frame size
mb points to the match block
+ match_data points to the match data block
s identification text
Returns: nothing
@@ -211,7 +217,7 @@ Returns: nothing
static void
display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size,
- match_block *mb, const char *s, ...)
+ match_block *mb, pcre2_match_data *match_data, const char *s, ...)
{
uint32_t i;
heapframe *Q;
@@ -223,10 +229,10 @@ vfprintf(f, s, ap);
va_end(ap);
if (P != NULL) fprintf(f, " P=%lu",
- ((char *)P - (char *)(mb->match_frames))/frame_size);
+ ((char *)P - (char *)(match_data->heapframes))/frame_size);
fprintf(f, "\n");
-for (i = 0, Q = mb->match_frames;
+for (i = 0, Q = match_data->heapframes;
Q <= F;
i++, Q = (heapframe *)((char *)Q + frame_size))
{
@@ -490,10 +496,16 @@ A version did exist that used individual frames on the heap instead of calling
match() recursively, but this ran substantially slower. The current version is
a refactoring that uses a vector of frames to remember backtracking points.
This runs no slower, and possibly even a bit faster than the original recursive
-implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe
-50 frames) is allocated on the system stack. If this is not big enough, the
-heap is used for a larger vector.
-
+implementation.
+
+At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50
+frames) was allocated on the system stack. If this was not big enough, the heap
+was used for a larger vector. However, it turns out that there are environments
+where taking as little as 20KiB from the system stack is an embarrassment.
+After another refactoring, the heap is used exclusively, but a pointer the
+frames vector and its size are cached in the match_data block, so that there is
+no new memory allocation if the same match_data block is used for multiple
+matches (unless the frames vector has to be extended).
*******************************************************************************
******************************************************************************/
@@ -566,10 +578,9 @@ made performance worse.
Arguments:
start_eptr starting character in subject
start_ecode starting position in compiled code
- ovector pointer to the final output vector
- oveccount number of pairs in ovector
top_bracket number of capturing parentheses in the pattern
frame_size size of each backtracking frame
+ match_data pointer to the match_data block
mb pointer to "static" variables block
Returns: MATCH_MATCH if matched ) these values are >= 0
@@ -580,20 +591,23 @@ Returns: MATCH_MATCH if matched ) these values are >= 0
*/
static int
-match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector,
- uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size,
- match_block *mb)
+match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket,
+ PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb)
{
/* Frame-handling variables */
heapframe *F; /* Current frame pointer */
heapframe *N = NULL; /* Temporary frame pointers */
heapframe *P = NULL;
+
+heapframe *frames_top; /* End of frames vector */
heapframe *assert_accept_frame = NULL; /* For passing back a frame with captures */
-PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
+PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */
/* Local variables that do not need to be preserved over calls to RRMATCH(). */
+PCRE2_SPTR branch_end = NULL;
+PCRE2_SPTR branch_start;
PCRE2_SPTR bracode; /* Temp pointer to start of group */
PCRE2_SIZE offset; /* Used for group offsets */
PCRE2_SIZE length; /* Used for various length calculations */
@@ -627,10 +641,11 @@ copied when a new frame is created. */
frame_copy_size = frame_size - offsetof(heapframe, eptr);
-/* Set up the first current frame at the start of the vector, and initialize
-fields that are not reset for new frames. */
+/* Set up the first frame and the end of the frames vector. */
+
+F = match_data->heapframes;
+frames_top = (heapframe *)((char *)F + match_data->heapframes_size);
-F = mb->match_frames;
Frdepth = 0; /* "Recursion" depth */
Fcapture_last = 0; /* Number of most recent capture */
Fcurrent_recurse = RECURSE_UNSET; /* Not pattern recursing. */
@@ -646,38 +661,58 @@ backtracking point. */
MATCH_RECURSE:
-/* Set up a new backtracking frame. If the vector is full, get a new one
-on the heap, doubling the size, but constrained by the heap limit. */
+/* Set up a new backtracking frame. If the vector is full, get a new one,
+doubling the size, but constrained by the heap limit (which is in KiB). */
N = (heapframe *)((char *)F + frame_size);
-if (N >= mb->match_frames_top)
+if ((heapframe *)((char *)N + frame_size) >= frames_top)
{
- PCRE2_SIZE newsize = mb->frame_vector_size * 2;
heapframe *new;
+ PCRE2_SIZE newsize;
+ PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes);
- if ((newsize / 1024) > mb->heap_limit)
+ if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2)
{
- PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size;
- if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT;
- newsize = maxsize;
+ if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1)
+ return PCRE2_ERROR_NOMEMORY;
+ newsize = PCRE2_SIZE_MAX - 1;
}
+ else
+ newsize = match_data->heapframes_size * 2;
- new = mb->memctl.malloc(newsize, mb->memctl.memory_data);
+ if (newsize / 1024 >= mb->heap_limit)
+ {
+ PCRE2_SIZE old_size = match_data->heapframes_size / 1024;
+ if (mb->heap_limit <= old_size)
+ return PCRE2_ERROR_HEAPLIMIT;
+ else
+ {
+ PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size);
+ int over_bytes = match_data->heapframes_size % 1024;
+ if (over_bytes) max_delta -= (1024 - over_bytes);
+ newsize = match_data->heapframes_size + max_delta;
+ }
+ }
+
+ /* With a heap limit set, the permitted additional size may not be enough for
+ another frame, so do a final check. */
+
+ if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT;
+ new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data);
if (new == NULL) return PCRE2_ERROR_NOMEMORY;
- memcpy(new, mb->match_frames, mb->frame_vector_size);
+ memcpy(new, match_data->heapframes, usedsize);
- F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames));
- N = (heapframe *)((char *)F + frame_size);
+ N = (heapframe *)((char *)new + usedsize);
+ F = (heapframe *)((char *)N - frame_size);
- if (mb->match_frames != mb->stack_frames)
- mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
- mb->match_frames = new;
- mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize);
- mb->frame_vector_size = newsize;
+ match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data);
+ match_data->heapframes = new;
+ match_data->heapframes_size = newsize;
+ frames_top = (heapframe *)((char *)new + newsize);
}
#ifdef DEBUG_SHOW_RMATCH
-fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1);
+fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1);
if (group_frame_type != 0)
{
fprintf(stderr, " type=%x ", group_frame_type);
@@ -731,7 +766,7 @@ recursion value. */
if (group_frame_type != 0)
{
- Flast_group_offset = (char *)F - (char *)mb->match_frames;
+ Flast_group_offset = (char *)F - (char *)match_data->heapframes;
if (GF_IDMASK(group_frame_type) == GF_RECURSE)
Fcurrent_recurse = GF_DATAMASK(group_frame_type);
group_frame_type = 0;
@@ -747,10 +782,16 @@ opcodes. */
if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT;
if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT;
+#ifdef DEBUG_SHOW_OPS
+fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n",
+ GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject);
+#endif
+
for (;;)
{
#ifdef DEBUG_SHOW_OPS
-fprintf(stderr, "++ op=%d\n", *Fecode);
+fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
+ OP_names[*Fecode]);
#endif
Fop = (uint8_t)(*Fecode); /* Cast needed for 16-bit and 32-bit modes */
@@ -773,7 +814,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
for(;;)
{
if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
- N = (heapframe *)((char *)mb->match_frames + offset);
+ N = (heapframe *)((char *)match_data->heapframes + offset);
P = (heapframe *)((char *)N - frame_size);
if (N->group_frame_type == (GF_CAPTURE | number)) break;
offset = P->last_group_offset;
@@ -798,20 +839,21 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
assert_accept_frame = F;
RRETURN(MATCH_ACCEPT);
- /* If recursing, we have to find the most recent recursion. */
+ /* For ACCEPT within a recursion, we have to find the most recent
+ recursion. If not in a recursion, fall through to code that is common with
+ OP_END. */
case OP_ACCEPT:
- case OP_END:
-
- /* Handle end of a recursion. */
-
if (Fcurrent_recurse != RECURSE_UNSET)
{
+#ifdef DEBUG_SHOW_OPS
+ fprintf(stderr, "++ Accept within recursion\n");
+#endif
offset = Flast_group_offset;
for(;;)
{
if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
- N = (heapframe *)((char *)mb->match_frames + offset);
+ N = (heapframe *)((char *)match_data->heapframes + offset);
P = (heapframe *)((char *)N - frame_size);
if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break;
offset = P->last_group_offset;
@@ -829,27 +871,49 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
Fecode += 1 + LINK_SIZE;
continue;
}
+ /* Fall through */
+
+ /* OP_END itself can never be reached within a recursion because that is
+ picked up when the OP_KET that always precedes OP_END is reached. */
- /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY
- is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the
- start of the subject. In both cases, backtracking will then try other
- alternatives, if any. */
+ case OP_END:
+
+ /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if
+ PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the
+ subject. In both cases, backtracking will then try other alternatives, if
+ any. */
if (Feptr == Fstart_match &&
((mb->moptions & PCRE2_NOTEMPTY) != 0 ||
((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 &&
Fstart_match == mb->start_subject + mb->start_offset)))
+ {
+#ifdef DEBUG_SHOW_OPS
+ fprintf(stderr, "++ Backtrack because empty string\n");
+#endif
RRETURN(MATCH_NOMATCH);
+ }
- /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not
+ /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not
the end of the subject. After (*ACCEPT) we fail the entire match (at this
- position) but backtrack on reaching the end of the pattern. */
+ position) but backtrack if we've reached the end of the pattern. This
+ applies whether or not we are in a recursion. */
if (Feptr < mb->end_subject &&
((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0)
{
- if (Fop == OP_END) RRETURN(MATCH_NOMATCH);
- return MATCH_NOMATCH;
+ if (Fop == OP_END)
+ {
+#ifdef DEBUG_SHOW_OPS
+ fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n");
+#endif
+ RRETURN(MATCH_NOMATCH);
+ }
+
+#ifdef DEBUG_SHOW_OPS
+ fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n");
+#endif
+ return MATCH_NOMATCH; /* (*ACCEPT) */
}
/* We have a successful match of the whole pattern. Record the result and
@@ -864,14 +928,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
mb->mark = Fmark; /* and the last success mark */
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
- ovector[0] = Fstart_match - mb->start_subject;
- ovector[1] = Feptr - mb->start_subject;
+ match_data->ovector[0] = Fstart_match - mb->start_subject;
+ match_data->ovector[1] = Feptr - mb->start_subject;
/* Set i to the smaller of the sizes of the external and frame ovectors. */
- i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1);
- memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
- while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET;
+ i = 2 * ((top_bracket + 1 > match_data->oveccount)?
+ match_data->oveccount : top_bracket + 1);
+ memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE));
+ while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET;
return MATCH_MATCH; /* Note: NOT RRETURN */
@@ -2421,6 +2486,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
GETCHARINCTEST(fc, Feptr);
{
const uint32_t *cp;
+ uint32_t chartype;
const ucd_record *prop = GET_UCD(fc);
BOOL notmatch = Fop == OP_NOTPROP;
@@ -2431,9 +2497,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
break;
case PT_LAMP:
- if ((prop->chartype == ucp_Lu ||
- prop->chartype == ucp_Ll ||
- prop->chartype == ucp_Lt) == notmatch)
+ chartype = prop->chartype;
+ if ((chartype == ucp_Lu ||
+ chartype == ucp_Ll ||
+ chartype == ucp_Lt) == notmatch)
RRETURN(MATCH_NOMATCH);
break;
@@ -2463,8 +2530,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* These are specials */
case PT_ALNUM:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N) == notmatch)
+ chartype = prop->chartype;
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch)
RRETURN(MATCH_NOMATCH);
break;
@@ -2489,13 +2557,22 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
break;
case PT_WORD:
- if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
- PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
- fc == CHAR_UNDERSCORE) == notmatch)
+ chartype = prop->chartype;
+ if ((PRIV(ucp_gentype)[chartype] == ucp_L ||
+ PRIV(ucp_gentype)[chartype] == ucp_N ||
+ chartype == ucp_Mn ||
+ chartype == ucp_Pc) == notmatch)
RRETURN(MATCH_NOMATCH);
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (notmatch) break;;
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Fecode[2];
for (;;)
{
@@ -2791,16 +2868,17 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case PT_WORD:
for (i = 1; i <= Lmin; i++)
{
- int category;
+ int chartype, category;
if (Feptr >= mb->end_subject)
{
SCHECK_PARTIAL();
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
- category = UCD_CATEGORY(fc);
+ chartype = UCD_CHARTYPE(fc);
+ category = PRIV(ucp_gentype)[chartype];
if ((category == ucp_L || category == ucp_N ||
- fc == CHAR_UNDERSCORE) == notmatch)
+ chartype == ucp_Mn || chartype == ucp_Pc) == notmatch)
RRETURN(MATCH_NOMATCH);
}
break;
@@ -2815,6 +2893,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (notmatch) continue;
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
@@ -3595,7 +3680,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case PT_WORD:
for (;;)
{
- int category;
+ int chartype, category;
RMATCH(Fecode, RM215);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH);
@@ -3605,10 +3690,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
- category = UCD_CATEGORY(fc);
+ chartype = UCD_CHARTYPE(fc);
+ category = PRIV(ucp_gentype)[chartype];
if ((category == ucp_L ||
category == ucp_N ||
- fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP))
+ chartype == ucp_Mn ||
+ chartype == ucp_Pc) == (Lctype == OP_NOTPROP))
RRETURN(MATCH_NOMATCH);
}
/* Control never gets here */
@@ -3626,6 +3713,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (Lctype == OP_NOTPROP) continue;
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
@@ -4176,7 +4270,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case PT_WORD:
for (i = Lmin; i < Lmax; i++)
{
- int category;
+ int chartype, category;
int len = 1;
if (Feptr >= mb->end_subject)
{
@@ -4184,9 +4278,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
break;
}
GETCHARLENTEST(fc, Feptr, len);
- category = UCD_CATEGORY(fc);
- if ((category == ucp_L || category == ucp_N ||
- fc == CHAR_UNDERSCORE) == notmatch)
+ chartype = UCD_CHARTYPE(fc);
+ category = PRIV(ucp_gentype)[chartype];
+ if ((category == ucp_L ||
+ category == ucp_N ||
+ chartype == ucp_Mn ||
+ chartype == ucp_Pc) == notmatch)
break;
Feptr+= len;
}
@@ -4203,14 +4300,24 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
break;
}
GETCHARLENTEST(fc, Feptr, len);
- cp = PRIV(ucd_caseless_sets) + Lpropvalue;
- for (;;)
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
{
- if (fc < *cp)
- { if (notmatch) break; else goto GOT_MAX; }
- if (fc == *cp++)
- { if (notmatch) goto GOT_MAX; else break; }
+ if (!notmatch) goto GOT_MAX;
+ }
+ else
+#endif
+ {
+ cp = PRIV(ucd_caseless_sets) + Lpropvalue;
+ for (;;)
+ {
+ if (fc < *cp)
+ { if (notmatch) break; else goto GOT_MAX; }
+ if (fc == *cp++)
+ { if (notmatch) goto GOT_MAX; else break; }
+ }
}
+
Feptr += len;
}
GOT_MAX:
@@ -5308,9 +5415,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
- /* Recursion either matches the current regex, or some subexpression. The
- offset data is the offset to the starting bracket from the start of the
- whole pattern. (This is so that it works from duplicated subpatterns.) */
+ /* Pattern recursion either matches the current regex, or some
+ subexpression. The offset data is the offset to the starting bracket from
+ the start of the whole pattern. This is so that it works from duplicated
+ subpatterns. For a whole-pattern recursion, we have to infer the number
+ zero. */
#define Lframe_type F->temp_32[0]
#define Lstart_branch F->temp_sptr[0]
@@ -5319,28 +5428,35 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
bracode = mb->start_code + GET(Fecode, 1);
number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE);
- /* If we are already in a recursion, check for repeating the same one
- without advancing the subject pointer. This should catch convoluted mutual
- recursions. (Some simple cases are caught at compile time.) */
+ /* If we are already in a pattern recursion, check for repeating the same
+ one without changing the subject pointer or the last referenced character
+ in the subject. This should catch convoluted mutual recursions; some
+ simple cases are caught at compile time. However, there are rare cases when
+ this check needs to be turned off. In this case, actual recursion loops
+ will be caught by the match or heap limits. */
if (Fcurrent_recurse != RECURSE_UNSET)
{
offset = Flast_group_offset;
while (offset != PCRE2_UNSET)
{
- N = (heapframe *)((char *)mb->match_frames + offset);
+ N = (heapframe *)((char *)match_data->heapframes + offset);
P = (heapframe *)((char *)N - frame_size);
if (N->group_frame_type == (GF_RECURSE | number))
{
- if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP;
+ if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used &&
+ (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0)
+ return PCRE2_ERROR_RECURSELOOP;
break;
}
offset = P->last_group_offset;
}
}
- /* Now run the recursion, branch by branch. */
+ /* Remember the current last referenced character and then run the
+ recursion branch by branch. */
+ F->recurse_last_used = mb->last_used_ptr;
Lstart_branch = bracode;
Lframe_type = GF_RECURSE | number;
@@ -5669,13 +5785,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
- /* Move the subject pointer back. This occurs only at the start of each
- branch of a lookbehind assertion. If we are too close to the start to move
- back, fail. When working with UTF-8 we move back a number of characters,
- not bytes. */
+ /* Move the subject pointer back by one fixed amount. This occurs at the
+ start of each branch that has a fixed length in a lookbehind assertion. If
+ we are too close to the start to move back, fail. When working with UTF-8
+ we move back a number of characters, not bytes. */
case OP_REVERSE:
- number = GET(Fecode, 1);
+ number = GET2(Fecode, 1);
#ifdef SUPPORT_UNICODE
if (utf)
{
@@ -5689,7 +5805,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
else
#endif
- /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */
+ /* No UTF support, or not in UTF mode: count is code unit count */
{
if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH);
@@ -5699,15 +5815,84 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* Save the earliest consulted character, then skip to next opcode */
if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr;
- Fecode += 1 + LINK_SIZE;
+ Fecode += 1 + IMM2_SIZE;
break;
/* ===================================================================== */
+ /* Move the subject pointer back by a variable amount. This occurs at the
+ start of each branch of a lookbehind assertion when the branch has a
+ variable, but limited, length. A loop is needed to try matching the branch
+ after moving back different numbers of characters. If we are too close to
+ the start to move back even the minimum amount, fail. When working with
+ UTF-8 we move back a number of characters, not bytes. */
+
+#define Lmin F->temp_32[0]
+#define Lmax F->temp_32[1]
+#define Leptr F->temp_sptr[0]
+
+ case OP_VREVERSE:
+ Lmin = GET2(Fecode, 1);
+ Lmax = GET2(Fecode, 1 + IMM2_SIZE);
+ Leptr = Feptr;
+
+ /* Move back by the maximum branch length and then work forwards. This
+ ensures that items such as \d{3,5} get the maximum length, which is
+ relevant for captures, and makes for Perl compatibility. */
+
+#ifdef SUPPORT_UNICODE
+ if (utf)
+ {
+ for (i = 0; i < Lmax; i++)
+ {
+ if (Feptr == mb->start_subject)
+ {
+ if (i < Lmin) RRETURN(MATCH_NOMATCH);
+ Lmax = i;
+ break;
+ }
+ Feptr--;
+ BACKCHAR(Feptr);
+ }
+ }
+ else
+#endif
+
+ /* No UTF support or not in UTF mode */
+
+ {
+ ptrdiff_t diff = Feptr - mb->start_subject;
+ uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? diff : 0);
+ if (Lmin > available) RRETURN(MATCH_NOMATCH);
+ if (Lmax > available) Lmax = available;
+ Feptr -= Lmax;
+ }
+
+ /* Now try matching, moving forward one character on failure, until we
+ reach the mimimum back length. */
+
+ for (;;)
+ {
+ RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37);
+ if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH);
+ Feptr++;
+#ifdef SUPPORT_UNICODE
+ if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); }
+#endif
+ }
+ /* Control never reaches here */
+
+#undef Lmin
+#undef Lmax
+#undef Leptr
+
+ /* ===================================================================== */
/* An alternation is the end of a branch; scan along to find the end of the
bracketed group. */
case OP_ALT:
+ branch_end = Fecode;
do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT);
break;
@@ -5715,7 +5900,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* ===================================================================== */
/* The end of a parenthesized group. For all but OP_BRA and OP_COND, the
starting frame was added to the chained frames in order to remember the
- starting subject position for the group. */
+ starting subject position for the group. (Not true for OP_BRA when it's a
+ whole pattern recursion, but that is handled separately below.)*/
case OP_KET:
case OP_KETRMIN:
@@ -5724,12 +5910,18 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
bracode = Fecode - GET(Fecode, 1);
- /* Point N to the frame at the start of the most recent group.
- Remember the subject pointer at the start of the group. */
+ if (branch_end == NULL) branch_end = Fecode;
+ branch_start = bracode;
+ while (branch_start + GET(branch_start, 1) != branch_end)
+ branch_start += GET(branch_start, 1);
+ branch_end = NULL;
+
+ /* Point N to the frame at the start of the most recent group, and P to its
+ predecessor. Remember the subject pointer at the start of the group. */
if (*bracode != OP_BRA && *bracode != OP_COND)
{
- N = (heapframe *)((char *)mb->match_frames + Flast_group_offset);
+ N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset);
P = (heapframe *)((char *)N - frame_size);
Flast_group_offset = P->last_group_offset;
@@ -5761,27 +5953,64 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
switch (*bracode)
{
- case OP_BRA: /* No need to do anything for these */
- case OP_COND:
+ /* Whole pattern recursion is handled as a recursion into group 0, but
+ the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing
+ group - a design mistake: it should perhaps have been capture group 0.
+ Anyway, that means the end of such recursion must be handled here. It is
+ detected by checking for an immediately following OP_END when we are
+ recursing in group 0. If this is not the end of a whole-pattern
+ recursion, there is nothing to be done. */
+
+ case OP_BRA:
+ if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break;
+
+ /* It is the end of whole-pattern recursion. */
+
+ offset = Flast_group_offset;
+ if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL;
+ N = (heapframe *)((char *)match_data->heapframes + offset);
+ P = (heapframe *)((char *)N - frame_size);
+ Flast_group_offset = P->last_group_offset;
+
+ /* Reinstate the previous set of captures and then carry on after the
+ recursion call. */
+
+ memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
+ Foffset_top * sizeof(PCRE2_SIZE));
+ Foffset_top = P->offset_top;
+ Fcapture_last = P->capture_last;
+ Fcurrent_recurse = P->current_recurse;
+ Fecode = P->ecode + 1 + LINK_SIZE;
+ continue; /* With next opcode */
+
+ case OP_COND: /* No need to do anything for these */
case OP_SCOND:
break;
/* Non-atomic positive assertions are like OP_BRA, except that the
subject pointer must be put back to where it was at the start of the
- assertion. */
+ assertion. For a variable lookbehind, check its end point. */
- case OP_ASSERT_NA:
case OP_ASSERTBACK_NA:
+ if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
+ RRETURN(MATCH_NOMATCH);
+ /* Fall through */
+
+ case OP_ASSERT_NA:
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
Feptr = P->eptr;
break;
/* Atomic positive assertions are like OP_ONCE, except that in addition
the subject pointer must be put back to where it was at the start of the
- assertion. */
+ assertion. For a variable lookbehind, check its end point. */
- case OP_ASSERT:
case OP_ASSERTBACK:
+ if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
+ RRETURN(MATCH_NOMATCH);
+ /* Fall through */
+
+ case OP_ASSERT:
if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr;
Feptr = P->eptr;
/* Fall through */
@@ -5802,10 +6031,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
break;
/* A matching negative assertion returns MATCH, which is turned into
- NOMATCH at the assertion level. */
+ NOMATCH at the assertion level. For a variable lookbehind, check its end
+ point. */
- case OP_ASSERT_NOT:
case OP_ASSERTBACK_NOT:
+ if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr)
+ RRETURN(MATCH_NOMATCH);
+ /* Fall through */
+
+ case OP_ASSERT_NOT:
RRETURN(MATCH_MATCH);
/* At the end of a script run, apply the script-checking rules. This code
@@ -5816,9 +6050,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH);
break;
- /* Whole-pattern recursion is coded as a recurse into group 0, so it
- won't be picked up here. Instead, we catch it when the OP_END is reached.
- Other recursion is handled here. */
+ /* Whole-pattern recursion is coded as a recurse into group 0, and is
+ handled with OP_BRA above. Other recursion is handled here. */
case OP_CBRA:
case OP_CBRAPOS:
@@ -5833,7 +6066,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
{
P = (heapframe *)((char *)N - frame_size);
memcpy((char *)F + offsetof(heapframe, ovector), P->ovector,
- P->offset_top * sizeof(PCRE2_SIZE));
+ Foffset_top * sizeof(PCRE2_SIZE));
Foffset_top = P->offset_top;
Fcapture_last = P->capture_last;
Fcurrent_recurse = P->current_recurse;
@@ -5916,10 +6149,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
/* Fall through */
- /* Unconditional end of subject assertion (\z) */
+ /* Unconditional end of subject assertion (\z). */
case OP_EOD:
- if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
+ if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
if (mb->partial != 0)
{
mb->hitend = TRUE;
@@ -6031,6 +6264,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
if (Feptr == mb->check_subject) prev_is_word = FALSE; else
{
PCRE2_SPTR lastptr = Feptr - 1;
@@ -6045,13 +6280,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
fc = *lastptr;
if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr;
#ifdef SUPPORT_UNICODE
- if ((mb->poptions & PCRE2_UCP) != 0)
+ if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
{
- if (fc == '_') prev_is_word = TRUE; else
- {
- int cat = UCD_CATEGORY(fc);
- prev_is_word = (cat == ucp_L || cat == ucp_N);
- }
+ int chartype = UCD_CHARTYPE(fc);
+ int category = PRIV(ucp_gentype)[chartype];
+ prev_is_word = (category == ucp_L || category == ucp_N ||
+ chartype == ucp_Mn || chartype == ucp_Pc);
}
else
#endif /* SUPPORT_UNICODE */
@@ -6079,13 +6313,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
fc = *Feptr;
if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr;
#ifdef SUPPORT_UNICODE
- if ((mb->poptions & PCRE2_UCP) != 0)
+ if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY)
{
- if (fc == '_') cur_is_word = TRUE; else
- {
- int cat = UCD_CATEGORY(fc);
- cur_is_word = (cat == ucp_L || cat == ucp_N);
- }
+ int chartype = UCD_CHARTYPE(fc);
+ int category = PRIV(ucp_gentype)[chartype];
+ cur_is_word = (category == ucp_L || category == ucp_N ||
+ chartype == ucp_Mn || chartype == ucp_Pc);
}
else
#endif /* SUPPORT_UNICODE */
@@ -6094,7 +6327,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode);
/* Now see if the situation is what we want */
- if ((*Fecode++ == OP_WORD_BOUNDARY)?
+ if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)?
cur_is_word == prev_is_word : cur_is_word != prev_is_word)
RRETURN(MATCH_NOMATCH);
break;
@@ -6240,7 +6473,7 @@ F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */
mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */
#ifdef DEBUG_SHOW_RMATCH
-fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id);
+fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id);
#endif
switch (Freturn_id)
@@ -6249,7 +6482,7 @@ switch (Freturn_id)
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16)
LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24)
LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32)
- LBL(33) LBL(34) LBL(35) LBL(36)
+ LBL(33) LBL(34) LBL(35) LBL(36) LBL(37)
#ifdef SUPPORT_WIDE_CHARS
LBL(100) LBL(101)
@@ -6346,6 +6579,7 @@ BOOL jit_checked_utf = FALSE;
#endif /* SUPPORT_UNICODE */
PCRE2_SIZE frame_size;
+PCRE2_SIZE heapframes_size;
/* We need to have mb as a pointer to a match block, because the IS_NEWLINE
macro is used below, and it expects NLBLOCK to be defined as a pointer. */
@@ -6354,15 +6588,6 @@ pcre2_callout_block cb;
match_block actual_match_block;
match_block *mb = &actual_match_block;
-/* Allocate an initial vector of backtracking frames on the stack. If this
-proves to be too small, it is replaced by a larger one on the heap. To get a
-vector of the size required that is aligned for pointers, allocate it as a
-vector of pointers. */
-
-PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)]
- PCRE2_KEEP_UNINITIALIZED;
-mb->stack_frames = (heapframe *)stack_frames_vector;
-
/* Recognize NULL, length 0 as an empty string. */
if (subject == NULL && length == 0) subject = (PCRE2_SPTR)"";
@@ -6545,6 +6770,7 @@ if (use_jit)
match_data, mcontext);
if (rc != PCRE2_ERROR_JIT_BADOPTION)
{
+ match_data->subject_length = length;
if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0)
{
length = CU2BYTES(length + was_zero_terminated);
@@ -6713,7 +6939,7 @@ if (mcontext == NULL)
else mb->memctl = mcontext->memctl;
anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0;
-firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0;
+firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0;
startline = (re->flags & PCRE2_STARTLINE) != 0;
bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)?
true_end_subject : subject + mcontext->offset_limit;
@@ -6736,6 +6962,7 @@ mb->callout_data = mcontext->callout_data;
mb->start_subject = subject;
mb->start_offset = start_offset;
mb->end_subject = end_subject;
+mb->true_end_subject = true_end_subject;
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
mb->allowemptypartial = (re->max_lookbehind > 0) ||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
@@ -6793,15 +7020,11 @@ switch(re->newline_convention)
vector at the end, whose size depends on the number of capturing parentheses in
the pattern. It is not used at all if there are no capturing parentheses.
- frame_size is the total size of each frame
- mb->frame_vector_size is the total usable size of the vector (rounded down
- to a whole number of frames)
-
-The last of these is changed within the match() function if the frame vector
-has to be expanded. We therefore put it into the match block so that it is
-correct when calling match() more than once for non-anchored patterns.
+ frame_size is the total size of each frame
+ match_data->heapframes is the pointer to the frames vector
+ match_data->heapframes_size is the allocated size of the vector
-We must also pad frame_size for alignment to ensure subsequent frames are as
+We must pad the frame_size for alignment to ensure subsequent frames are as
aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE
array, that does not guarantee it is suitably aligned for pointers, as some
architectures have pointers that are larger than a size_t. */
@@ -6813,8 +7036,8 @@ frame_size = (offsetof(heapframe, ovector) +
/* Limits set in the pattern override the match context only if they are
smaller. */
-mb->heap_limit = (mcontext->heap_limit < re->limit_heap)?
- mcontext->heap_limit : re->limit_heap;
+mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)?
+ mcontext->heap_limit : re->limit_heap);
mb->match_limit = (mcontext->match_limit < re->limit_match)?
mcontext->match_limit : re->limit_match;
@@ -6823,35 +7046,40 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)?
mcontext->depth_limit : re->limit_depth;
/* If a pattern has very many capturing parentheses, the frame size may be very
-large. Ensure that there are at least 10 available frames by getting an initial
-vector on the heap if necessary, except when the heap limit prevents this. Get
-fewer if possible. (The heap limit is in kibibytes.) */
+large. Set the initial frame vector size to ensure that there are at least 10
+available frames, but enforce a minimum of START_FRAMES_SIZE. If this is
+greater than the heap limit, get as large a vector as possible. */
-if (frame_size <= START_FRAMES_SIZE/10)
+heapframes_size = frame_size * 10;
+if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE;
+if (heapframes_size / 1024 > mb->heap_limit)
{
- mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */
- mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size);
+ PCRE2_SIZE max_size = 1024 * mb->heap_limit;
+ if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT;
+ heapframes_size = max_size;
}
-else
+
+/* If an existing frame vector in the match_data block is large enough, we can
+use it. Otherwise, free any pre-existing vector and get a new one. */
+
+if (match_data->heapframes_size < heapframes_size)
{
- mb->frame_vector_size = frame_size * 10;
- if ((mb->frame_vector_size / 1024) > mb->heap_limit)
+ match_data->memctl.free(match_data->heapframes,
+ match_data->memctl.memory_data);
+ match_data->heapframes = match_data->memctl.malloc(heapframes_size,
+ match_data->memctl.memory_data);
+ if (match_data->heapframes == NULL)
{
- if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT;
- mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size;
+ match_data->heapframes_size = 0;
+ return PCRE2_ERROR_NOMEMORY;
}
- mb->match_frames = mb->memctl.malloc(mb->frame_vector_size,
- mb->memctl.memory_data);
- if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY;
+ match_data->heapframes_size = heapframes_size;
}
-mb->match_frames_top =
- (heapframe *)((char *)mb->match_frames + mb->frame_vector_size);
-
/* Write to the ovector within the first frame to mark every capture unset and
to avoid uninitialized memory read errors when it is copied to a new frame. */
-memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff,
+memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff,
frame_size - offsetof(heapframe, ovector));
/* Pointers to the individual character tables */
@@ -7279,8 +7507,16 @@ for(;;)
mb->end_offset_top = 0;
mb->skip_arg_count = 0;
- rc = match(start_match, mb->start_code, match_data->ovector,
- match_data->oveccount, re->top_bracket, frame_size, mb);
+#ifdef DEBUG_SHOW_OPS
+ fprintf(stderr, "++ Calling match()\n");
+#endif
+
+ rc = match(start_match, mb->start_code, re->top_bracket, frame_size,
+ match_data, mb);
+
+#ifdef DEBUG_SHOW_OPS
+ fprintf(stderr, "++ match() returned %d\n\n", rc);
+#endif
if (mb->hitend && start_partial == NULL)
{
@@ -7429,6 +7665,7 @@ if (utf && end_subject != true_end_subject &&
if (start_match >= true_end_subject)
{
rc = MATCH_NOMATCH; /* In case it was partial */
+ match_partial = NULL;
break;
}
@@ -7463,11 +7700,6 @@ if (utf && end_subject != true_end_subject &&
}
#endif /* SUPPORT_UNICODE */
-/* Release an enlarged frame vector that is on the heap. */
-
-if (mb->match_frames != mb->stack_frames)
- mb->memctl.free(mb->match_frames, mb->memctl.memory_data);
-
/* Fill in fields that are always returned in the match data. */
match_data->code = re;
@@ -7483,6 +7715,7 @@ if (rc == MATCH_MATCH)
{
match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)?
0 : (int)mb->end_offset_top/2 + 1;
+ match_data->subject_length = length;
match_data->startchar = start_match - subject;
match_data->leftchar = mb->start_used_ptr - subject;
match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)?
@@ -7497,6 +7730,7 @@ if (rc == MATCH_MATCH)
match_data->flags |= PCRE2_MD_COPIED_SUBJECT;
}
else match_data->subject = subject;
+
return match_data->rc;
}
@@ -7518,6 +7752,7 @@ PCRE2_ERROR_PARTIAL. */
else if (match_partial != NULL)
{
match_data->subject = subject;
+ match_data->subject_length = length;
match_data->ovector[0] = match_partial - subject;
match_data->ovector[1] = end_subject - subject;
match_data->startchar = match_partial - subject;
@@ -7533,4 +7768,10 @@ else match_data->rc = PCRE2_ERROR_NOMATCH;
return match_data->rc;
}
+/* These #undefs are here to enable unity builds with CMake. */
+
+#undef NLBLOCK /* Block containing newline information */
+#undef PSSTART /* Field containing processed string start */
+#undef PSEND /* Field containing processed string end */
+
/* End of pcre2_match.c */