diff options
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_match.c')
-rw-r--r-- | src/3rdparty/pcre2/src/pcre2_match.c | 976 |
1 files changed, 720 insertions, 256 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_match.c b/src/3rdparty/pcre2/src/pcre2_match.c index ed60517131..b4a970313d 100644 --- a/src/3rdparty/pcre2/src/pcre2_match.c +++ b/src/3rdparty/pcre2/src/pcre2_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2015-2020 University of Cambridge + New API code Copyright (c) 2015-2024 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -43,16 +43,22 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif +#include "pcre2_internal.h" + /* These defines enable debugging code */ /* #define DEBUG_FRAMES_DISPLAY */ /* #define DEBUG_SHOW_OPS */ /* #define DEBUG_SHOW_RMATCH */ -#ifdef DEBUG_FRAME_DISPLAY +#ifdef DEBUG_FRAMES_DISPLAY #include <stdarg.h> #endif +#ifdef DEBUG_SHOW_OPS +static const char *OP_names[] = { OP_NAME_LIST }; +#endif + /* These defines identify the name of the block containing "static" information, and fields within it. */ @@ -60,8 +66,6 @@ information, and fields within it. */ #define PSSTART start_subject /* Field containing processed string start */ #define PSEND end_subject /* Field containing processed string end */ -#include "pcre2_internal.h" - #define RECURSE_UNSET 0xffffffffu /* Bigger than max group number */ /* Masks for identifying the public options that are permitted at match time. */ @@ -69,7 +73,8 @@ information, and fields within it. */ #define PUBLIC_MATCH_OPTIONS \ (PCRE2_ANCHORED|PCRE2_ENDANCHORED|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY| \ PCRE2_NOTEMPTY_ATSTART|PCRE2_NO_UTF_CHECK|PCRE2_PARTIAL_HARD| \ - PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT) + PCRE2_PARTIAL_SOFT|PCRE2_NO_JIT|PCRE2_COPY_MATCHED_SUBJECT| \ + PCRE2_DISABLE_RECURSELOOP_CHECK) #define PUBLIC_JIT_MATCH_OPTIONS \ (PCRE2_NO_UTF_CHECK|PCRE2_NOTBOL|PCRE2_NOTEOL|PCRE2_NOTEMPTY|\ @@ -150,7 +155,7 @@ changed, the code at RETURN_SWITCH below must be updated in sync. */ enum { RM1=1, RM2, RM3, RM4, RM5, RM6, RM7, RM8, RM9, RM10, RM11, RM12, RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, RM21, RM22, RM23, RM24, RM25, RM26, RM27, RM28, RM29, RM30, - RM31, RM32, RM33, RM34, RM35, RM36 }; + RM31, RM32, RM33, RM34, RM35, RM36, RM37 }; #ifdef SUPPORT_WIDE_CHARS enum { RM100=100, RM101 }; @@ -159,7 +164,8 @@ enum { RM100=100, RM101 }; #ifdef SUPPORT_UNICODE enum { RM200=200, RM201, RM202, RM203, RM204, RM205, RM206, RM207, RM208, RM209, RM210, RM211, RM212, RM213, RM214, RM215, - RM216, RM217, RM218, RM219, RM220, RM221, RM222 }; + RM216, RM217, RM218, RM219, RM220, RM221, RM222, RM223, + RM224, RM225 }; #endif /* Define short names for general fields in the current backtrack frame, which @@ -203,6 +209,7 @@ Arguments: P a previous frame of interest frame_size the frame size mb points to the match block + match_data points to the match data block s identification text Returns: nothing @@ -210,7 +217,7 @@ Returns: nothing static void display_frames(FILE *f, heapframe *F, heapframe *P, PCRE2_SIZE frame_size, - match_block *mb, const char *s, ...) + match_block *mb, pcre2_match_data *match_data, const char *s, ...) { uint32_t i; heapframe *Q; @@ -222,10 +229,10 @@ vfprintf(f, s, ap); va_end(ap); if (P != NULL) fprintf(f, " P=%lu", - ((char *)P - (char *)(mb->match_frames))/frame_size); + ((char *)P - (char *)(match_data->heapframes))/frame_size); fprintf(f, "\n"); -for (i = 0, Q = mb->match_frames; +for (i = 0, Q = match_data->heapframes; Q <= F; i++, Q = (heapframe *)((char *)Q + frame_size)) { @@ -489,10 +496,16 @@ A version did exist that used individual frames on the heap instead of calling match() recursively, but this ran substantially slower. The current version is a refactoring that uses a vector of frames to remember backtracking points. This runs no slower, and possibly even a bit faster than the original recursive -implementation. An initial vector of size START_FRAMES_SIZE (enough for maybe -50 frames) is allocated on the system stack. If this is not big enough, the -heap is used for a larger vector. - +implementation. + +At first, an initial vector of size START_FRAMES_SIZE (enough for maybe 50 +frames) was allocated on the system stack. If this was not big enough, the heap +was used for a larger vector. However, it turns out that there are environments +where taking as little as 20KiB from the system stack is an embarrassment. +After another refactoring, the heap is used exclusively, but a pointer the +frames vector and its size are cached in the match_data block, so that there is +no new memory allocation if the same match_data block is used for multiple +matches (unless the frames vector has to be extended). ******************************************************************************* ******************************************************************************/ @@ -565,10 +578,9 @@ made performance worse. Arguments: start_eptr starting character in subject start_ecode starting position in compiled code - ovector pointer to the final output vector - oveccount number of pairs in ovector top_bracket number of capturing parentheses in the pattern frame_size size of each backtracking frame + match_data pointer to the match_data block mb pointer to "static" variables block Returns: MATCH_MATCH if matched ) these values are >= 0 @@ -579,20 +591,23 @@ Returns: MATCH_MATCH if matched ) these values are >= 0 */ static int -match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, PCRE2_SIZE *ovector, - uint16_t oveccount, uint16_t top_bracket, PCRE2_SIZE frame_size, - match_block *mb) +match(PCRE2_SPTR start_eptr, PCRE2_SPTR start_ecode, uint16_t top_bracket, + PCRE2_SIZE frame_size, pcre2_match_data *match_data, match_block *mb) { /* Frame-handling variables */ heapframe *F; /* Current frame pointer */ heapframe *N = NULL; /* Temporary frame pointers */ heapframe *P = NULL; + +heapframe *frames_top; /* End of frames vector */ heapframe *assert_accept_frame = NULL; /* For passing back a frame with captures */ -PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */ +PCRE2_SIZE frame_copy_size; /* Amount to copy when creating a new frame */ /* Local variables that do not need to be preserved over calls to RRMATCH(). */ +PCRE2_SPTR branch_end = NULL; +PCRE2_SPTR branch_start; PCRE2_SPTR bracode; /* Temp pointer to start of group */ PCRE2_SIZE offset; /* Used for group offsets */ PCRE2_SIZE length; /* Used for various length calculations */ @@ -626,10 +641,11 @@ copied when a new frame is created. */ frame_copy_size = frame_size - offsetof(heapframe, eptr); -/* Set up the first current frame at the start of the vector, and initialize -fields that are not reset for new frames. */ +/* Set up the first frame and the end of the frames vector. */ + +F = match_data->heapframes; +frames_top = (heapframe *)((char *)F + match_data->heapframes_size); -F = mb->match_frames; Frdepth = 0; /* "Recursion" depth */ Fcapture_last = 0; /* Number of most recent capture */ Fcurrent_recurse = RECURSE_UNSET; /* Not pattern recursing. */ @@ -645,38 +661,58 @@ backtracking point. */ MATCH_RECURSE: -/* Set up a new backtracking frame. If the vector is full, get a new one -on the heap, doubling the size, but constrained by the heap limit. */ +/* Set up a new backtracking frame. If the vector is full, get a new one, +doubling the size, but constrained by the heap limit (which is in KiB). */ N = (heapframe *)((char *)F + frame_size); -if (N >= mb->match_frames_top) +if ((heapframe *)((char *)N + frame_size) >= frames_top) { - PCRE2_SIZE newsize = mb->frame_vector_size * 2; heapframe *new; + PCRE2_SIZE newsize; + PCRE2_SIZE usedsize = (char *)N - (char *)(match_data->heapframes); - if ((newsize / 1024) > mb->heap_limit) + if (match_data->heapframes_size >= PCRE2_SIZE_MAX / 2) { - PCRE2_SIZE maxsize = ((mb->heap_limit * 1024)/frame_size) * frame_size; - if (mb->frame_vector_size >= maxsize) return PCRE2_ERROR_HEAPLIMIT; - newsize = maxsize; + if (match_data->heapframes_size == PCRE2_SIZE_MAX - 1) + return PCRE2_ERROR_NOMEMORY; + newsize = PCRE2_SIZE_MAX - 1; } + else + newsize = match_data->heapframes_size * 2; + + if (newsize / 1024 >= mb->heap_limit) + { + PCRE2_SIZE old_size = match_data->heapframes_size / 1024; + if (mb->heap_limit <= old_size) + return PCRE2_ERROR_HEAPLIMIT; + else + { + PCRE2_SIZE max_delta = 1024 * (mb->heap_limit - old_size); + int over_bytes = match_data->heapframes_size % 1024; + if (over_bytes) max_delta -= (1024 - over_bytes); + newsize = match_data->heapframes_size + max_delta; + } + } + + /* With a heap limit set, the permitted additional size may not be enough for + another frame, so do a final check. */ - new = mb->memctl.malloc(newsize, mb->memctl.memory_data); + if (newsize - usedsize < frame_size) return PCRE2_ERROR_HEAPLIMIT; + new = match_data->memctl.malloc(newsize, match_data->memctl.memory_data); if (new == NULL) return PCRE2_ERROR_NOMEMORY; - memcpy(new, mb->match_frames, mb->frame_vector_size); + memcpy(new, match_data->heapframes, usedsize); - F = (heapframe *)((char *)new + ((char *)F - (char *)mb->match_frames)); - N = (heapframe *)((char *)F + frame_size); + N = (heapframe *)((char *)new + usedsize); + F = (heapframe *)((char *)N - frame_size); - if (mb->match_frames != mb->stack_frames) - mb->memctl.free(mb->match_frames, mb->memctl.memory_data); - mb->match_frames = new; - mb->match_frames_top = (heapframe *)((char *)mb->match_frames + newsize); - mb->frame_vector_size = newsize; + match_data->memctl.free(match_data->heapframes, match_data->memctl.memory_data); + match_data->heapframes = new; + match_data->heapframes_size = newsize; + frames_top = (heapframe *)((char *)new + newsize); } #ifdef DEBUG_SHOW_RMATCH -fprintf(stderr, "++ RMATCH %2d frame=%d", Freturn_id, Frdepth + 1); +fprintf(stderr, "++ RMATCH %d frame=%d", Freturn_id, Frdepth + 1); if (group_frame_type != 0) { fprintf(stderr, " type=%x ", group_frame_type); @@ -730,7 +766,7 @@ recursion value. */ if (group_frame_type != 0) { - Flast_group_offset = (char *)F - (char *)mb->match_frames; + Flast_group_offset = (char *)F - (char *)match_data->heapframes; if (GF_IDMASK(group_frame_type) == GF_RECURSE) Fcurrent_recurse = GF_DATAMASK(group_frame_type); group_frame_type = 0; @@ -746,10 +782,16 @@ opcodes. */ if (mb->match_call_count++ >= mb->match_limit) return PCRE2_ERROR_MATCHLIMIT; if (Frdepth >= mb->match_limit_depth) return PCRE2_ERROR_DEPTHLIMIT; +#ifdef DEBUG_SHOW_OPS +fprintf(stderr, "\n++ New frame: type=0x%x subject offset %ld\n", + GF_IDMASK(Fgroup_frame_type), Feptr - mb->start_subject); +#endif + for (;;) { #ifdef DEBUG_SHOW_OPS -fprintf(stderr, "++ op=%d\n", *Fecode); +fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode, + OP_names[*Fecode]); #endif Fop = (uint8_t)(*Fecode); /* Cast needed for 16-bit and 32-bit modes */ @@ -772,7 +814,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); for(;;) { if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; - N = (heapframe *)((char *)mb->match_frames + offset); + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (N->group_frame_type == (GF_CAPTURE | number)) break; offset = P->last_group_offset; @@ -797,20 +839,21 @@ fprintf(stderr, "++ op=%d\n", *Fecode); assert_accept_frame = F; RRETURN(MATCH_ACCEPT); - /* If recursing, we have to find the most recent recursion. */ + /* For ACCEPT within a recursion, we have to find the most recent + recursion. If not in a recursion, fall through to code that is common with + OP_END. */ case OP_ACCEPT: - case OP_END: - - /* Handle end of a recursion. */ - if (Fcurrent_recurse != RECURSE_UNSET) { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Accept within recursion\n"); +#endif offset = Flast_group_offset; for(;;) { if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; - N = (heapframe *)((char *)mb->match_frames + offset); + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (GF_IDMASK(N->group_frame_type) == GF_RECURSE) break; offset = P->last_group_offset; @@ -828,27 +871,49 @@ fprintf(stderr, "++ op=%d\n", *Fecode); Fecode += 1 + LINK_SIZE; continue; } + /* Fall through */ + + /* OP_END itself can never be reached within a recursion because that is + picked up when the OP_KET that always precedes OP_END is reached. */ + + case OP_END: - /* Not a recursion. Fail for an empty string match if either PCRE2_NOTEMPTY - is set, or if PCRE2_NOTEMPTY_ATSTART is set and we have matched at the - start of the subject. In both cases, backtracking will then try other - alternatives, if any. */ + /* Fail for an empty string match if either PCRE2_NOTEMPTY is set, or if + PCRE2_NOTEMPTY_ATSTART is set and we have matched at the start of the + subject. In both cases, backtracking will then try other alternatives, if + any. */ if (Feptr == Fstart_match && ((mb->moptions & PCRE2_NOTEMPTY) != 0 || ((mb->moptions & PCRE2_NOTEMPTY_ATSTART) != 0 && Fstart_match == mb->start_subject + mb->start_offset))) + { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Backtrack because empty string\n"); +#endif RRETURN(MATCH_NOMATCH); + } - /* Also fail if PCRE2_ENDANCHORED is set and the end of the match is not + /* Fail if PCRE2_ENDANCHORED is set and the end of the match is not the end of the subject. After (*ACCEPT) we fail the entire match (at this - position) but backtrack on reaching the end of the pattern. */ + position) but backtrack if we've reached the end of the pattern. This + applies whether or not we are in a recursion. */ if (Feptr < mb->end_subject && ((mb->moptions | mb->poptions) & PCRE2_ENDANCHORED) != 0) { - if (Fop == OP_END) RRETURN(MATCH_NOMATCH); - return MATCH_NOMATCH; + if (Fop == OP_END) + { +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Backtrack because not at end (endanchored set)\n"); +#endif + RRETURN(MATCH_NOMATCH); + } + +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Failed ACCEPT not at end (endanchnored set)\n"); +#endif + return MATCH_NOMATCH; /* (*ACCEPT) */ } /* We have a successful match of the whole pattern. Record the result and @@ -863,14 +928,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode); mb->mark = Fmark; /* and the last success mark */ if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; - ovector[0] = Fstart_match - mb->start_subject; - ovector[1] = Feptr - mb->start_subject; + match_data->ovector[0] = Fstart_match - mb->start_subject; + match_data->ovector[1] = Feptr - mb->start_subject; /* Set i to the smaller of the sizes of the external and frame ovectors. */ - i = 2 * ((top_bracket + 1 > oveccount)? oveccount : top_bracket + 1); - memcpy(ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE)); - while (--i >= Foffset_top + 2) ovector[i] = PCRE2_UNSET; + i = 2 * ((top_bracket + 1 > match_data->oveccount)? + match_data->oveccount : top_bracket + 1); + memcpy(match_data->ovector + 2, Fovector, (i - 2) * sizeof(PCRE2_SIZE)); + while (--i >= Foffset_top + 2) match_data->ovector[i] = PCRE2_UNSET; return MATCH_MATCH; /* Note: NOT RRETURN */ @@ -2420,41 +2486,53 @@ fprintf(stderr, "++ op=%d\n", *Fecode); GETCHARINCTEST(fc, Feptr); { const uint32_t *cp; + uint32_t chartype; const ucd_record *prop = GET_UCD(fc); + BOOL notmatch = Fop == OP_NOTPROP; switch(Fecode[1]) { case PT_ANY: - if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; case PT_LAMP: - if ((prop->chartype == ucp_Lu || - prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt) == (Fop == OP_NOTPROP)) + chartype = prop->chartype; + if ((chartype == ucp_Lu || + chartype == ucp_Ll || + chartype == ucp_Lt) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_GC: - if ((Fecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (Fop == OP_PROP)) + if ((Fecode[2] == PRIV(ucp_gentype)[prop->chartype]) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_PC: - if ((Fecode[2] != prop->chartype) == (Fop == OP_PROP)) + if ((Fecode[2] == prop->chartype) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_SC: - if ((Fecode[2] != prop->script) == (Fop == OP_PROP)) + if ((Fecode[2] == prop->script) == notmatch) RRETURN(MATCH_NOMATCH); break; + case PT_SCX: + { + BOOL ok = (Fecode[2] == prop->script || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Fecode[2]) != 0); + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + /* These are specials */ case PT_ALNUM: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (Fop == OP_NOTPROP)) + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N) == notmatch) RRETURN(MATCH_NOMATCH); break; @@ -2468,41 +2546,63 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { HSPACE_CASES: VSPACE_CASES: - if (Fop == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; default: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == - (Fop == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); + if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) == notmatch) + RRETURN(MATCH_NOMATCH); break; } break; case PT_WORD: - if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - fc == CHAR_UNDERSCORE) == (Fop == OP_NOTPROP)) + chartype = prop->chartype; + if ((PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || + chartype == ucp_Pc) == notmatch) RRETURN(MATCH_NOMATCH); break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (notmatch) break;; + RRETURN(MATCH_NOMATCH); + } +#endif cp = PRIV(ucd_caseless_sets) + Fecode[2]; for (;;) { if (fc < *cp) - { if (Fop == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; } + { if (notmatch) break; else { RRETURN(MATCH_NOMATCH); } } if (fc == *cp++) - { if (Fop == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } } + { if (notmatch) { RRETURN(MATCH_NOMATCH); } else break; } } break; case PT_UCNC: if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || - fc >= 0xe000) == (Fop == OP_NOTPROP)) + fc >= 0xe000) == notmatch) RRETURN(MATCH_NOMATCH); break; + case PT_BIDICL: + if ((UCD_BIDICLASS_PROP(prop) == Fecode[2]) == notmatch) + RRETURN(MATCH_NOMATCH); + break; + + case PT_BOOL: + { + BOOL ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Fecode[2]) != 0; + if (ok == notmatch) RRETURN(MATCH_NOMATCH); + } + break; + /* This should never occur */ default: @@ -2616,18 +2716,20 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* First, ensure the minimum number of matches are present. Use inline code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). The code for UTF mode is separated out for - tidiness, except for Unicode property tests. */ + (i.e. keep it out of the loops). As there are no calls to RMATCH in the + loops, we can use an ordinary variable for "notmatch". The code for UTF + mode is separated out for tidiness, except for Unicode property tests. */ if (Lmin > 0) { #ifdef SUPPORT_UNICODE if (proptype >= 0) /* Property tests in all modes */ { + BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { case PT_ANY: - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); for (i = 1; i <= Lmin; i++) { if (Feptr >= mb->end_subject) @@ -2652,7 +2754,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); chartype = UCD_CHARTYPE(fc); if ((chartype == ucp_Lu || chartype == ucp_Ll || - chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) + chartype == ucp_Lt) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2666,7 +2768,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2680,7 +2782,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2694,7 +2796,26 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_SCX: + for (i = 1; i <= Lmin; i++) + { + BOOL ok; + const ucd_record *prop; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2710,7 +2831,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } GETCHARINCTEST(fc, Feptr); category = UCD_CATEGORY(fc); - if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP)) + if ((category == ucp_L || category == ucp_N) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2733,11 +2854,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { HSPACE_CASES: VSPACE_CASES: - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; default: - if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP)) + if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch) RRETURN(MATCH_NOMATCH); break; } @@ -2747,16 +2868,17 @@ fprintf(stderr, "++ op=%d\n", *Fecode); case PT_WORD: for (i = 1; i <= Lmin; i++) { - int category; + int chartype, category; if (Feptr >= mb->end_subject) { SCHECK_PARTIAL(); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - category = UCD_CATEGORY(fc); + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; if ((category == ucp_L || category == ucp_N || - fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP)) + chartype == ucp_Mn || chartype == ucp_Pc) == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -2771,17 +2893,24 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (notmatch) continue; + RRETURN(MATCH_NOMATCH); + } +#endif cp = PRIV(ucd_caseless_sets) + Lpropvalue; for (;;) { if (fc < *cp) { - if (Lctype == OP_NOTPROP) break; + if (notmatch) break; RRETURN(MATCH_NOMATCH); } if (fc == *cp++) { - if (Lctype == OP_NOTPROP) RRETURN(MATCH_NOMATCH); + if (notmatch) RRETURN(MATCH_NOMATCH); break; } } @@ -2799,7 +2928,40 @@ fprintf(stderr, "++ op=%d\n", *Fecode); GETCHARINCTEST(fc, Feptr); if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || - fc >= 0xe000) == (Lctype == OP_NOTPROP)) + fc >= 0xe000) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_BIDICL: + for (i = 1; i <= Lmin; i++) + { + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) + RRETURN(MATCH_NOMATCH); + } + break; + + case PT_BOOL: + for (i = 1; i <= Lmin; i++) + { + BOOL ok; + const ucd_record *prop; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == notmatch) RRETURN(MATCH_NOMATCH); } break; @@ -3343,7 +3505,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode); if (Lmin == Lmax) continue; /* If minimizing, we have to test the rest of the pattern before each - subsequent match. */ + subsequent match. This means we cannot use a local "notmatch" variable as + in the other cases. As all 4 temporary 32-bit values in the frame are + already in use, just test the type each time. */ if (reptype == REPTYPE_MIN) { @@ -3440,6 +3604,28 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } /* Control never gets here */ + case PT_SCX: + for (;;) + { + BOOL ok; + const ucd_record *prop; + RMATCH(Fecode, RM225); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue + || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + case PT_ALNUM: for (;;) { @@ -3454,8 +3640,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } GETCHARINCTEST(fc, Feptr); category = UCD_CATEGORY(fc); - if ((category == ucp_L || category == ucp_N) == - (Lctype == OP_NOTPROP)) + if ((category == ucp_L || category == ucp_N) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ @@ -3495,7 +3680,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); case PT_WORD: for (;;) { - int category; + int chartype, category; RMATCH(Fecode, RM215); if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); @@ -3505,10 +3690,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); - category = UCD_CATEGORY(fc); + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; if ((category == ucp_L || category == ucp_N || - fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP)) + chartype == ucp_Mn || + chartype == ucp_Pc) == (Lctype == OP_NOTPROP)) RRETURN(MATCH_NOMATCH); } /* Control never gets here */ @@ -3526,6 +3713,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode); RRETURN(MATCH_NOMATCH); } GETCHARINCTEST(fc, Feptr); +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) + { + if (Lctype == OP_NOTPROP) continue; + RRETURN(MATCH_NOMATCH); + } +#endif cp = PRIV(ucd_caseless_sets) + Lpropvalue; for (;;) { @@ -3562,6 +3756,45 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } /* Control never gets here */ + case PT_BIDICL: + for (;;) + { + RMATCH(Fecode, RM224); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + case PT_BOOL: + for (;;) + { + BOOL ok; + const ucd_record *prop; + RMATCH(Fecode, RM223); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmin++ >= Lmax) RRETURN(MATCH_NOMATCH); + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + RRETURN(MATCH_NOMATCH); + } + GETCHARINCTEST(fc, Feptr); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == (Lctype == OP_NOTPROP)) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + /* This should never occur */ default: return PCRE2_ERROR_INTERNAL; @@ -3870,7 +4103,9 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } /* If maximizing, it is worth using inline code for speed, doing the type - test once at the start (i.e. keep it out of the loop). */ + test once at the start (i.e. keep it out of the loops). Once again, + "notmatch" can be an ordinary local variable because the loops do not call + RMATCH. */ else { @@ -3879,6 +4114,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); #ifdef SUPPORT_UNICODE if (proptype >= 0) { + BOOL notmatch = Lctype == OP_NOTPROP; switch(proptype) { case PT_ANY: @@ -3891,7 +4127,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - if (Lctype == OP_NOTPROP) break; + if (notmatch) break; Feptr+= len; } break; @@ -3910,7 +4146,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); chartype = UCD_CHARTYPE(fc); if ((chartype == ucp_Lu || chartype == ucp_Ll || - chartype == ucp_Lt) == (Lctype == OP_NOTPROP)) + chartype == ucp_Lt) == notmatch) break; Feptr+= len; } @@ -3926,8 +4162,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - if ((UCD_CATEGORY(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) - break; + if ((UCD_CATEGORY(fc) == Lpropvalue) == notmatch) break; Feptr+= len; } break; @@ -3942,8 +4177,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - if ((UCD_CHARTYPE(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) - break; + if ((UCD_CHARTYPE(fc) == Lpropvalue) == notmatch) break; Feptr+= len; } break; @@ -3958,8 +4192,27 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - if ((UCD_SCRIPT(fc) == Lpropvalue) == (Lctype == OP_NOTPROP)) + if ((UCD_SCRIPT(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_SCX: + for (i = Lmin; i < Lmax; i++) + { + BOOL ok; + const ucd_record *prop; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); break; + } + GETCHARLENTEST(fc, Feptr, len); + prop = GET_UCD(fc); + ok = (prop->script == Lpropvalue || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), Lpropvalue) != 0); + if (ok == notmatch) break; Feptr+= len; } break; @@ -3976,8 +4229,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); } GETCHARLENTEST(fc, Feptr, len); category = UCD_CATEGORY(fc); - if ((category == ucp_L || category == ucp_N) == - (Lctype == OP_NOTPROP)) + if ((category == ucp_L || category == ucp_N) == notmatch) break; Feptr+= len; } @@ -4002,11 +4254,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { HSPACE_CASES: VSPACE_CASES: - if (Lctype == OP_NOTPROP) goto ENDLOOP99; /* Break the loop */ + if (notmatch) goto ENDLOOP99; /* Break the loop */ break; default: - if ((UCD_CATEGORY(fc) == ucp_Z) == (Lctype == OP_NOTPROP)) + if ((UCD_CATEGORY(fc) == ucp_Z) == notmatch) goto ENDLOOP99; /* Break the loop */ break; } @@ -4018,7 +4270,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); case PT_WORD: for (i = Lmin; i < Lmax; i++) { - int category; + int chartype, category; int len = 1; if (Feptr >= mb->end_subject) { @@ -4026,9 +4278,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - category = UCD_CATEGORY(fc); - if ((category == ucp_L || category == ucp_N || - fc == CHAR_UNDERSCORE) == (Lctype == OP_NOTPROP)) + chartype = UCD_CHARTYPE(fc); + category = PRIV(ucp_gentype)[chartype]; + if ((category == ucp_L || + category == ucp_N || + chartype == ucp_Mn || + chartype == ucp_Pc) == notmatch) break; Feptr+= len; } @@ -4045,14 +4300,24 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; } GETCHARLENTEST(fc, Feptr, len); - cp = PRIV(ucd_caseless_sets) + Lpropvalue; - for (;;) +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (fc > MAX_UTF_CODE_POINT) { - if (fc < *cp) - { if (Lctype == OP_NOTPROP) break; else goto GOT_MAX; } - if (fc == *cp++) - { if (Lctype == OP_NOTPROP) goto GOT_MAX; else break; } + if (!notmatch) goto GOT_MAX; + } + else +#endif + { + cp = PRIV(ucd_caseless_sets) + Lpropvalue; + for (;;) + { + if (fc < *cp) + { if (notmatch) break; else goto GOT_MAX; } + if (fc == *cp++) + { if (notmatch) goto GOT_MAX; else break; } + } } + Feptr += len; } GOT_MAX: @@ -4070,12 +4335,47 @@ fprintf(stderr, "++ op=%d\n", *Fecode); GETCHARLENTEST(fc, Feptr, len); if ((fc == CHAR_DOLLAR_SIGN || fc == CHAR_COMMERCIAL_AT || fc == CHAR_GRAVE_ACCENT || (fc >= 0xa0 && fc <= 0xd7ff) || - fc >= 0xe000) == (Lctype == OP_NOTPROP)) + fc >= 0xe000) == notmatch) break; Feptr += len; } break; + case PT_BIDICL: + for (i = Lmin; i < Lmax; i++) + { + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + if ((UCD_BIDICLASS(fc) == Lpropvalue) == notmatch) break; + Feptr+= len; + } + break; + + case PT_BOOL: + for (i = Lmin; i < Lmax; i++) + { + BOOL ok; + const ucd_record *prop; + int len = 1; + if (Feptr >= mb->end_subject) + { + SCHECK_PARTIAL(); + break; + } + GETCHARLENTEST(fc, Feptr, len); + prop = GET_UCD(fc); + ok = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), Lpropvalue) != 0; + if (ok == notmatch) break; + Feptr+= len; + } + break; + default: return PCRE2_ERROR_INTERNAL; } @@ -5115,9 +5415,11 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* ===================================================================== */ - /* Recursion either matches the current regex, or some subexpression. The - offset data is the offset to the starting bracket from the start of the - whole pattern. (This is so that it works from duplicated subpatterns.) */ + /* Pattern recursion either matches the current regex, or some + subexpression. The offset data is the offset to the starting bracket from + the start of the whole pattern. This is so that it works from duplicated + subpatterns. For a whole-pattern recursion, we have to infer the number + zero. */ #define Lframe_type F->temp_32[0] #define Lstart_branch F->temp_sptr[0] @@ -5126,28 +5428,35 @@ fprintf(stderr, "++ op=%d\n", *Fecode); bracode = mb->start_code + GET(Fecode, 1); number = (bracode == mb->start_code)? 0 : GET2(bracode, 1 + LINK_SIZE); - /* If we are already in a recursion, check for repeating the same one - without advancing the subject pointer. This should catch convoluted mutual - recursions. (Some simple cases are caught at compile time.) */ + /* If we are already in a pattern recursion, check for repeating the same + one without changing the subject pointer or the last referenced character + in the subject. This should catch convoluted mutual recursions; some + simple cases are caught at compile time. However, there are rare cases when + this check needs to be turned off. In this case, actual recursion loops + will be caught by the match or heap limits. */ if (Fcurrent_recurse != RECURSE_UNSET) { offset = Flast_group_offset; while (offset != PCRE2_UNSET) { - N = (heapframe *)((char *)mb->match_frames + offset); + N = (heapframe *)((char *)match_data->heapframes + offset); P = (heapframe *)((char *)N - frame_size); if (N->group_frame_type == (GF_RECURSE | number)) { - if (Feptr == P->eptr) return PCRE2_ERROR_RECURSELOOP; + if (Feptr == P->eptr && mb->last_used_ptr == P->recurse_last_used && + (mb->moptions & PCRE2_DISABLE_RECURSELOOP_CHECK) == 0) + return PCRE2_ERROR_RECURSELOOP; break; } offset = P->last_group_offset; } } - /* Now run the recursion, branch by branch. */ + /* Remember the current last referenced character and then run the + recursion branch by branch. */ + F->recurse_last_used = mb->last_used_ptr; Lstart_branch = bracode; Lframe_type = GF_RECURSE | number; @@ -5476,13 +5785,13 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* ===================================================================== */ - /* Move the subject pointer back. This occurs only at the start of each - branch of a lookbehind assertion. If we are too close to the start to move - back, fail. When working with UTF-8 we move back a number of characters, - not bytes. */ + /* Move the subject pointer back by one fixed amount. This occurs at the + start of each branch that has a fixed length in a lookbehind assertion. If + we are too close to the start to move back, fail. When working with UTF-8 + we move back a number of characters, not bytes. */ case OP_REVERSE: - number = GET(Fecode, 1); + number = GET2(Fecode, 1); #ifdef SUPPORT_UNICODE if (utf) { @@ -5496,7 +5805,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); else #endif - /* No UTF-8 support, or not in UTF-8 mode: count is code unit count */ + /* No UTF support, or not in UTF mode: count is code unit count */ { if ((ptrdiff_t)number > Feptr - mb->start_subject) RRETURN(MATCH_NOMATCH); @@ -5506,15 +5815,84 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* Save the earliest consulted character, then skip to next opcode */ if (Feptr < mb->start_used_ptr) mb->start_used_ptr = Feptr; - Fecode += 1 + LINK_SIZE; + Fecode += 1 + IMM2_SIZE; break; /* ===================================================================== */ + /* Move the subject pointer back by a variable amount. This occurs at the + start of each branch of a lookbehind assertion when the branch has a + variable, but limited, length. A loop is needed to try matching the branch + after moving back different numbers of characters. If we are too close to + the start to move back even the minimum amount, fail. When working with + UTF-8 we move back a number of characters, not bytes. */ + +#define Lmin F->temp_32[0] +#define Lmax F->temp_32[1] +#define Leptr F->temp_sptr[0] + + case OP_VREVERSE: + Lmin = GET2(Fecode, 1); + Lmax = GET2(Fecode, 1 + IMM2_SIZE); + Leptr = Feptr; + + /* Move back by the maximum branch length and then work forwards. This + ensures that items such as \d{3,5} get the maximum length, which is + relevant for captures, and makes for Perl compatibility. */ + +#ifdef SUPPORT_UNICODE + if (utf) + { + for (i = 0; i < Lmax; i++) + { + if (Feptr == mb->start_subject) + { + if (i < Lmin) RRETURN(MATCH_NOMATCH); + Lmax = i; + break; + } + Feptr--; + BACKCHAR(Feptr); + } + } + else +#endif + + /* No UTF support or not in UTF mode */ + + { + ptrdiff_t diff = Feptr - mb->start_subject; + uint32_t available = (diff > 65535)? 65535 : ((diff > 0)? diff : 0); + if (Lmin > available) RRETURN(MATCH_NOMATCH); + if (Lmax > available) Lmax = available; + Feptr -= Lmax; + } + + /* Now try matching, moving forward one character on failure, until we + reach the mimimum back length. */ + + for (;;) + { + RMATCH(Fecode + 1 + 2 * IMM2_SIZE, RM37); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (Lmax-- <= Lmin) RRETURN(MATCH_NOMATCH); + Feptr++; +#ifdef SUPPORT_UNICODE + if (utf) { FORWARDCHARTEST(Feptr, mb->end_subject); } +#endif + } + /* Control never reaches here */ + +#undef Lmin +#undef Lmax +#undef Leptr + + /* ===================================================================== */ /* An alternation is the end of a branch; scan along to find the end of the bracketed group. */ case OP_ALT: + branch_end = Fecode; do Fecode += GET(Fecode,1); while (*Fecode == OP_ALT); break; @@ -5522,7 +5900,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* ===================================================================== */ /* The end of a parenthesized group. For all but OP_BRA and OP_COND, the starting frame was added to the chained frames in order to remember the - starting subject position for the group. */ + starting subject position for the group. (Not true for OP_BRA when it's a + whole pattern recursion, but that is handled separately below.)*/ case OP_KET: case OP_KETRMIN: @@ -5531,12 +5910,18 @@ fprintf(stderr, "++ op=%d\n", *Fecode); bracode = Fecode - GET(Fecode, 1); - /* Point N to the frame at the start of the most recent group. - Remember the subject pointer at the start of the group. */ + if (branch_end == NULL) branch_end = Fecode; + branch_start = bracode; + while (branch_start + GET(branch_start, 1) != branch_end) + branch_start += GET(branch_start, 1); + branch_end = NULL; + + /* Point N to the frame at the start of the most recent group, and P to its + predecessor. Remember the subject pointer at the start of the group. */ if (*bracode != OP_BRA && *bracode != OP_COND) { - N = (heapframe *)((char *)mb->match_frames + Flast_group_offset); + N = (heapframe *)((char *)match_data->heapframes + Flast_group_offset); P = (heapframe *)((char *)N - frame_size); Flast_group_offset = P->last_group_offset; @@ -5568,27 +5953,64 @@ fprintf(stderr, "++ op=%d\n", *Fecode); switch (*bracode) { - case OP_BRA: /* No need to do anything for these */ - case OP_COND: + /* Whole pattern recursion is handled as a recursion into group 0, but + the entire pattern is wrapped in OP_BRA/OP_KET rather than a capturing + group - a design mistake: it should perhaps have been capture group 0. + Anyway, that means the end of such recursion must be handled here. It is + detected by checking for an immediately following OP_END when we are + recursing in group 0. If this is not the end of a whole-pattern + recursion, there is nothing to be done. */ + + case OP_BRA: + if (Fcurrent_recurse != 0 || Fecode[1+LINK_SIZE] != OP_END) break; + + /* It is the end of whole-pattern recursion. */ + + offset = Flast_group_offset; + if (offset == PCRE2_UNSET) return PCRE2_ERROR_INTERNAL; + N = (heapframe *)((char *)match_data->heapframes + offset); + P = (heapframe *)((char *)N - frame_size); + Flast_group_offset = P->last_group_offset; + + /* Reinstate the previous set of captures and then carry on after the + recursion call. */ + + memcpy((char *)F + offsetof(heapframe, ovector), P->ovector, + Foffset_top * sizeof(PCRE2_SIZE)); + Foffset_top = P->offset_top; + Fcapture_last = P->capture_last; + Fcurrent_recurse = P->current_recurse; + Fecode = P->ecode + 1 + LINK_SIZE; + continue; /* With next opcode */ + + case OP_COND: /* No need to do anything for these */ case OP_SCOND: break; /* Non-atomic positive assertions are like OP_BRA, except that the subject pointer must be put back to where it was at the start of the - assertion. */ + assertion. For a variable lookbehind, check its end point. */ - case OP_ASSERT_NA: case OP_ASSERTBACK_NA: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT_NA: if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; Feptr = P->eptr; break; /* Atomic positive assertions are like OP_ONCE, except that in addition the subject pointer must be put back to where it was at the start of the - assertion. */ + assertion. For a variable lookbehind, check its end point. */ - case OP_ASSERT: case OP_ASSERTBACK: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT: if (Feptr > mb->last_used_ptr) mb->last_used_ptr = Feptr; Feptr = P->eptr; /* Fall through */ @@ -5609,10 +6031,15 @@ fprintf(stderr, "++ op=%d\n", *Fecode); break; /* A matching negative assertion returns MATCH, which is turned into - NOMATCH at the assertion level. */ + NOMATCH at the assertion level. For a variable lookbehind, check its end + point. */ - case OP_ASSERT_NOT: case OP_ASSERTBACK_NOT: + if (branch_start[1 + LINK_SIZE] == OP_VREVERSE && Feptr != P->eptr) + RRETURN(MATCH_NOMATCH); + /* Fall through */ + + case OP_ASSERT_NOT: RRETURN(MATCH_MATCH); /* At the end of a script run, apply the script-checking rules. This code @@ -5623,9 +6050,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode); if (!PRIV(script_run)(P->eptr, Feptr, utf)) RRETURN(MATCH_NOMATCH); break; - /* Whole-pattern recursion is coded as a recurse into group 0, so it - won't be picked up here. Instead, we catch it when the OP_END is reached. - Other recursion is handled here. */ + /* Whole-pattern recursion is coded as a recurse into group 0, and is + handled with OP_BRA above. Other recursion is handled here. */ case OP_CBRA: case OP_CBRAPOS: @@ -5640,7 +6066,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); { P = (heapframe *)((char *)N - frame_size); memcpy((char *)F + offsetof(heapframe, ovector), P->ovector, - P->offset_top * sizeof(PCRE2_SIZE)); + Foffset_top * sizeof(PCRE2_SIZE)); Foffset_top = P->offset_top; Fcapture_last = P->capture_last; Fcurrent_recurse = P->current_recurse; @@ -5723,10 +6149,10 @@ fprintf(stderr, "++ op=%d\n", *Fecode); if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS; /* Fall through */ - /* Unconditional end of subject assertion (\z) */ + /* Unconditional end of subject assertion (\z). */ case OP_EOD: - if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH); + if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH); if (mb->partial != 0) { mb->hitend = TRUE; @@ -5838,6 +6264,8 @@ fprintf(stderr, "++ op=%d\n", *Fecode); case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: if (Feptr == mb->check_subject) prev_is_word = FALSE; else { PCRE2_SPTR lastptr = Feptr - 1; @@ -5852,13 +6280,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); fc = *lastptr; if (lastptr < mb->start_used_ptr) mb->start_used_ptr = lastptr; #ifdef SUPPORT_UNICODE - if ((mb->poptions & PCRE2_UCP) != 0) + if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY) { - if (fc == '_') prev_is_word = TRUE; else - { - int cat = UCD_CATEGORY(fc); - prev_is_word = (cat == ucp_L || cat == ucp_N); - } + int chartype = UCD_CHARTYPE(fc); + int category = PRIV(ucp_gentype)[chartype]; + prev_is_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); } else #endif /* SUPPORT_UNICODE */ @@ -5886,13 +6313,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); fc = *Feptr; if (nextptr > mb->last_used_ptr) mb->last_used_ptr = nextptr; #ifdef SUPPORT_UNICODE - if ((mb->poptions & PCRE2_UCP) != 0) + if (Fop == OP_UCP_WORD_BOUNDARY || Fop == OP_NOT_UCP_WORD_BOUNDARY) { - if (fc == '_') cur_is_word = TRUE; else - { - int cat = UCD_CATEGORY(fc); - cur_is_word = (cat == ucp_L || cat == ucp_N); - } + int chartype = UCD_CHARTYPE(fc); + int category = PRIV(ucp_gentype)[chartype]; + cur_is_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); } else #endif /* SUPPORT_UNICODE */ @@ -5901,7 +6327,7 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* Now see if the situation is what we want */ - if ((*Fecode++ == OP_WORD_BOUNDARY)? + if ((*Fecode++ == OP_WORD_BOUNDARY || Fop == OP_UCP_WORD_BOUNDARY)? cur_is_word == prev_is_word : cur_is_word != prev_is_word) RRETURN(MATCH_NOMATCH); break; @@ -6047,7 +6473,7 @@ F = (heapframe *)((char *)F - Fback_frame); /* Backtrack */ mb->cb->callout_flags |= PCRE2_CALLOUT_BACKTRACK; /* Note for callouts */ #ifdef DEBUG_SHOW_RMATCH -fprintf(stderr, "++ RETURN %d to %d\n", rrc, Freturn_id); +fprintf(stderr, "++ RETURN %d to RM%d\n", rrc, Freturn_id); #endif switch (Freturn_id) @@ -6056,7 +6482,7 @@ switch (Freturn_id) LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16) LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24) LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32) - LBL(33) LBL(34) LBL(35) LBL(36) + LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) #ifdef SUPPORT_WIDE_CHARS LBL(100) LBL(101) @@ -6066,7 +6492,7 @@ switch (Freturn_id) LBL(200) LBL(201) LBL(202) LBL(203) LBL(204) LBL(205) LBL(206) LBL(207) LBL(208) LBL(209) LBL(210) LBL(211) LBL(212) LBL(213) LBL(214) LBL(215) LBL(216) LBL(217) LBL(218) LBL(219) LBL(220) - LBL(221) LBL(222) + LBL(221) LBL(222) LBL(223) LBL(224) LBL(225) #endif default: @@ -6117,8 +6543,8 @@ BOOL has_req_cu = FALSE; BOOL startline; #if PCRE2_CODE_UNIT_WIDTH == 8 -BOOL memchr_not_found_first_cu; -BOOL memchr_not_found_first_cu2; +PCRE2_SPTR memchr_found_first_cu; +PCRE2_SPTR memchr_found_first_cu2; #endif PCRE2_UCHAR first_cu = 0; @@ -6129,8 +6555,8 @@ PCRE2_UCHAR req_cu2 = 0; PCRE2_SPTR bumpalong_limit; PCRE2_SPTR end_subject; PCRE2_SPTR true_end_subject; -PCRE2_SPTR start_match = subject + start_offset; -PCRE2_SPTR req_cu_ptr = start_match - 1; +PCRE2_SPTR start_match; +PCRE2_SPTR req_cu_ptr; PCRE2_SPTR start_partial; PCRE2_SPTR match_partial; @@ -6153,6 +6579,7 @@ BOOL jit_checked_utf = FALSE; #endif /* SUPPORT_UNICODE */ PCRE2_SIZE frame_size; +PCRE2_SIZE heapframes_size; /* We need to have mb as a pointer to a match block, because the IS_NEWLINE macro is used below, and it expects NLBLOCK to be defined as a pointer. */ @@ -6161,18 +6588,18 @@ pcre2_callout_block cb; match_block actual_match_block; match_block *mb = &actual_match_block; -/* Allocate an initial vector of backtracking frames on the stack. If this -proves to be too small, it is replaced by a larger one on the heap. To get a -vector of the size required that is aligned for pointers, allocate it as a -vector of pointers. */ +/* Recognize NULL, length 0 as an empty string. */ -PCRE2_SPTR stack_frames_vector[START_FRAMES_SIZE/sizeof(PCRE2_SPTR)] - PCRE2_KEEP_UNINITIALIZED; -mb->stack_frames = (heapframe *)stack_frames_vector; +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; + +/* Plausibility checks */ -/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated -subject string. */ +if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; +if (code == NULL || subject == NULL || match_data == NULL) + return PCRE2_ERROR_NULL; +start_match = subject + start_offset; +req_cu_ptr = start_match - 1; if (length == PCRE2_ZERO_TERMINATED) { length = PRIV(strlen)(subject); @@ -6180,11 +6607,6 @@ if (length == PCRE2_ZERO_TERMINATED) } true_end_subject = end_subject = subject + length; -/* Plausibility checks */ - -if ((options & ~PUBLIC_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; -if (code == NULL || subject == NULL || match_data == NULL) - return PCRE2_ERROR_NULL; if (start_offset > length) return PCRE2_ERROR_BADOFFSET; /* Check that the first field in the block is the magic number. */ @@ -6348,6 +6770,7 @@ if (use_jit) match_data, mcontext); if (rc != PCRE2_ERROR_JIT_BADOPTION) { + match_data->subject_length = length; if (rc >= 0 && (options & PCRE2_COPY_MATCHED_SUBJECT) != 0) { length = CU2BYTES(length + was_zero_terminated); @@ -6482,7 +6905,7 @@ if (utf && /* If the end precedes start_match, it means there is invalid UTF in the extra code units we reversed over because of a lookbehind. Advance past the first bad code unit, and then skip invalid character starting code units in - 8-bit and 16-bit modes, and try again. */ + 8-bit and 16-bit modes, and try again with the original end point. */ if (end_subject < start_match) { @@ -6491,6 +6914,7 @@ if (utf && while (mb->check_subject < start_match && NOT_FIRSTCU(*mb->check_subject)) mb->check_subject++; #endif + end_subject = true_end_subject; } /* Otherwise, set the not end of line option, and do the match. */ @@ -6515,7 +6939,7 @@ if (mcontext == NULL) else mb->memctl = mcontext->memctl; anchored = ((re->overall_options | options) & PCRE2_ANCHORED) != 0; -firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; +firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0; startline = (re->flags & PCRE2_STARTLINE) != 0; bumpalong_limit = (mcontext->offset_limit == PCRE2_UNSET)? true_end_subject : subject + mcontext->offset_limit; @@ -6538,6 +6962,7 @@ mb->callout_data = mcontext->callout_data; mb->start_subject = subject; mb->start_offset = start_offset; mb->end_subject = end_subject; +mb->true_end_subject = true_end_subject; mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0; mb->allowemptypartial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY) != 0; @@ -6595,22 +7020,24 @@ switch(re->newline_convention) vector at the end, whose size depends on the number of capturing parentheses in the pattern. It is not used at all if there are no capturing parentheses. - frame_size is the total size of each frame - mb->frame_vector_size is the total usable size of the vector (rounded down - to a whole number of frames) + frame_size is the total size of each frame + match_data->heapframes is the pointer to the frames vector + match_data->heapframes_size is the allocated size of the vector -The last of these is changed within the match() function if the frame vector -has to be expanded. We therefore put it into the match block so that it is -correct when calling match() more than once for non-anchored patterns. */ +We must pad the frame_size for alignment to ensure subsequent frames are as +aligned as heapframe. Whilst ovector is word-aligned due to being a PCRE2_SIZE +array, that does not guarantee it is suitably aligned for pointers, as some +architectures have pointers that are larger than a size_t. */ -frame_size = offsetof(heapframe, ovector) + - re->top_bracket * 2 * sizeof(PCRE2_SIZE); +frame_size = (offsetof(heapframe, ovector) + + re->top_bracket * 2 * sizeof(PCRE2_SIZE) + HEAPFRAME_ALIGNMENT - 1) & + ~(HEAPFRAME_ALIGNMENT - 1); /* Limits set in the pattern override the match context only if they are smaller. */ -mb->heap_limit = (mcontext->heap_limit < re->limit_heap)? - mcontext->heap_limit : re->limit_heap; +mb->heap_limit = ((mcontext->heap_limit < re->limit_heap)? + mcontext->heap_limit : re->limit_heap); mb->match_limit = (mcontext->match_limit < re->limit_match)? mcontext->match_limit : re->limit_match; @@ -6619,36 +7046,41 @@ mb->match_limit_depth = (mcontext->depth_limit < re->limit_depth)? mcontext->depth_limit : re->limit_depth; /* If a pattern has very many capturing parentheses, the frame size may be very -large. Ensure that there are at least 10 available frames by getting an initial -vector on the heap if necessary, except when the heap limit prevents this. Get -fewer if possible. (The heap limit is in kibibytes.) */ +large. Set the initial frame vector size to ensure that there are at least 10 +available frames, but enforce a minimum of START_FRAMES_SIZE. If this is +greater than the heap limit, get as large a vector as possible. */ -if (frame_size <= START_FRAMES_SIZE/10) +heapframes_size = frame_size * 10; +if (heapframes_size < START_FRAMES_SIZE) heapframes_size = START_FRAMES_SIZE; +if (heapframes_size / 1024 > mb->heap_limit) { - mb->match_frames = mb->stack_frames; /* Initial frame vector on the stack */ - mb->frame_vector_size = ((START_FRAMES_SIZE/frame_size) * frame_size); + PCRE2_SIZE max_size = 1024 * mb->heap_limit; + if (max_size < frame_size) return PCRE2_ERROR_HEAPLIMIT; + heapframes_size = max_size; } -else + +/* If an existing frame vector in the match_data block is large enough, we can +use it. Otherwise, free any pre-existing vector and get a new one. */ + +if (match_data->heapframes_size < heapframes_size) { - mb->frame_vector_size = frame_size * 10; - if ((mb->frame_vector_size / 1024) > mb->heap_limit) + match_data->memctl.free(match_data->heapframes, + match_data->memctl.memory_data); + match_data->heapframes = match_data->memctl.malloc(heapframes_size, + match_data->memctl.memory_data); + if (match_data->heapframes == NULL) { - if (frame_size > mb->heap_limit * 1024) return PCRE2_ERROR_HEAPLIMIT; - mb->frame_vector_size = ((mb->heap_limit * 1024)/frame_size) * frame_size; + match_data->heapframes_size = 0; + return PCRE2_ERROR_NOMEMORY; } - mb->match_frames = mb->memctl.malloc(mb->frame_vector_size, - mb->memctl.memory_data); - if (mb->match_frames == NULL) return PCRE2_ERROR_NOMEMORY; + match_data->heapframes_size = heapframes_size; } -mb->match_frames_top = - (heapframe *)((char *)mb->match_frames + mb->frame_vector_size); - /* Write to the ovector within the first frame to mark every capture unset and to avoid uninitialized memory read errors when it is copied to a new frame. */ -memset((char *)(mb->match_frames) + offsetof(heapframe, ovector), 0xff, - re->top_bracket * 2 * sizeof(PCRE2_SIZE)); +memset((char *)(match_data->heapframes) + offsetof(heapframe, ovector), 0xff, + frame_size - offsetof(heapframe, ovector)); /* Pointers to the individual character tables */ @@ -6712,8 +7144,8 @@ start_partial = match_partial = NULL; mb->hitend = FALSE; #if PCRE2_CODE_UNIT_WIDTH == 8 -memchr_not_found_first_cu = FALSE; -memchr_not_found_first_cu2 = FALSE; +memchr_found_first_cu = NULL; +memchr_found_first_cu2 = NULL; #endif for(;;) @@ -6782,13 +7214,7 @@ for(;;) } } - /* Not anchored. Advance to a unique first code unit if there is one. In - 8-bit mode, the use of memchr() gives a big speed up, even though we have - to call it twice in caseless mode, in order to find the earliest occurrence - of the character in either of its cases. If a call to memchr() that - searches the rest of the subject fails to find one case, remember that in - order not to keep on repeating the search. This can make a huge difference - when the strings are very long and only one case is present. */ + /* Not anchored. Advance to a unique first code unit if there is one. */ else { @@ -6796,43 +7222,68 @@ for(;;) { if (first_cu != first_cu2) /* Caseless */ { + /* In 16-bit and 32_bit modes we have to do our own search, so can + look for both cases at once. */ + #if PCRE2_CODE_UNIT_WIDTH != 8 PCRE2_UCHAR smc; while (start_match < end_subject && (smc = UCHAR21TEST(start_match)) != first_cu && - smc != first_cu2) + smc != first_cu2) start_match++; +#else + /* In 8-bit mode, the use of memchr() gives a big speed up, even + though we have to call it twice in order to find the earliest + occurrence of the code unit in either of its cases. Caching is used + to remember the positions of previously found code units. This can + make a huge difference when the strings are very long and only one + case is actually present. */ -#else /* 8-bit code units */ PCRE2_SPTR pp1 = NULL; PCRE2_SPTR pp2 = NULL; - PCRE2_SIZE cu2size = end_subject - start_match; + PCRE2_SIZE searchlength = end_subject - start_match; + + /* If we haven't got a previously found position for first_cu, or if + the current starting position is later, we need to do a search. If + the code unit is not found, set it to the end. */ - if (!memchr_not_found_first_cu) + if (memchr_found_first_cu == NULL || + start_match > memchr_found_first_cu) { - pp1 = memchr(start_match, first_cu, end_subject - start_match); - if (pp1 == NULL) memchr_not_found_first_cu = TRUE; - else cu2size = pp1 - start_match; + pp1 = memchr(start_match, first_cu, searchlength); + memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1; } - /* If pp1 is not NULL, we have arranged to search only as far as pp1, - to see if the other case is earlier, so we can set "not found" only - when both searches have returned NULL. */ + /* If the start is before a previously found position, use the + previous position, or NULL if a previous search failed. */ + + else pp1 = (memchr_found_first_cu == end_subject)? NULL : + memchr_found_first_cu; + + /* Do the same thing for the other case. */ - if (!memchr_not_found_first_cu2) + if (memchr_found_first_cu2 == NULL || + start_match > memchr_found_first_cu2) { - pp2 = memchr(start_match, first_cu2, cu2size); - memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL); + pp2 = memchr(start_match, first_cu2, searchlength); + memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2; } + else pp2 = (memchr_found_first_cu2 == end_subject)? NULL : + memchr_found_first_cu2; + + /* Set the start to the end of the subject if neither case was found. + Otherwise, use the earlier found point. */ + if (pp1 == NULL) start_match = (pp2 == NULL)? end_subject : pp2; else start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2; -#endif + +#endif /* 8-bit handling */ } - /* The caseful case */ + /* The caseful case is much simpler. */ else { @@ -7056,8 +7507,16 @@ for(;;) mb->end_offset_top = 0; mb->skip_arg_count = 0; - rc = match(start_match, mb->start_code, match_data->ovector, - match_data->oveccount, re->top_bracket, frame_size, mb); +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ Calling match()\n"); +#endif + + rc = match(start_match, mb->start_code, re->top_bracket, frame_size, + match_data, mb); + +#ifdef DEBUG_SHOW_OPS + fprintf(stderr, "++ match() returned %d\n\n", rc); +#endif if (mb->hitend && start_partial == NULL) { @@ -7206,6 +7665,7 @@ if (utf && end_subject != true_end_subject && if (start_match >= true_end_subject) { rc = MATCH_NOMATCH; /* In case it was partial */ + match_partial = NULL; break; } @@ -7240,11 +7700,6 @@ if (utf && end_subject != true_end_subject && } #endif /* SUPPORT_UNICODE */ -/* Release an enlarged frame vector that is on the heap. */ - -if (mb->match_frames != mb->stack_frames) - mb->memctl.free(mb->match_frames, mb->memctl.memory_data); - /* Fill in fields that are always returned in the match data. */ match_data->code = re; @@ -7260,6 +7715,7 @@ if (rc == MATCH_MATCH) { match_data->rc = ((int)mb->end_offset_top >= 2 * match_data->oveccount)? 0 : (int)mb->end_offset_top/2 + 1; + match_data->subject_length = length; match_data->startchar = start_match - subject; match_data->leftchar = mb->start_used_ptr - subject; match_data->rightchar = ((mb->last_used_ptr > mb->end_match_ptr)? @@ -7274,6 +7730,7 @@ if (rc == MATCH_MATCH) match_data->flags |= PCRE2_MD_COPIED_SUBJECT; } else match_data->subject = subject; + return match_data->rc; } @@ -7295,6 +7752,7 @@ PCRE2_ERROR_PARTIAL. */ else if (match_partial != NULL) { match_data->subject = subject; + match_data->subject_length = length; match_data->ovector[0] = match_partial - subject; match_data->ovector[1] = end_subject - subject; match_data->startchar = match_partial - subject; @@ -7310,4 +7768,10 @@ else match_data->rc = PCRE2_ERROR_NOMATCH; return match_data->rc; } +/* These #undefs are here to enable unity builds with CMake. */ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + /* End of pcre2_match.c */ |