diff options
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_dfa_match.c')
-rw-r--r-- | src/3rdparty/pcre2/src/pcre2_dfa_match.c | 319 |
1 files changed, 228 insertions, 91 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_dfa_match.c b/src/3rdparty/pcre2/src/pcre2_dfa_match.c index 625695b7cb..caae65248f 100644 --- a/src/3rdparty/pcre2/src/pcre2_dfa_match.c +++ b/src/3rdparty/pcre2/src/pcre2_dfa_match.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2023 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -168,7 +168,7 @@ static const uint8_t coptable[] = { 0, /* KetRmax */ 0, /* KetRmin */ 0, /* KetRpos */ - 0, /* Reverse */ + 0, 0, /* Reverse, Vreverse */ 0, /* Assert */ 0, /* Assert not */ 0, /* Assert behind */ @@ -187,7 +187,8 @@ static const uint8_t coptable[] = { 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ 0, 0, /* COMMIT, COMMIT_ARG */ 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ - 0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */ + 0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */ + 0, 0 /* \B and \b in UCP mode */ }; /* This table identifies those opcodes that inspect a character. It is used to @@ -245,7 +246,7 @@ static const uint8_t poptable[] = { 0, /* KetRmax */ 0, /* KetRmin */ 0, /* KetRpos */ - 0, /* Reverse */ + 0, 0, /* Reverse, Vreverse */ 0, /* Assert */ 0, /* Assert not */ 0, /* Assert behind */ @@ -264,7 +265,8 @@ static const uint8_t poptable[] = { 0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG */ 0, 0, /* COMMIT, COMMIT_ARG */ 0, 0, 0, /* FAIL, ACCEPT, ASSERT_ACCEPT */ - 0, 0, 0 /* CLOSE, SKIPZERO, DEFINE */ + 0, 0, 0, /* CLOSE, SKIPZERO, DEFINE */ + 1, 1 /* \B and \b in UCP mode */ }; /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, @@ -350,7 +352,7 @@ Returns: the return from the callout */ static int -do_callout(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject, +do_callout_dfa(PCRE2_SPTR code, PCRE2_SIZE *offsets, PCRE2_SPTR current_subject, PCRE2_SPTR ptr, dfa_match_block *mb, PCRE2_SIZE extracode, PCRE2_SIZE *lengthptr) { @@ -426,7 +428,7 @@ overflow. */ else { - uint32_t newsize = (rws->size >= UINT32_MAX/2)? UINT32_MAX/2 : rws->size * 2; + uint32_t newsize = (rws->size >= UINT32_MAX/(sizeof(int)*2))? UINT32_MAX/sizeof(int) : rws->size * 2; uint32_t newsizeK = newsize/(1024/sizeof(int)); if (newsizeK + mb->heap_used > mb->heap_limit) @@ -589,7 +591,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT) end_code = this_start_code; do { - size_t back = (size_t)GET(end_code, 2+LINK_SIZE); + size_t back = (size_t)GET2(end_code, 2+LINK_SIZE); if (back > max_back) max_back = back; end_code += GET(end_code, 1); } @@ -633,8 +635,8 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT) end_code = this_start_code; do { - uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + LINK_SIZE : 0; - size_t back = (revlen == 0)? 0 : (size_t)GET(end_code, 2+LINK_SIZE); + uint32_t revlen = (end_code[1+LINK_SIZE] == OP_REVERSE)? 1 + IMM2_SIZE : 0; + size_t back = (revlen == 0)? 0 : (size_t)GET2(end_code, 2+LINK_SIZE); if (back <= gone_back) { int bstate = (int)(end_code - start_code + 1 + LINK_SIZE + revlen); @@ -1100,6 +1102,8 @@ for (;;) /*-----------------------------------------------------------------*/ case OP_WORD_BOUNDARY: case OP_NOT_WORD_BOUNDARY: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: { int left_word, right_word; @@ -1112,13 +1116,13 @@ for (;;) #endif GETCHARTEST(d, temp); #ifdef SUPPORT_UNICODE - if ((mb->poptions & PCRE2_UCP) != 0) + if (codevalue == OP_UCP_WORD_BOUNDARY || + codevalue == OP_NOT_UCP_WORD_BOUNDARY) { - if (d == '_') left_word = TRUE; else - { - uint32_t cat = UCD_CATEGORY(d); - left_word = (cat == ucp_L || cat == ucp_N); - } + int chartype = UCD_CHARTYPE(d); + int category = PRIV(ucp_gentype)[chartype]; + left_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); } else #endif @@ -1137,13 +1141,13 @@ for (;;) mb->last_used_ptr = temp; } #ifdef SUPPORT_UNICODE - if ((mb->poptions & PCRE2_UCP) != 0) + if (codevalue == OP_UCP_WORD_BOUNDARY || + codevalue == OP_NOT_UCP_WORD_BOUNDARY) { - if (c == '_') right_word = TRUE; else - { - uint32_t cat = UCD_CATEGORY(c); - right_word = (cat == ucp_L || cat == ucp_N); - } + int chartype = UCD_CHARTYPE(c); + int category = PRIV(ucp_gentype)[chartype]; + right_word = (category == ucp_L || category == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc); } else #endif @@ -1151,7 +1155,9 @@ for (;;) } else right_word = FALSE; - if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY)) + if ((left_word == right_word) == + (codevalue == OP_NOT_WORD_BOUNDARY || + codevalue == OP_NOT_UCP_WORD_BOUNDARY)) { ADD_ACTIVE(state_offset + 1, 0); } } break; @@ -1168,6 +1174,7 @@ for (;;) if (clen > 0) { BOOL OK; + int chartype; const uint32_t *cp; const ucd_record * prop = GET_UCD(c); switch(code[1]) @@ -1177,8 +1184,9 @@ for (;;) break; case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; + chartype = prop->chartype; + OK = chartype == ucp_Lu || chartype == ucp_Ll || + chartype == ucp_Lt; break; case PT_GC: @@ -1193,11 +1201,17 @@ for (;;) OK = prop->script == code[2]; break; + case PT_SCX: + OK = (prop->script == code[2] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[2]) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; break; /* Perl space used to exclude VT, but from Perl 5.18 it is included, @@ -1220,12 +1234,20 @@ for (;;) break; case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE; + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc; break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif cp = PRIV(ucd_caseless_sets) + code[2]; for (;;) { @@ -1240,6 +1262,15 @@ for (;;) c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[2]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[2]) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1426,6 +1457,7 @@ for (;;) if (clen > 0) { BOOL OK; + int chartype; const uint32_t *cp; const ucd_record * prop = GET_UCD(c); switch(code[2]) @@ -1435,8 +1467,8 @@ for (;;) break; case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; + chartype = prop->chartype; + OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; break; case PT_GC: @@ -1451,11 +1483,17 @@ for (;;) OK = prop->script == code[3]; break; + case PT_SCX: + OK = (prop->script == code[3] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; break; /* Perl space used to exclude VT, but from Perl 5.18 it is included, @@ -1478,12 +1516,20 @@ for (;;) break; case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE; + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc; break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif cp = PRIV(ucd_caseless_sets) + code[3]; for (;;) { @@ -1498,6 +1544,15 @@ for (;;) c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[3]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[3]) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1667,6 +1722,7 @@ for (;;) if (clen > 0) { BOOL OK; + int chartype; const uint32_t *cp; const ucd_record * prop = GET_UCD(c); switch(code[2]) @@ -1676,8 +1732,8 @@ for (;;) break; case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; + chartype = prop->chartype; + OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; break; case PT_GC: @@ -1692,11 +1748,17 @@ for (;;) OK = prop->script == code[3]; break; + case PT_SCX: + OK = (prop->script == code[3] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), code[3]) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; break; /* Perl space used to exclude VT, but from Perl 5.18 it is included, @@ -1719,12 +1781,20 @@ for (;;) break; case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE; + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc; break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif cp = PRIV(ucd_caseless_sets) + code[3]; for (;;) { @@ -1739,6 +1809,15 @@ for (;;) c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[3]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[3]) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -1933,6 +2012,7 @@ for (;;) if (clen > 0) { BOOL OK; + int chartype; const uint32_t *cp; const ucd_record * prop = GET_UCD(c); switch(code[1 + IMM2_SIZE + 1]) @@ -1942,8 +2022,8 @@ for (;;) break; case PT_LAMP: - OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || - prop->chartype == ucp_Lt; + chartype = prop->chartype; + OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; break; case PT_GC: @@ -1958,11 +2038,18 @@ for (;;) OK = prop->script == code[1 + IMM2_SIZE + 2]; break; + case PT_SCX: + OK = (prop->script == code[1 + IMM2_SIZE + 2] || + MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), + code[1 + IMM2_SIZE + 2]) != 0); + break; + /* These are specials for combination cases. */ case PT_ALNUM: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N; + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N; break; /* Perl space used to exclude VT, but from Perl 5.18 it is included, @@ -1985,12 +2072,20 @@ for (;;) break; case PT_WORD: - OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L || - PRIV(ucp_gentype)[prop->chartype] == ucp_N || - c == CHAR_UNDERSCORE; + chartype = prop->chartype; + OK = PRIV(ucp_gentype)[chartype] == ucp_L || + PRIV(ucp_gentype)[chartype] == ucp_N || + chartype == ucp_Mn || chartype == ucp_Pc; break; case PT_CLIST: +#if PCRE2_CODE_UNIT_WIDTH == 32 + if (c > MAX_UTF_CODE_POINT) + { + OK = FALSE; + break; + } +#endif cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2]; for (;;) { @@ -2005,6 +2100,15 @@ for (;;) c >= 0xe000; break; + case PT_BIDICL: + OK = UCD_BIDICLASS(c) == code[1 + IMM2_SIZE + 2]; + break; + + case PT_BOOL: + OK = MAPBIT(PRIV(ucd_boolprop_sets) + + UCD_BPROPS_PROP(prop), code[1 + IMM2_SIZE + 2]) != 0; + break; + /* Should never occur, but keep compilers from grumbling. */ default: @@ -2742,7 +2846,7 @@ for (;;) || code[LINK_SIZE + 1] == OP_CALLOUT_STR) { PCRE2_SIZE callout_length; - rrc = do_callout(code, offsets, current_subject, ptr, mb, + rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 1 + LINK_SIZE, &callout_length); if (rrc < 0) return rrc; /* Abandon */ if (rrc > 0) break; /* Fail this thread */ @@ -2837,7 +2941,6 @@ for (;;) int *local_workspace; PCRE2_SIZE *local_offsets; RWS_anchor *rws = (RWS_anchor *)RWS; - dfa_recursion_info *ri; PCRE2_SPTR callpat = start_code + GET(code, 1); uint32_t recno = (callpat == mb->start_code)? 0 : GET2(callpat, 1 + LINK_SIZE); @@ -2854,18 +2957,24 @@ for (;;) rws->free -= RWS_RSIZE + RWS_OVEC_RSIZE; /* Check for repeating a recursion without advancing the subject - pointer. This should catch convoluted mutual recursions. (Some simple - cases are caught at compile time.) */ + pointer or last used character. This should catch convoluted mutual + recursions. (Some simple cases are caught at compile time.) */ - for (ri = mb->recursive; ri != NULL; ri = ri->prevrec) - if (recno == ri->group_num && ptr == ri->subject_position) + for (dfa_recursion_info *ri = mb->recursive; + ri != NULL; + ri = ri->prevrec) + { + if (recno == ri->group_num && ptr == ri->subject_position && + mb->last_used_ptr == ri->last_used_ptr) return PCRE2_ERROR_RECURSELOOP; + } /* Remember this recursion and where we started it so as to catch infinite loops. */ new_recursive.group_num = recno; new_recursive.subject_position = ptr; + new_recursive.last_used_ptr = mb->last_used_ptr; new_recursive.prevrec = mb->recursive; mb->recursive = &new_recursive; @@ -3139,7 +3248,7 @@ for (;;) case OP_CALLOUT_STR: { PCRE2_SIZE callout_length; - rrc = do_callout(code, offsets, current_subject, ptr, mb, 0, + rrc = do_callout_dfa(code, offsets, current_subject, ptr, mb, 0, &callout_length); if (rrc < 0) return rrc; /* Abandon */ if (rrc == 0) @@ -3256,8 +3365,8 @@ BOOL has_first_cu = FALSE; BOOL has_req_cu = FALSE; #if PCRE2_CODE_UNIT_WIDTH == 8 -BOOL memchr_not_found_first_cu = FALSE; -BOOL memchr_not_found_first_cu2 = FALSE; +PCRE2_SPTR memchr_found_first_cu = NULL; +PCRE2_SPTR memchr_found_first_cu2 = NULL; #endif PCRE2_UCHAR first_cu = 0; @@ -3285,20 +3394,22 @@ rws->next = NULL; rws->size = RWS_BASE_SIZE; rws->free = RWS_BASE_SIZE - RWS_ANCHOR_SIZE; -/* A length equal to PCRE2_ZERO_TERMINATED implies a zero-terminated -subject string. */ +/* Recognize NULL, length 0 as an empty string. */ -if (length == PCRE2_ZERO_TERMINATED) - { - length = PRIV(strlen)(subject); - was_zero_terminated = 1; - } +if (subject == NULL && length == 0) subject = (PCRE2_SPTR)""; /* Plausibility checks */ if ((options & ~PUBLIC_DFA_MATCH_OPTIONS) != 0) return PCRE2_ERROR_BADOPTION; if (re == NULL || subject == NULL || workspace == NULL || match_data == NULL) return PCRE2_ERROR_NULL; + +if (length == PCRE2_ZERO_TERMINATED) + { + length = PRIV(strlen)(subject); + was_zero_terminated = 1; + } + if (wscount < 20) return PCRE2_ERROR_DFA_WSSIZE; if (start_offset > length) return PCRE2_ERROR_BADOFFSET; @@ -3365,7 +3476,7 @@ anchored = (options & (PCRE2_ANCHORED|PCRE2_DFA_RESTART)) != 0 || where to start. */ startline = (re->flags & PCRE2_STARTLINE) != 0; -firstline = (re->overall_options & PCRE2_FIRSTLINE) != 0; +firstline = !anchored && (re->overall_options & PCRE2_FIRSTLINE) != 0; bumpalong_limit = end_subject; /* Initialize and set up the fixed fields in the callout block, with a pointer @@ -3648,13 +3759,7 @@ for (;;) } } - /* Not anchored. Advance to a unique first code unit if there is one. In - 8-bit mode, the use of memchr() gives a big speed up, even though we have - to call it twice in caseless mode, in order to find the earliest occurrence - of the character in either of its cases. If a call to memchr() that - searches the rest of the subject fails to find one case, remember that in - order not to keep on repeating the search. This can make a huge difference - when the strings are very long and only one case is present. */ + /* Not anchored. Advance to a unique first code unit if there is one. */ else { @@ -3662,43 +3767,68 @@ for (;;) { if (first_cu != first_cu2) /* Caseless */ { + /* In 16-bit and 32_bit modes we have to do our own search, so can + look for both cases at once. */ + #if PCRE2_CODE_UNIT_WIDTH != 8 PCRE2_UCHAR smc; while (start_match < end_subject && (smc = UCHAR21TEST(start_match)) != first_cu && - smc != first_cu2) + smc != first_cu2) start_match++; +#else + /* In 8-bit mode, the use of memchr() gives a big speed up, even + though we have to call it twice in order to find the earliest + occurrence of the code unit in either of its cases. Caching is used + to remember the positions of previously found code units. This can + make a huge difference when the strings are very long and only one + case is actually present. */ -#else /* 8-bit code units */ PCRE2_SPTR pp1 = NULL; PCRE2_SPTR pp2 = NULL; - PCRE2_SIZE cu2size = end_subject - start_match; + PCRE2_SIZE searchlength = end_subject - start_match; + + /* If we haven't got a previously found position for first_cu, or if + the current starting position is later, we need to do a search. If + the code unit is not found, set it to the end. */ - if (!memchr_not_found_first_cu) + if (memchr_found_first_cu == NULL || + start_match > memchr_found_first_cu) { - pp1 = memchr(start_match, first_cu, end_subject - start_match); - if (pp1 == NULL) memchr_not_found_first_cu = TRUE; - else cu2size = pp1 - start_match; + pp1 = memchr(start_match, first_cu, searchlength); + memchr_found_first_cu = (pp1 == NULL)? end_subject : pp1; } - /* If pp1 is not NULL, we have arranged to search only as far as pp1, - to see if the other case is earlier, so we can set "not found" only - when both searches have returned NULL. */ + /* If the start is before a previously found position, use the + previous position, or NULL if a previous search failed. */ - if (!memchr_not_found_first_cu2) + else pp1 = (memchr_found_first_cu == end_subject)? NULL : + memchr_found_first_cu; + + /* Do the same thing for the other case. */ + + if (memchr_found_first_cu2 == NULL || + start_match > memchr_found_first_cu2) { - pp2 = memchr(start_match, first_cu2, cu2size); - memchr_not_found_first_cu2 = (pp2 == NULL && pp1 == NULL); + pp2 = memchr(start_match, first_cu2, searchlength); + memchr_found_first_cu2 = (pp2 == NULL)? end_subject : pp2; } + else pp2 = (memchr_found_first_cu2 == end_subject)? NULL : + memchr_found_first_cu2; + + /* Set the start to the end of the subject if neither case was found. + Otherwise, use the earlier found point. */ + if (pp1 == NULL) start_match = (pp2 == NULL)? end_subject : pp2; else start_match = (pp2 == NULL || pp1 < pp2)? pp1 : pp2; -#endif + +#endif /* 8-bit handling */ } - /* The caseful case */ + /* The caseful case is much simpler. */ else { @@ -3916,8 +4046,9 @@ for (;;) match_data->ovector[0] = (PCRE2_SIZE)(start_match - subject); match_data->ovector[1] = (PCRE2_SIZE)(end_subject - subject); } + match_data->subject_length = length; match_data->leftchar = (PCRE2_SIZE)(mb->start_used_ptr - subject); - match_data->rightchar = (PCRE2_SIZE)( mb->last_used_ptr - subject); + match_data->rightchar = (PCRE2_SIZE)(mb->last_used_ptr - subject); match_data->startchar = (PCRE2_SIZE)(start_match - subject); match_data->rc = rc; @@ -3979,4 +4110,10 @@ while (rws->next != NULL) return rc; } +/* These #undefs are here to enable unity builds with CMake. */ + +#undef NLBLOCK /* Block containing newline information */ +#undef PSSTART /* Field containing processed string start */ +#undef PSEND /* Field containing processed string end */ + /* End of pcre2_dfa_match.c */ |