diff options
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_auto_possess.c')
-rw-r--r-- | src/3rdparty/pcre2/src/pcre2_auto_possess.c | 59 |
1 files changed, 41 insertions, 18 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_auto_possess.c b/src/3rdparty/pcre2/src/pcre2_auto_possess.c index e5e0895682..210d13d37a 100644 --- a/src/3rdparty/pcre2/src/pcre2_auto_possess.c +++ b/src/3rdparty/pcre2/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2021 University of Cambridge + New API code Copyright (c) 2016-2022 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -123,18 +123,21 @@ opcode is used to select the column. The values are as follows: */ static const uint8_t propposstab[PT_TABSIZE][PT_TABSIZE] = { -/* ANY LAMP GC PC SC ALNUM SPACE PXSPACE WORD CLIST UCNC */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ - { 0, 3, 0, 0, 0, 3, 1, 1, 0, 0, 0 }, /* PT_LAMP */ - { 0, 0, 2, 4, 0, 9, 10, 10, 11, 0, 0 }, /* PT_GC */ - { 0, 0, 5, 2, 0, 15, 16, 16, 17, 0, 0 }, /* PT_PC */ - { 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ - { 0, 3, 6, 12, 0, 3, 1, 1, 0, 0, 0 }, /* PT_ALNUM */ - { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_SPACE */ - { 0, 1, 7, 13, 0, 1, 3, 3, 1, 0, 0 }, /* PT_PXSPACE */ - { 0, 0, 8, 14, 0, 0, 1, 1, 3, 0, 0 }, /* PT_WORD */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ - { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 } /* PT_UCNC */ +/* ANY LAMP GC PC SC SCX ALNUM SPACE PXSPACE WORD CLIST UCNC BIDICL BOOL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_ANY */ + { 0, 3, 0, 0, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_LAMP */ + { 0, 0, 2, 4, 0, 0, 9, 10, 10, 11, 0, 0, 0, 0 }, /* PT_GC */ + { 0, 0, 5, 2, 0, 0, 15, 16, 16, 17, 0, 0, 0, 0 }, /* PT_PC */ + { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SC */ + { 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_SCX */ + { 0, 3, 6, 12, 0, 0, 3, 1, 1, 0, 0, 0, 0, 0 }, /* PT_ALNUM */ + { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_SPACE */ + { 0, 1, 7, 13, 0, 0, 1, 3, 3, 1, 0, 0, 0, 0 }, /* PT_PXSPACE */ + { 0, 0, 8, 14, 0, 0, 0, 1, 1, 3, 0, 0, 0, 0 }, /* PT_WORD */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_CLIST */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0 }, /* PT_UCNC */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, /* PT_BIDICL */ + { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } /* PT_BOOL */ }; /* This table is used to check whether auto-possessification is possible @@ -196,6 +199,7 @@ static BOOL check_char_prop(uint32_t c, unsigned int ptype, unsigned int pdata, BOOL negated) { +BOOL ok; const uint32_t *p; const ucd_record *prop = GET_UCD(c); @@ -215,6 +219,11 @@ switch(ptype) case PT_SC: return (pdata == prop->script) == negated; + case PT_SCX: + ok = (pdata == prop->script + || MAPBIT(PRIV(ucd_script_sets) + UCD_SCRIPTX_PROP(prop), pdata) != 0); + return ok == negated; + /* These are specials */ case PT_ALNUM: @@ -251,6 +260,14 @@ switch(ptype) if (c == *p++) return negated; } break; /* Control never reaches here */ + + /* Haven't yet thought these through. */ + + case PT_BIDICL: + return FALSE; + + case PT_BOOL: + return FALSE; } return FALSE; @@ -543,6 +560,8 @@ matches to an empty string (also represented by a non-zero value). */ for(;;) { + PCRE2_SPTR bracode; + /* All operations move the code pointer forward. Therefore infinite recursions are not possible. */ @@ -600,7 +619,8 @@ for(;;) recursions. (This could be improved by keeping a list of group numbers that are called by recursion.) */ - switch(*(code - GET(code, 1))) + bracode = code - GET(code, 1); + switch(*bracode) { case OP_CBRA: case OP_SCBRA: @@ -619,16 +639,19 @@ for(;;) break; /* Atomic sub-patterns and assertions can always auto-possessify their - last iterator. However, if the group was entered as a result of checking - a previous iterator, this is not possible. */ + last iterator except for variable length lookbehinds. However, if the + group was entered as a result of checking a previous iterator, this is + not possible. */ case OP_ASSERT: case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: case OP_ONCE: return !entered_a_group; + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + return (bracode[1+LINK_SIZE] == OP_VREVERSE)? FALSE : !entered_a_group; + /* Non-atomic assertions - don't possessify last iterator. This needs more thought. */ |