diff options
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_jit_compile.c')
-rw-r--r-- | src/3rdparty/pcre2/src/pcre2_jit_compile.c | 4932 |
1 files changed, 2841 insertions, 2091 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_jit_compile.c b/src/3rdparty/pcre2/src/pcre2_jit_compile.c index 1f21bfb6ad..050063ec6d 100644 --- a/src/3rdparty/pcre2/src/pcre2_jit_compile.c +++ b/src/3rdparty/pcre2/src/pcre2_jit_compile.c @@ -6,8 +6,9 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel + This module by Zoltan Herczeg Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2018 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -42,6 +43,12 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#include <sanitizer/msan_interface.h> +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + #include "pcre2_internal.h" #ifdef SUPPORT_JIT @@ -212,12 +219,6 @@ typedef struct stub_list { struct stub_list *next; } stub_list; -typedef struct label_addr_list { - struct sljit_label *label; - sljit_uw *update_addr; - struct label_addr_list *next; -} label_addr_list; - enum frame_types { no_frame = -1, no_stack = -2 @@ -228,6 +229,12 @@ enum control_types { type_then_trap = 1 }; +enum early_fail_types { + type_skip = 0, + type_fail = 1, + type_fail_range = 2 +}; + typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args); /* The following structure is the key data type for the recursive @@ -235,12 +242,21 @@ code generator. It is allocated by compile_matchingpath, and contains the arguments for compile_backtrackingpath. Must be the first member of its descendants. */ typedef struct backtrack_common { - /* Concatenation stack. */ + /* Backtracking path of an opcode, which falls back + to our opcode, if it cannot resume matching. */ struct backtrack_common *prev; - jump_list *nextbacktracks; - /* Internal stack (for component operators). */ + /* Backtracks for opcodes without backtracking path. + These opcodes are between 'prev' and the current + opcode, and they never resume the match. */ + jump_list *simple_backtracks; + /* Internal backtracking list for block constructs + which contains other opcodes, such as brackets, + asserts, conditionals, etc. */ struct backtrack_common *top; - jump_list *topbacktracks; + /* Backtracks used internally by the opcode. For component + opcodes, this list is also used by those opcodes without + backtracking path which follows the 'top' backtrack. */ + jump_list *own_backtracks; /* Opcode pointer. */ PCRE2_SPTR cc; } backtrack_common; @@ -271,6 +287,8 @@ typedef struct bracket_backtrack { assert_backtrack *assert; /* For OP_ONCE. Less than 0 if not needed. */ int framesize; + /* For brackets with >3 alternatives. */ + struct sljit_put_label *matching_put_label; } u; /* Points to our private memory word on the stack. */ int private_data_ptr; @@ -335,6 +353,12 @@ typedef struct recurse_backtrack { BOOL inlined_pattern; } recurse_backtrack; +typedef struct vreverse_backtrack { + backtrack_common common; + /* Return to the matching path. */ + struct sljit_label *matchingpath; +} vreverse_backtrack; + #define OP_THEN_TRAP OP_TABLE_LENGTH typedef struct then_trap_backtrack { @@ -401,21 +425,28 @@ typedef struct compiler_common { sljit_s32 match_end_ptr; /* Points to the marked string. */ sljit_s32 mark_ptr; - /* Recursive control verb management chain. */ + /* Head of the recursive control verb management chain. + Each item must have a previous offset and type + (see control_types) values. See do_search_mark. */ sljit_s32 control_head_ptr; /* Points to the last matched capture block index. */ sljit_s32 capture_last_ptr; /* Fast forward skipping byte code pointer. */ PCRE2_SPTR fast_forward_bc_ptr; /* Locals used by fast fail optimization. */ - sljit_s32 fast_fail_start_ptr; - sljit_s32 fast_fail_end_ptr; + sljit_s32 early_fail_start_ptr; + sljit_s32 early_fail_end_ptr; + /* Variables used by recursive call generator. */ + sljit_s32 recurse_bitset_size; + uint8_t *recurse_bitset; /* Flipped and lower case tables. */ const sljit_u8 *fcc; sljit_sw lcc; /* Mode can be PCRE2_JIT_COMPLETE and others. */ int mode; + /* TRUE, when empty match is accepted for partial matching. */ + BOOL allow_empty_partial; /* TRUE, when minlength is greater than 0. */ BOOL might_be_empty; /* \K is found in the pattern. */ @@ -454,7 +485,6 @@ typedef struct compiler_common { struct sljit_label *accept_label; struct sljit_label *ff_newline_shortcut; stub_list *stubs; - label_addr_list *label_addrs; recurse_entry *entries; recurse_entry *currententry; jump_list *partialmatch; @@ -467,18 +497,21 @@ typedef struct compiler_common { jump_list *stackalloc; jump_list *revertframes; jump_list *wordboundary; + jump_list *ucp_wordboundary; jump_list *anynewline; jump_list *hspace; jump_list *vspace; jump_list *casefulcmp; jump_list *caselesscmp; jump_list *reset_match; + /* Same as reset_match, but resets the STR_PTR as well. */ + jump_list *restart_match; BOOL unset_backref; BOOL alt_circumflex; #ifdef SUPPORT_UNICODE BOOL utf; BOOL invalid_utf; - BOOL use_ucp; + BOOL ucp; /* Points to saving area for iref. */ sljit_s32 iref_ptr; jump_list *getucd; @@ -535,7 +568,7 @@ typedef struct compare_context { #undef CMP /* Used for accessing the elements of the stack. */ -#define STACK(i) ((i) * (int)sizeof(sljit_sw)) +#define STACK(i) ((i) * SSIZE_OF(sw)) #ifdef SLJIT_PREF_SHIFT_REG #if SLJIT_PREF_SHIFT_REG == SLJIT_R2 @@ -563,6 +596,12 @@ typedef struct compare_context { #define ARGUMENTS SLJIT_S4 #define RETURN_ADDR SLJIT_R4 +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +#define HAS_VIRTUAL_REGISTERS 1 +#else +#define HAS_VIRTUAL_REGISTERS 0 +#endif + /* Local space layout. */ /* These two locals can be used by the current opcode. */ #define LOCALS0 (0 * sizeof(sljit_sw)) @@ -577,8 +616,8 @@ to characters. The vector data is divided into two groups: the first group contains the start / end character pointers, and the second is the start pointers when the end of the capturing group has not yet reached. */ #define OVECTOR_START (common->ovector_start) -#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) -#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) +#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw)) +#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw)) #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -603,6 +642,10 @@ the start pointers when the end of the capturing group has not yet reached. */ sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) +#define OP2U(op, src1, src1w, src2, src2w) \ + sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w)) +#define OP_SRC(op, src, srcw) \ + sljit_emit_op_src(compiler, (op), (src), (srcw)) #define LABEL() \ sljit_emit_label(compiler) #define JUMP(type) \ @@ -619,8 +662,8 @@ the start pointers when the end of the capturing group has not yet reached. */ sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) #define OP_FLAGS(op, dst, dstw, type) \ sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type)) -#define CMOV(type, dst_reg, src, srcw) \ - sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw)) +#define SELECT(type, dst_reg, src1, src1w, src2_reg) \ + sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg)) #define GET_LOCAL_BASE(dst, dstw, offset) \ sljit_get_local_base(compiler, (dst), (dstw), (offset)) @@ -696,11 +739,12 @@ the start pointers when the end of the capturing group has not yet reached. */ #define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ { \ - if (ptr[-1] <= 0x7f) \ - c = *ptr--; \ + c = ptr[-1]; \ + if (c <= 0x7f) \ + ptr--; \ else if (ptr - 1 > start && ptr[-1] >= 0x80 && ptr[-1] < 0xc0) \ { \ - c = ptr[-1] - 0x80; \ + c -= 0x80; \ \ if (ptr[-2] >= 0xc2 && ptr[-2] <= 0xdf) \ { \ @@ -775,11 +819,12 @@ the start pointers when the end of the capturing group has not yet reached. */ #define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ { \ - if (ptr[-1] < 0xd800 || ptr[-1] >= 0xe000) \ - c = *ptr--; \ - else if (ptr[-1] >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \ + c = ptr[-1]; \ + if (c < 0xd800 || c >= 0xe000) \ + ptr--; \ + else if (c >= 0xdc00 && ptr - 1 > start && ptr[-2] >= 0xd800 && ptr[-2] < 0xdc00) \ { \ - c = (((ptr[-2] - 0xd800) << 10) | (ptr[-1] - 0xdc00)) + 0x10000; \ + c = (((ptr[-2] - 0xd800) << 10) | (c - 0xdc00)) + 0x10000; \ ptr -= 2; \ } \ else \ @@ -793,7 +838,7 @@ the start pointers when the end of the capturing group has not yet reached. */ #define GETCHARINC_INVALID(c, ptr, end, invalid_action) \ { \ - if (ptr[0] < 0x110000) \ + if (ptr[0] < 0xd800 || (ptr[0] >= 0xe000 && ptr[0] < 0x110000)) \ c = *ptr++; \ else \ { \ @@ -801,12 +846,23 @@ the start pointers when the end of the capturing group has not yet reached. */ } \ } +#define GETCHARBACK_INVALID(c, ptr, start, invalid_action) \ + { \ + c = ptr[-1]; \ + if (ptr[-1] < 0xd800 || (ptr[-1] >= 0xe000 && ptr[-1] < 0x110000)) \ + ptr--; \ + else \ + { \ + invalid_action; \ + } \ + } + #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ #endif /* SUPPORT_UNICODE */ static PCRE2_SPTR bracketend(PCRE2_SPTR cc) { -SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); do cc += GET(cc, 1); while (*cc == OP_ALT); SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); cc += 1 + LINK_SIZE; @@ -816,7 +872,7 @@ return cc; static int no_alternatives(PCRE2_SPTR cc) { int count = 0; -SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); +SLJIT_ASSERT((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)); do { cc += GET(cc, 1); @@ -827,6 +883,21 @@ SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); return count; } +static BOOL find_vreverse(PCRE2_SPTR cc) +{ + SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA); + + do + { + if (cc[1 + LINK_SIZE] == OP_VREVERSE) + return TRUE; + cc += GET(cc, 1); + } + while (*cc == OP_ALT); + + return FALSE; +} + /* Functions whose might need modification for all new supported opcodes: next_opcode check_opcode_types @@ -897,10 +968,13 @@ switch(*cc) case OP_KETRMIN: case OP_KETRPOS: case OP_REVERSE: + case OP_VREVERSE: case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -931,6 +1005,8 @@ switch(*cc) case OP_ASSERT_ACCEPT: case OP_CLOSE: case OP_SKIPZERO: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: return cc + PRIV(OP_lengths)[*cc]; case OP_CHAR: @@ -1033,7 +1109,6 @@ switch(*cc) return cc + 1 + 2 + cc[1]; default: - /* All opcodes are supported now! */ SLJIT_UNREACHABLE(); return NULL; } @@ -1044,6 +1119,7 @@ static BOOL check_opcode_types(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPT int count; PCRE2_SPTR slot; PCRE2_SPTR assert_back_end = cc - 1; +PCRE2_SPTR assert_na_end = cc - 1; /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */ while (cc < ccend) @@ -1070,6 +1146,14 @@ while (cc < ccend) cc += 1 + IMM2_SIZE; break; + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: + slot = bracketend(cc); + if (slot > assert_na_end) + assert_na_end = slot; + cc += 1 + LINK_SIZE; + break; + case OP_CBRAPOS: case OP_SCBRAPOS: common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0; @@ -1137,6 +1221,9 @@ while (cc < ccend) case OP_COMMIT_ARG: case OP_PRUNE_ARG: + if (cc < assert_na_end) + return FALSE; + /* Fall through */ case OP_MARK: if (common->mark_ptr == 0) { @@ -1155,6 +1242,8 @@ while (cc < ccend) case OP_SKIP: if (cc < assert_back_end) common->has_skip_in_assert_back = TRUE; + if (cc < assert_na_end) + return FALSE; cc += 1; break; @@ -1163,9 +1252,19 @@ while (cc < ccend) common->has_skip_arg = TRUE; if (cc < assert_back_end) common->has_skip_in_assert_back = TRUE; + if (cc < assert_na_end) + return FALSE; cc += 1 + 2 + cc[1]; break; + case OP_PRUNE: + case OP_COMMIT: + case OP_ASSERT_ACCEPT: + if (cc < assert_na_end) + return FALSE; + cc++; + break; + default: cc = next_opcode(common, cc); if (cc == NULL) @@ -1176,183 +1275,393 @@ while (cc < ccend) return TRUE; } -static BOOL is_accelerated_repeat(PCRE2_SPTR cc) +#define EARLY_FAIL_ENHANCE_MAX (3 + 3) + +/* + Start represent the number of allowed early fail enhancements + + The 0-2 values has a special meaning: + 0 - skip is allowed for all iterators + 1 - fail is allowed for all iterators + 2 - fail is allowed for greedy iterators + 3 - only ranged early fail is allowed + >3 - (start - 3) number of remaining ranged early fails allowed + +return: the updated value of start +*/ +static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, + int *private_data_start, sljit_s32 depth, int start) { -switch(*cc) +PCRE2_SPTR begin = cc; +PCRE2_SPTR next_alt; +PCRE2_SPTR end; +PCRE2_SPTR accelerated_start; +int result = 0; +int count, prev_count; + +SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA); +SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0); +SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX); + +next_alt = cc + GET(cc, 1); +if (*next_alt == OP_ALT && start < 1) + start = 1; + +do { - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEPOSSTAR: - case OP_TYPEPOSPLUS: - return (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI); + count = start; + cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0); - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_POSSTAR: - case OP_POSPLUS: + while (TRUE) + { + accelerated_start = NULL; - case OP_STARI: - case OP_MINSTARI: - case OP_PLUSI: - case OP_MINPLUSI: - case OP_POSSTARI: - case OP_POSPLUSI: + switch(*cc) + { + case OP_SOD: + case OP_SOM: + case OP_SET_SOM: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + case OP_EODN: + case OP_EOD: + case OP_CIRC: + case OP_CIRCM: + case OP_DOLL: + case OP_DOLLM: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + /* Zero width assertions. */ + cc++; + continue; + + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ALLANY: + case OP_ANYBYTE: + case OP_NOT_HSPACE: + case OP_HSPACE: + case OP_NOT_VSPACE: + case OP_VSPACE: + if (count < 1) + count = 1; + cc++; + continue; - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTPOSSTAR: - case OP_NOTPOSPLUS: + case OP_ANYNL: + case OP_EXTUNI: + if (count < 3) + count = 3; + cc++; + continue; + + case OP_NOTPROP: + case OP_PROP: + if (count < 1) + count = 1; + cc += 1 + 2; + continue; + + case OP_CHAR: + case OP_CHARI: + case OP_NOT: + case OP_NOTI: + if (count < 1) + count = 1; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; - case OP_NOTSTARI: - case OP_NOTMINSTARI: - case OP_NOTPLUSI: - case OP_NOTMINPLUSI: - case OP_NOTPOSSTARI: - case OP_NOTPOSPLUSI: - return TRUE; + case OP_TYPEMINSTAR: + case OP_TYPEMINPLUS: + if (count == 2) + count = 3; + /* Fall through */ - case OP_CLASS: - case OP_NCLASS: + case OP_TYPESTAR: + case OP_TYPEPLUS: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSPLUS: + /* The type or prop opcode is skipped in the next iteration. */ + cc += 1; + + if (cc[0] != OP_ANYNL && cc[0] != OP_EXTUNI) + { + accelerated_start = cc - 1; + break; + } + + if (count < 3) + count = 3; + continue; + + case OP_TYPEEXACT: + if (count < 1) + count = 1; + cc += 1 + IMM2_SIZE; + continue; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + case OP_TYPEPOSUPTO: + cc += IMM2_SIZE; + /* Fall through */ + + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + case OP_TYPEPOSQUERY: + /* The type or prop opcode is skipped in the next iteration. */ + if (count < 3) + count = 3; + cc += 1; + continue; + + case OP_MINSTAR: + case OP_MINPLUS: + case OP_MINSTARI: + case OP_MINPLUSI: + case OP_NOTMINSTAR: + case OP_NOTMINPLUS: + case OP_NOTMINSTARI: + case OP_NOTMINPLUSI: + if (count == 2) + count = 3; + /* Fall through */ + + case OP_STAR: + case OP_PLUS: + case OP_POSSTAR: + case OP_POSPLUS: + + case OP_STARI: + case OP_PLUSI: + case OP_POSSTARI: + case OP_POSPLUSI: + + case OP_NOTSTAR: + case OP_NOTPLUS: + case OP_NOTPOSSTAR: + case OP_NOTPOSPLUS: + + case OP_NOTSTARI: + case OP_NOTPLUSI: + case OP_NOTPOSSTARI: + case OP_NOTPOSPLUSI: + accelerated_start = cc; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + break; + + case OP_EXACT: + if (count < 1) + count = 1; + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; + + case OP_UPTO: + case OP_MINUPTO: + case OP_POSUPTO: + case OP_UPTOI: + case OP_MINUPTOI: + case OP_EXACTI: + case OP_POSUPTOI: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + case OP_NOTEXACT: + case OP_NOTPOSUPTO: + case OP_NOTUPTOI: + case OP_NOTMINUPTOI: + case OP_NOTEXACTI: + case OP_NOTPOSUPTOI: + cc += IMM2_SIZE; + /* Fall through */ + + case OP_QUERY: + case OP_MINQUERY: + case OP_POSQUERY: + case OP_QUERYI: + case OP_MINQUERYI: + case OP_POSQUERYI: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + case OP_NOTPOSQUERY: + case OP_NOTQUERYI: + case OP_NOTMINQUERYI: + case OP_NOTPOSQUERYI: + if (count < 3) + count = 3; + cc += 2; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; + + case OP_CLASS: + case OP_NCLASS: #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 - case OP_XCLASS: - cc += (*cc == OP_XCLASS) ? GET(cc, 1) : (int)(1 + (32 / sizeof(PCRE2_UCHAR))); + case OP_XCLASS: + accelerated_start = cc; + cc += ((*cc == OP_XCLASS) ? GET(cc, 1) : (unsigned int)(1 + (32 / sizeof(PCRE2_UCHAR)))); #else - cc += (1 + (32 / sizeof(PCRE2_UCHAR))); + accelerated_start = cc; + cc += (1 + (32 / sizeof(PCRE2_UCHAR))); #endif - switch(*cc) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRPOSSTAR: - case OP_CRPOSPLUS: - return TRUE; - } - break; - } -return FALSE; -} - -static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start) -{ -PCRE2_SPTR cc = common->start; -PCRE2_SPTR end; + switch (*cc) + { + case OP_CRMINSTAR: + case OP_CRMINPLUS: + if (count == 2) + count = 3; + /* Fall through */ + + case OP_CRSTAR: + case OP_CRPLUS: + case OP_CRPOSSTAR: + case OP_CRPOSPLUS: + cc++; + break; -/* Skip not repeated brackets. */ -while (TRUE) - { - switch(*cc) - { - case OP_SOD: - case OP_SOM: - case OP_SET_SOM: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - case OP_EODN: - case OP_EOD: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: - /* Zero width assertions. */ - cc++; - continue; - } + case OP_CRRANGE: + case OP_CRMINRANGE: + case OP_CRPOSRANGE: + if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) + { + /* Exact repeat. */ + cc += 1 + 2 * IMM2_SIZE; + if (count < 1) + count = 1; + continue; + } - if (*cc != OP_BRA && *cc != OP_CBRA) - break; + cc += 2 * IMM2_SIZE; + /* Fall through */ + case OP_CRQUERY: + case OP_CRMINQUERY: + case OP_CRPOSQUERY: + cc++; + if (count < 3) + count = 3; + continue; - end = cc + GET(cc, 1); - if (*end != OP_KET || PRIVATE_DATA(end) != 0) - return FALSE; - if (*cc == OP_CBRA) - { - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - return FALSE; - cc += IMM2_SIZE; - } - cc += 1 + LINK_SIZE; - } + default: + /* No repeat. */ + if (count < 1) + count = 1; + continue; + } + break; -if (is_accelerated_repeat(cc)) - { - common->fast_forward_bc_ptr = cc; - common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; - *private_data_start += sizeof(sljit_sw); - return TRUE; - } -return FALSE; -} + case OP_BRA: + case OP_CBRA: + prev_count = count; + if (count < 1) + count = 1; -static SLJIT_INLINE void detect_fast_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth) -{ - PCRE2_SPTR next_alt; + if (depth >= 4) + break; - SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA); + if (count < 3 && cc[GET(cc, 1)] == OP_ALT) + count = 3; - if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - return; + end = bracketend(cc); + if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)) + break; - next_alt = bracketend(cc) - (1 + LINK_SIZE); - if (*next_alt != OP_KET || PRIVATE_DATA(next_alt) != 0) - return; + prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count); - do - { - next_alt = cc + GET(cc, 1); + if (prev_count > count) + count = prev_count; - cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0); + if (PRIVATE_DATA(cc) != 0) + common->private_data_ptrs[begin - common->start] = 1; - while (TRUE) - { - switch(*cc) + if (count < EARLY_FAIL_ENHANCE_MAX) { - case OP_SOD: - case OP_SOM: - case OP_SET_SOM: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - case OP_EODN: - case OP_EOD: - case OP_CIRC: - case OP_CIRCM: - case OP_DOLL: - case OP_DOLLM: - /* Zero width assertions. */ - cc++; + cc = end; continue; } break; + + case OP_KET: + SLJIT_ASSERT(PRIVATE_DATA(cc) == 0); + if (cc >= next_alt) + break; + cc += 1 + LINK_SIZE; + continue; } - if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA)) - detect_fast_fail(common, cc, private_data_start, depth - 1); + if (accelerated_start == NULL) + break; - if (is_accelerated_repeat(cc)) + if (count == 0) { - common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start; + common->fast_forward_bc_ptr = accelerated_start; + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip; + *private_data_start += sizeof(sljit_sw); + count = 4; + } + else if (count < 3) + { + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail; - if (common->fast_fail_start_ptr == 0) - common->fast_fail_start_ptr = *private_data_start; + if (common->early_fail_start_ptr == 0) + common->early_fail_start_ptr = *private_data_start; *private_data_start += sizeof(sljit_sw); - common->fast_fail_end_ptr = *private_data_start; + common->early_fail_end_ptr = *private_data_start; if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) - return; + return EARLY_FAIL_ENHANCE_MAX; + + count = 4; + } + else + { + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range; + + if (common->early_fail_start_ptr == 0) + common->early_fail_start_ptr = *private_data_start; + + *private_data_start += 2 * sizeof(sljit_sw); + common->early_fail_end_ptr = *private_data_start; + + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return EARLY_FAIL_ENHANCE_MAX; + + count++; } - cc = next_alt; + /* Cannot be part of a repeat. */ + common->private_data_ptrs[begin - common->start] = 1; + + if (count >= EARLY_FAIL_ENHANCE_MAX) + break; } - while (*cc == OP_ALT); + + if (*cc != OP_ALT && *cc != OP_KET) + result = EARLY_FAIL_ENHANCE_MAX; + else if (result < count) + result = count; + + cc = next_alt; + next_alt = cc + GET(cc, 1); + } +while (*cc == OP_ALT); + +return result; } static int get_class_iterator_size(PCRE2_SPTR cc) @@ -1398,11 +1707,12 @@ sljit_sw length = end - begin; sljit_s32 min, max, i; /* Detect fixed iterations first. */ -if (end[-(1 + LINK_SIZE)] != OP_KET) +if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0) return FALSE; -/* Already detected repeat. */ -if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) +/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/ + * Skip the check of the second part. */ +if (PRIVATE_DATA(end - LINK_SIZE) != 0) return TRUE; next = end; @@ -1541,6 +1851,7 @@ while (cc < ccend) if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) break; + /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */ if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) { if (detect_repeat(common, cc)) @@ -1569,6 +1880,7 @@ while (cc < ccend) case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRAPOS: @@ -1580,6 +1892,19 @@ while (cc < ccend) bracketlen = 1 + LINK_SIZE; break; + case OP_ASSERTBACK_NA: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + + if (find_vreverse(cc)) + { + common->private_data_ptrs[cc + 1 - common->start] = 1; + private_data_ptr += sizeof(sljit_sw); + } + + bracketlen = 1 + LINK_SIZE; + break; + case OP_CBRAPOS: case OP_SCBRAPOS: common->private_data_ptrs[cc - common->start] = private_data_ptr; @@ -1589,6 +1914,7 @@ while (cc < ccend) case OP_COND: /* Might be a hidden SCOND. */ + common->private_data_ptrs[cc - common->start] = 0; alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { @@ -1610,57 +1936,57 @@ while (cc < ccend) case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO: - repeat_check = FALSE; size = 1; + repeat_check = FALSE; break; CASE_ITERATOR_PRIVATE_DATA_1 - space = 1; size = -2; + space = 1; break; CASE_ITERATOR_PRIVATE_DATA_2A - space = 2; size = -2; + space = 2; break; CASE_ITERATOR_PRIVATE_DATA_2B - space = 2; size = -(2 + IMM2_SIZE); + space = 2; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - space = 1; size = 1; + space = 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2A + size = 1; if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI) space = 2; - size = 1; break; case OP_TYPEUPTO: + size = 1 + IMM2_SIZE; if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI) space = 2; - size = 1 + IMM2_SIZE; break; case OP_TYPEMINUPTO: - space = 2; size = 1 + IMM2_SIZE; + space = 2; break; case OP_CLASS: case OP_NCLASS: - space = get_class_iterator_size(cc + size); size = 1 + 32 / sizeof(PCRE2_UCHAR); + space = get_class_iterator_size(cc + size); break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 case OP_XCLASS: - space = get_class_iterator_size(cc + size); size = GET(cc, 1); + space = get_class_iterator_size(cc + size); break; #endif @@ -1878,6 +2204,9 @@ while (cc < ccend) case OP_CALLOUT: case OP_CALLOUT_STR: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); break; @@ -1923,9 +2252,9 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setsom_found = TRUE; } cc += 1; @@ -1940,9 +2269,9 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setmark_found = TRUE; } cc += 1 + 2 + cc[1]; @@ -1953,27 +2282,27 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setsom_found = TRUE; } if (common->mark_ptr != 0 && !setmark_found) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setmark_found = TRUE; } if (common->capture_last_ptr != 0 && !capture_last_found) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); capture_last_found = TRUE; } cc += 1 + LINK_SIZE; @@ -1987,20 +2316,20 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); capture_last_found = TRUE; } offset = (GET2(cc, 1 + LINK_SIZE)) << 1; OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -2033,7 +2362,7 @@ int i; for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) { SLJIT_ASSERT(status->tmp_regs[i] >= 0); - SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]); + SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]); status->store_bases[i] = -1; } @@ -2053,7 +2382,7 @@ SLJIT_ASSERT(load_base > 0 && store_base > 0); if (status->store_bases[next_tmp_reg] == -1) { /* Preserve virtual registers. */ - if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0) + if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0) OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0); } else @@ -2082,7 +2411,7 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0); /* Restore virtual registers. */ - if (sljit_get_register_index(saved_tmp_reg) < 0) + if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0) OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0); } @@ -2092,22 +2421,47 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) #undef RECURSE_TMP_REG_COUNT -static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, - BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept) +static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index) +{ +uint8_t *byte; +uint8_t mask; + +SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0); + +bit_index >>= SLJIT_WORD_SHIFT; + +SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size); + +mask = 1 << (bit_index & 0x7); +byte = common->recurse_bitset + (bit_index >> 3); + +if (*byte & mask) + return FALSE; + +*byte |= mask; +return TRUE; +} + +enum get_recurse_flags { + recurse_flag_quit_found = (1 << 0), + recurse_flag_accept_found = (1 << 1), + recurse_flag_setsom_found = (1 << 2), + recurse_flag_setmark_found = (1 << 3), + recurse_flag_control_head_found = (1 << 4), +}; + +static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags) { int length = 1; -int size; +int size, offset; PCRE2_SPTR alternative; -BOOL quit_found = FALSE; -BOOL accept_found = FALSE; -BOOL setsom_found = FALSE; -BOOL setmark_found = FALSE; -BOOL capture_last_found = FALSE; -BOOL control_head_found = FALSE; +uint32_t recurse_flags = 0; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD SLJIT_ASSERT(common->control_head_ptr != 0); -control_head_found = TRUE; +recurse_flags |= recurse_flag_control_head_found; #endif /* Calculate the sum of the private machine words. */ @@ -2118,24 +2472,26 @@ while (cc < ccend) { case OP_SET_SOM: SLJIT_ASSERT(common->has_set_som); - setsom_found = TRUE; + recurse_flags |= recurse_flag_setsom_found; cc += 1; break; case OP_RECURSE: if (common->has_set_som) - setsom_found = TRUE; + recurse_flags |= recurse_flag_setsom_found; if (common->mark_ptr != 0) - setmark_found = TRUE; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; + recurse_flags |= recurse_flag_setmark_found; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; cc += 1 + LINK_SIZE; break; case OP_KET: - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0) { - length++; + if (recurse_check_bit(common, offset)) + length++; SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); cc += PRIVATE_DATA(cc + 1); } @@ -2146,45 +2502,63 @@ while (cc < ccend) case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRAPOS: case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - length++; SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - length += 2; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - length += 2 + 2; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: /* Might be a hidden SCOND. */ alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc))) length++; cc += 1 + LINK_SIZE; break; CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length++; cc += 2; #ifdef SUPPORT_UNICODE @@ -2193,8 +2567,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 2; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2202,8 +2580,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2211,20 +2593,29 @@ while (cc < ccend) break; CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length++; cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 1 + IMM2_SIZE; break; @@ -2236,7 +2627,9 @@ while (cc < ccend) #else size = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif - if (PRIVATE_DATA(cc) != 0) + + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length += get_class_iterator_size(cc + size); cc += size; break; @@ -2246,12 +2639,11 @@ while (cc < ccend) case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); - if (!setmark_found) - setmark_found = TRUE; + recurse_flags |= recurse_flag_setmark_found; if (common->control_head_ptr != 0) - control_head_found = TRUE; + recurse_flags |= recurse_flag_control_head_found; if (*cc != OP_MARK) - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc += 1 + 2 + cc[1]; break; @@ -2259,26 +2651,24 @@ while (cc < ccend) case OP_PRUNE: case OP_SKIP: case OP_COMMIT: - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc++; break; case OP_SKIP_ARG: - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc += 1 + 2 + cc[1]; break; case OP_THEN: SLJIT_ASSERT(common->control_head_ptr != 0); - quit_found = TRUE; - if (!control_head_found) - control_head_found = TRUE; + recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found; cc++; break; case OP_ACCEPT: case OP_ASSERT_ACCEPT: - accept_found = TRUE; + recurse_flags |= recurse_flag_accept_found; cc++; break; @@ -2290,21 +2680,17 @@ while (cc < ccend) } SLJIT_ASSERT(cc == ccend); -if (control_head_found) - length++; -if (capture_last_found) +if (recurse_flags & recurse_flag_control_head_found) length++; -if (quit_found) +if (recurse_flags & recurse_flag_quit_found) { - if (setsom_found) + if (recurse_flags & recurse_flag_setsom_found) length++; - if (setmark_found) + if (recurse_flags & recurse_flag_setmark_found) length++; } -*needs_control_head = control_head_found; -*has_quit = quit_found; -*has_accept = accept_found; +*result_flags = recurse_flags; return length; } @@ -2317,7 +2703,7 @@ enum copy_recurse_data_types { }; static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, - int type, int stackptr, int stacktop, BOOL has_quit) + int type, int stackptr, int stacktop, uint32_t recurse_flags) { delayed_mem_copy_status status; PCRE2_SPTR alternative; @@ -2326,14 +2712,12 @@ sljit_sw shared_srcw[3]; sljit_sw kept_shared_srcw[2]; int private_count, shared_count, kept_shared_count; int from_sp, base_reg, offset, i; -BOOL setsom_found = FALSE; -BOOL setmark_found = FALSE; -BOOL capture_last_found = FALSE; -BOOL control_head_found = FALSE; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD SLJIT_ASSERT(common->control_head_ptr != 0); -control_head_found = TRUE; +recurse_check_bit(common, common->control_head_ptr); #endif switch (type) @@ -2371,14 +2755,14 @@ if (base_reg != TMP2) else { status.saved_tmp_regs[1] = RETURN_ADDR; - if (sljit_get_register_index(RETURN_ADDR) == -1) + if (HAS_VIRTUAL_REGISTERS) status.tmp_regs[1] = STR_PTR; else status.tmp_regs[1] = RETURN_ADDR; } status.saved_tmp_regs[2] = TMP3; -if (sljit_get_register_index(TMP3) == -1) +if (HAS_VIRTUAL_REGISTERS) status.tmp_regs[2] = STR_END; else status.tmp_regs[2] = TMP3; @@ -2421,45 +2805,42 @@ while (cc < ccend) { case OP_SET_SOM: SLJIT_ASSERT(common->has_set_som); - if (has_quit && !setsom_found) + if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0))) { kept_shared_srcw[0] = OVECTOR(0); kept_shared_count = 1; - setsom_found = TRUE; } cc += 1; break; case OP_RECURSE: - if (has_quit) + if (recurse_flags & recurse_flag_quit_found) { - if (common->has_set_som && !setsom_found) + if (common->has_set_som && recurse_check_bit(common, OVECTOR(0))) { kept_shared_srcw[0] = OVECTOR(0); kept_shared_count = 1; - setsom_found = TRUE; } - if (common->mark_ptr != 0 && !setmark_found) + if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr)) { kept_shared_srcw[kept_shared_count] = common->mark_ptr; kept_shared_count++; - setmark_found = TRUE; } } - if (common->capture_last_ptr != 0 && !capture_last_found) + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) { shared_srcw[0] = common->capture_last_ptr; shared_count = 1; - capture_last_found = TRUE; } cc += 1 + LINK_SIZE; break; case OP_KET: - if (PRIVATE_DATA(cc) != 0) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0) { - private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); cc += PRIVATE_DATA(cc + 1); } @@ -2470,56 +2851,74 @@ while (cc < ccend) case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRAPOS: case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - private_count = 1; private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - offset = (GET2(cc, 1 + LINK_SIZE)) << 1; - shared_srcw[0] = OVECTOR(offset); - shared_srcw[1] = OVECTOR(offset + 1); - shared_count = 2; + offset = GET2(cc, 1 + LINK_SIZE); + shared_srcw[0] = OVECTOR(offset << 1); + if (recurse_check_bit(common, shared_srcw[0])) + { + shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1])); + shared_count = 2; + } - if (common->capture_last_ptr != 0 && !capture_last_found) + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) { - shared_srcw[2] = common->capture_last_ptr; - shared_count = 3; - capture_last_found = TRUE; + shared_srcw[shared_count] = common->capture_last_ptr; + shared_count++; } - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + if (common->optimized_cbracket[offset] == 0) { - private_count = 1; - private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + private_srcw[0] = OVECTOR_PRIV(offset); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; } + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - offset = (GET2(cc, 1 + LINK_SIZE)) << 1; - shared_srcw[0] = OVECTOR(offset); - shared_srcw[1] = OVECTOR(offset + 1); - shared_count = 2; + offset = GET2(cc, 1 + LINK_SIZE); + shared_srcw[0] = OVECTOR(offset << 1); + if (recurse_check_bit(common, shared_srcw[0])) + { + shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1])); + shared_count = 2; + } - if (common->capture_last_ptr != 0 && !capture_last_found) + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) { - shared_srcw[2] = common->capture_last_ptr; - shared_count = 3; - capture_last_found = TRUE; + shared_srcw[shared_count] = common->capture_last_ptr; + shared_count++; } - private_count = 2; private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + + offset = OVECTOR_PRIV(offset); + if (recurse_check_bit(common, offset)) + { + private_srcw[private_count] = offset; + private_count++; + } cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -2528,18 +2927,17 @@ while (cc < ccend) alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { - private_count = 1; private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; } cc += 1 + LINK_SIZE; break; CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - { + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); - } cc += 2; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2547,11 +2945,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 2; #ifdef SUPPORT_UNICODE @@ -2560,11 +2959,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UNICODE @@ -2573,30 +2973,30 @@ while (cc < ccend) break; CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - { + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); - } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 1 + IMM2_SIZE; break; @@ -2610,23 +3010,28 @@ while (cc < ccend) i = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif if (PRIVATE_DATA(cc) != 0) + { + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); switch(get_class_iterator_size(cc + i)) { case 1: - private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); break; case 2: - private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + if (recurse_check_bit(common, private_srcw[0])) + { + private_count = 2; + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); + } break; default: SLJIT_UNREACHABLE(); break; } + } cc += i; break; @@ -2635,28 +3040,25 @@ while (cc < ccend) case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); - if (has_quit && !setmark_found) + if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr)) { kept_shared_srcw[0] = common->mark_ptr; kept_shared_count = 1; - setmark_found = TRUE; } - if (common->control_head_ptr != 0 && !control_head_found) + if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr)) { - shared_srcw[0] = common->control_head_ptr; - shared_count = 1; - control_head_found = TRUE; + private_srcw[0] = common->control_head_ptr; + private_count = 1; } cc += 1 + 2 + cc[1]; break; case OP_THEN: SLJIT_ASSERT(common->control_head_ptr != 0); - if (!control_head_found) + if (recurse_check_bit(common, common->control_head_ptr)) { - shared_srcw[0] = common->control_head_ptr; - shared_count = 1; - control_head_found = TRUE; + private_srcw[0] = common->control_head_ptr; + private_count = 1; } cc++; break; @@ -2664,7 +3066,7 @@ while (cc < ccend) default: cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); - break; + continue; } if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global) @@ -2739,24 +3141,43 @@ PCRE2_SPTR end = bracketend(cc); BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT; /* Assert captures then. */ -if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) +if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) current_offset = NULL; /* Conditional block does not. */ if (*cc == OP_COND || *cc == OP_SCOND) has_alternatives = FALSE; cc = next_opcode(common, cc); + if (has_alternatives) + { + if (*cc == OP_REVERSE) + cc += 1 + IMM2_SIZE; + else if (*cc == OP_VREVERSE) + cc += 1 + 2 * IMM2_SIZE; + current_offset = common->then_offsets + (cc - common->start); + } while (cc < end) { - if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) + if ((*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NA) || (*cc >= OP_ONCE && *cc <= OP_SCOND)) cc = set_then_offsets(common, cc, current_offset); else { if (*cc == OP_ALT && has_alternatives) - current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start); + { + cc += 1 + LINK_SIZE; + + if (*cc == OP_REVERSE) + cc += 1 + IMM2_SIZE; + else if (*cc == OP_VREVERSE) + cc += 1 + 2 * IMM2_SIZE; + + current_offset = common->then_offsets + (cc - common->start); + continue; + } + if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) *current_offset = 1; cc = next_opcode(common, cc); @@ -2780,7 +3201,7 @@ return (value & (value - 1)) == 0; static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) { -while (list) +while (list != NULL) { /* sljit_set_label is clever enough to do nothing if either the jump or the label is NULL. */ @@ -2829,20 +3250,6 @@ while (list_item) common->stubs = NULL; } -static void add_label_addr(compiler_common *common, sljit_uw *update_addr) -{ -DEFINE_COMPILER; -label_addr_list *label_addr; - -label_addr = sljit_alloc_memory(compiler, sizeof(label_addr_list)); -if (label_addr == NULL) - return; -label_addr->label = LABEL(); -label_addr->update_addr = update_addr; -label_addr->next = common->label_addrs; -common->label_addrs = label_addr; -} - static SLJIT_INLINE void count_match(compiler_common *common) { DEFINE_COMPILER; @@ -2857,7 +3264,7 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) DEFINE_COMPILER; SLJIT_ASSERT(size > 0); -OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); +OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); #ifdef DESTROY_REGISTERS OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); @@ -2873,7 +3280,7 @@ static SLJIT_INLINE void free_stack(compiler_common *common, int size) DEFINE_COMPILER; SLJIT_ASSERT(size > 0); -OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); +OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); } static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size) @@ -2913,12 +3320,12 @@ if (length < 8) } else { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS) { GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, loop); } @@ -2935,16 +3342,54 @@ else } } -static SLJIT_INLINE void reset_fast_fail(compiler_common *common) +static SLJIT_INLINE void reset_early_fail(compiler_common *common) { DEFINE_COMPILER; +sljit_u32 size = (sljit_u32)(common->early_fail_end_ptr - common->early_fail_start_ptr); +sljit_u32 uncleared_size; +sljit_s32 src = SLJIT_IMM; sljit_s32 i; +struct sljit_label *loop; + +SLJIT_ASSERT(common->early_fail_start_ptr < common->early_fail_end_ptr); + +if (size == sizeof(sljit_sw)) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->early_fail_start_ptr, SLJIT_IMM, 0); + return; + } + +if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER)) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); + src = TMP3; + } + +if (size <= 6 * sizeof(sljit_sw)) + { + for (i = common->early_fail_start_ptr; i < common->early_fail_end_ptr; i += sizeof(sljit_sw)) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, src, 0); + return; + } + +GET_LOCAL_BASE(TMP1, 0, common->early_fail_start_ptr); + +uncleared_size = ((size / sizeof(sljit_sw)) % 3) * sizeof(sljit_sw); -SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr); +OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size); -OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -for (i = common->fast_fail_start_ptr; i < common->fast_fail_end_ptr; i += sizeof(sljit_sw)) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), i, TMP1, 0); +loop = LABEL(); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0); +CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop); + +if (uncleared_size >= sizeof(sljit_sw)) + OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0); + +if (uncleared_size >= 2 * sizeof(sljit_sw)) + OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), sizeof(sljit_sw), src, 0); } static SLJIT_INLINE void do_reset_match(compiler_common *common, int length) @@ -2964,12 +3409,12 @@ if (length < 8) } else { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS) { GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2); loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, loop); } @@ -2985,12 +3430,18 @@ else } } -OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); +if (!HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, stack)); +else + OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0); + if (common->mark_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); if (common->control_head_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); -OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); +if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end)); } @@ -3029,26 +3480,41 @@ BOOL has_pre; OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0); -OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); -if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); -OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); -if (common->mark_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); -OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data), - SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, oveccount)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); + if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0); + OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, match_data), + SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); + } +else + { + OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, match_data)); + if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); + OP1(SLJIT_MOV_U32, SLJIT_R1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, oveccount)); + OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_S0, 0); + if (common->mark_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R0, 0); + OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); + } -has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS; +has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS; GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0)); -OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); loop = LABEL(); if (has_pre) - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); else { OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0); @@ -3071,14 +3537,14 @@ JUMPTO(SLJIT_NOT_ZERO, loop); /* Calculate the return value, which is the maximum ovector value. */ if (topbracket > 1) { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS) { GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1); /* OVECTOR(0) is never equal to SLJIT_S2. */ loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))); OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); @@ -3091,7 +3557,7 @@ if (topbracket > 1) /* OVECTOR(0) is never equal to SLJIT_S2. */ loop = LABEL(); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0); - OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw)); + OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); @@ -3105,20 +3571,22 @@ static SLJIT_INLINE void return_with_partial_match(compiler_common *common, stru { DEFINE_COMPILER; sljit_s32 mov_opcode; +sljit_s32 arguments_reg = !HAS_VIRTUAL_REGISTERS ? ARGUMENTS : SLJIT_R1; SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S0, str_end_must_be_saved_reg0); SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0 && (common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start != 0 : common->hit_start == 0)); -OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0); +if (arguments_reg != ARGUMENTS) + OP1(SLJIT_MOV, arguments_reg, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mode == PCRE2_JIT_PARTIAL_SOFT ? common->hit_start : common->start_ptr); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE2_ERROR_PARTIAL); /* Store match begin and end. */ -OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin)); -OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0); -OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, match_data)); +OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, begin)); +OP1(SLJIT_MOV, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, startchar_ptr), SLJIT_R2, 0); +OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(arguments_reg), SLJIT_OFFSETOF(jit_arguments, match_data)); mov_opcode = (sizeof(PCRE2_SIZE) == 4) ? SLJIT_MOV_U32 : SLJIT_MOV; @@ -3167,16 +3635,19 @@ static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, PCRE2_SPTR unsigned int c; #ifdef SUPPORT_UNICODE -if (common->utf) +if (common->utf || common->ucp) { - GETCHAR(c, cc); - if (c > 127) + if (common->utf) { - return c != UCD_OTHERCASE(c); + GETCHAR(c, cc); } -#if PCRE2_CODE_UNIT_WIDTH != 8 + else + c = *cc; + + if (c > 127) + return c != UCD_OTHERCASE(c); + return common->fcc[c] != c; -#endif } else #endif @@ -3188,10 +3659,8 @@ static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigne { /* Returns with the othercase. */ #ifdef SUPPORT_UNICODE -if (common->utf && c > 127) - { +if ((common->utf || common->ucp) && c > 127) return UCD_OTHERCASE(c); - } #endif return TABLE_GET(c, common->fcc, c); } @@ -3205,15 +3674,19 @@ int n; #endif #ifdef SUPPORT_UNICODE -if (common->utf) +if (common->utf || common->ucp) { - GETCHAR(c, cc); + if (common->utf) + { + GETCHAR(c, cc); + } + else + c = *cc; + if (c <= 127) oc = common->fcc[c]; else - { oc = UCD_OTHERCASE(c); - } } else { @@ -3279,7 +3752,7 @@ SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE); if (common->mode == PCRE2_JIT_COMPLETE) return; -if (!force) +if (!force && !common->allow_empty_partial) jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); else if (common->mode == PCRE2_JIT_PARTIAL_SOFT) jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); @@ -3341,7 +3814,11 @@ if (common->mode == PCRE2_JIT_COMPLETE) /* Partial matching mode. */ jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); -add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); +if (!common->allow_empty_partial) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); +else if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1)); + if (common->mode == PCRE2_JIT_PARTIAL_SOFT) { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); @@ -3357,6 +3834,35 @@ else JUMPHERE(jump); } +static void process_partial_match(compiler_common *common) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; + +/* Partial matching mode. */ +if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + { + jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); + JUMPHERE(jump); + } +else if (common->mode == PCRE2_JIT_PARTIAL_HARD) + { + if (common->partialmatchlabel != NULL) + CMPTO(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0, common->partialmatchlabel); + else + add_jump(compiler, &common->partialmatch, CMP(SLJIT_LESS, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); + } +} + +static void detect_partial_match_to(compiler_common *common, struct sljit_label *label) +{ +DEFINE_COMPILER; + +CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, label); +process_partial_match(common); +} + static void peek_char(compiler_common *common, sljit_u32 max, sljit_s32 dst, sljit_sw dstw, jump_list **backtracks) { /* Reads the character into TMP1, keeps STR_PTR. @@ -3420,12 +3926,21 @@ if (common->utf) #elif PCRE2_CODE_UNIT_WIDTH == 32 if (common->invalid_utf) { + if (max < 0xd800) return; + if (backtracks != NULL) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); + } else { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); } } #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ @@ -3490,8 +4005,12 @@ if (common->utf) JUMPHERE(jump); } #elif PCRE2_CODE_UNIT_WIDTH == 32 - if (common->invalid_utf) - add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); +if (common->invalid_utf) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); + } #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ #endif /* SUPPORT_UNICODE */ } @@ -3653,15 +4172,15 @@ if (common->utf) /* Skip low surrogate if necessary. */ OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); - if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0) + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS) { if (options & READ_CHAR_UPDATE_STR_PTR) OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); if (options & READ_CHAR_UPDATE_STR_PTR) - CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0); + SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR); if (max >= 0xd800) - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1); } else { @@ -3677,17 +4196,55 @@ if (common->utf) if (common->invalid_utf) { if (backtracks != NULL) + { + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000)); + add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800)); + } else { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); } } #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ #endif /* SUPPORT_UNICODE */ } +static void skip_valid_char(compiler_common *common) +{ +DEFINE_COMPILER; +#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) +struct sljit_jump *jump; +#endif + +#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) + if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if PCRE2_CODE_UNIT_WIDTH == 8 + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#elif PCRE2_CODE_UNIT_WIDTH == 16 + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + JUMPHERE(jump); + return; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +} + #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass) @@ -3729,6 +4286,7 @@ if (negated) if (common->invalid_utf) { + OP1(SLJIT_MOV, TMP1, 0, TMP2, 0); add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); @@ -3832,11 +4390,11 @@ if (common->utf && negated) { OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800); - if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && sljit_get_register_index(RETURN_ADDR) >= 0) + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS) { OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); - CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); + SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR); } else { @@ -3865,14 +4423,11 @@ if (common->utf && negated) static void move_back(compiler_common *common, jump_list **backtracks, BOOL must_be_valid) { -/* Goes one character back. TMP2 must contain the start of -the subject buffer. Affects STR_PTR and TMP1. Does not modify -STR_PTR for invalid character sequences. */ +/* Goes one character back. Affects STR_PTR and TMP1. If must_be_valid is TRUE, +TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer, +and it is destroyed. Does not modify STR_PTR for invalid character sequences. */ DEFINE_COMPILER; -SLJIT_UNUSED_ARG(backtracks); -SLJIT_UNUSED_ARG(must_be_valid); - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_jump *jump; #endif @@ -3921,7 +4476,7 @@ if (common->utf) /* Skip low surrogate if necessary. */ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -3938,7 +4493,7 @@ if (common->invalid_utf && !must_be_valid) return; } - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -3946,6 +4501,10 @@ if (common->invalid_utf && !must_be_valid) } #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ #endif /* SUPPORT_UNICODE */ + +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(must_be_valid); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } @@ -3992,19 +4551,19 @@ of the character (>= 0xc0). Return char value in TMP1. */ DEFINE_COMPILER; struct sljit_jump *jump; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); /* Searching for the first zero. */ -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); /* Two byte sequence. */ OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); @@ -4012,12 +4571,12 @@ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000); jump = JUMP(SLJIT_NOT_ZERO); /* Three byte sequence. */ OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); /* Four byte sequence. */ JUMPHERE(jump); @@ -4027,7 +4586,7 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfreadtype8(compiler_common *common) @@ -4038,9 +4597,9 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_jump *compare; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_NOT_ZERO); /* Two byte sequence. */ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); @@ -4052,18 +4611,18 @@ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(compare); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); /* We only have types for characters less than 256. */ JUMPHERE(jump); OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfreadchar_invalid(compiler_common *common) @@ -4080,7 +4639,7 @@ struct sljit_label *three_byte_entry; struct sljit_label *exit_invalid_label; struct sljit_jump *exit_invalid[11]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2); @@ -4099,11 +4658,11 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); @@ -4114,14 +4673,14 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1); exit_invalid[2] = NULL; } else exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000); jump = JUMP(SLJIT_NOT_ZERO); three_byte_entry = LABEL(); @@ -4129,8 +4688,8 @@ three_byte_entry = LABEL(); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1); exit_invalid[3] = NULL; } else @@ -4140,13 +4699,13 @@ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); exit_invalid[4] = NULL; } else exit_invalid[4] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); @@ -4157,8 +4716,8 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1); exit_invalid[5] = NULL; } else @@ -4167,15 +4726,15 @@ else OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1); exit_invalid[6] = NULL; } else exit_invalid[6] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(buffer_end_close); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); @@ -4189,10 +4748,10 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); /* Three-byte sequence. */ JUMPHERE(jump); @@ -4204,8 +4763,8 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); exit_invalid[10] = NULL; } else @@ -4222,7 +4781,7 @@ for (i = 0; i < 11; i++) sljit_set_label(exit_invalid[i], exit_invalid_label); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfreadnewline_invalid(compiler_common *common) @@ -4236,7 +4795,7 @@ struct sljit_label *skip_start; struct sljit_label *three_byte_exit; struct sljit_jump *jump[5]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); if (common->nltype != NLTYPE_ANY) { @@ -4245,7 +4804,14 @@ if (common->nltype != NLTYPE_ANY) /* All newlines are ascii, just skip intermediate octets. */ jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); loop = LABEL(); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS) + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + else + { + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + } + OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc0); CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0x80, loop); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -4253,7 +4819,7 @@ if (common->nltype != NLTYPE_ANY) JUMPHERE(jump[0]); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); - sljit_emit_fast_return(compiler, RETURN_ADDR, 0); + OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); return; } @@ -4284,14 +4850,14 @@ JUMPHERE(jump[0]); JUMPHERE(jump[4]); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); /* Two byte long newline: 0x85. */ JUMPHERE(jump[1]); CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0x85, skip_start); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x85); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); /* Three byte long newlines: 0x2028 and 0x2029. */ JUMPHERE(jump[2]); @@ -4306,7 +4872,7 @@ CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x40, skip_start); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0x2000); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfmoveback_invalid(compiler_common *common) @@ -4320,7 +4886,7 @@ struct sljit_label *exit_ok_label; struct sljit_label *exit_invalid_label; struct sljit_jump *exit_invalid[7]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0); @@ -4335,7 +4901,7 @@ jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x20); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); /* Three-byte sequence. */ JUMPHERE(jump); @@ -4348,7 +4914,7 @@ jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x10); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); /* Four-byte sequence. */ JUMPHERE(jump); @@ -4361,7 +4927,7 @@ exit_invalid[3] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x05); exit_ok_label = LABEL(); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); /* Two-byte sequence. */ JUMPHERE(buffer_start_close); @@ -4391,7 +4957,7 @@ sljit_set_label(exit_invalid[5], exit_invalid_label); sljit_set_label(exit_invalid[6], exit_invalid_label); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(exit_invalid[4]); /* -2 + 4 = 2 */ @@ -4402,16 +4968,16 @@ for (i = 0; i < 4; i++) sljit_set_label(exit_invalid[i], exit_invalid_label); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(4)); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfpeakcharback(compiler_common *common) { -/* Peak a character back. */ +/* Peak a character back. Does not modify STR_PTR. */ DEFINE_COMPILER; struct sljit_jump *jump[2]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); @@ -4439,12 +5005,12 @@ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfpeakcharback_invalid(compiler_common *common) { -/* Peak a character back. */ +/* Peak a character back. Does not modify STR_PTR. */ DEFINE_COMPILER; sljit_s32 i; sljit_s32 has_cmov = sljit_has_cpu_feature(SLJIT_HAS_CMOV); @@ -4454,7 +5020,7 @@ struct sljit_label *three_byte_entry; struct sljit_label *exit_invalid_label; struct sljit_jump *exit_invalid[8]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0); @@ -4469,7 +5035,7 @@ two_byte_entry = LABEL(); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6); /* If TMP1 is in 0x80-0xbf range, TMP1 is also increased by (0x2 << 6). */ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump[1]); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xc2 - 0x80); @@ -4490,8 +5056,8 @@ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1); exit_invalid[2] = NULL; } else @@ -4500,14 +5066,14 @@ else OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); exit_invalid[3] = NULL; } else exit_invalid[3] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x800); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump[1]); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0 - 0x80); @@ -4525,15 +5091,15 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1); exit_invalid[5] = NULL; } else exit_invalid[5] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump[0]); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -4556,7 +5122,7 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xe0); CMPTO(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0x10, three_byte_entry); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump[0]); exit_invalid[7] = CMP(SLJIT_GREATER, TMP2, 0, STR_PTR, 0); @@ -4571,7 +5137,7 @@ for (i = 0; i < 8; i++) sljit_set_label(exit_invalid[i], exit_invalid_label); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } #endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ @@ -4586,7 +5152,7 @@ undefined for invalid characters. */ DEFINE_COMPILER; struct sljit_jump *exit_invalid[3]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); /* TMP2 contains the high surrogate. */ exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); @@ -4601,13 +5167,13 @@ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x10000); exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(exit_invalid[0]); JUMPHERE(exit_invalid[1]); JUMPHERE(exit_invalid[2]); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfreadnewline_invalid(compiler_common *common) @@ -4619,7 +5185,7 @@ char value in TMP1. */ DEFINE_COMPILER; struct sljit_jump *exit_invalid[2]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); /* TMP2 contains the high surrogate. */ exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); @@ -4628,18 +5194,18 @@ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00); -OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); +OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(exit_invalid[0]); JUMPHERE(exit_invalid[1]); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfmoveback_invalid(compiler_common *common) @@ -4648,7 +5214,7 @@ static void do_utfmoveback_invalid(compiler_common *common) DEFINE_COMPILER; struct sljit_jump *exit_invalid[3]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400); exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); @@ -4659,7 +5225,7 @@ exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x400); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(exit_invalid[0]); JUMPHERE(exit_invalid[1]); @@ -4667,17 +5233,17 @@ JUMPHERE(exit_invalid[2]); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_utfpeakcharback_invalid(compiler_common *common) { -/* Peak a character back. */ +/* Peak a character back. Does not modify STR_PTR. */ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_jump *exit_invalid[3]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -4692,14 +5258,14 @@ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); JUMPHERE(jump); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(exit_invalid[0]); JUMPHERE(exit_invalid[1]); JUMPHERE(exit_invalid[2]); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } #endif /* PCRE2_CODE_UNIT_WIDTH == 16 */ @@ -4726,7 +5292,7 @@ SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0); SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12); -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); #if PCRE2_CODE_UNIT_WIDTH == 32 if (!common->utf) @@ -4745,7 +5311,7 @@ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_getucdtype(compiler_common *common) @@ -4766,7 +5332,7 @@ SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0); SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12); -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); #if PCRE2_CODE_UNIT_WIDTH == 32 if (!common->utf) @@ -4786,19 +5352,13 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); -// PH hacking -//fprintf(stderr, "~~A\n"); - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - +/* TMP2 is multiplied by 12. Same as (TMP2 << 2) + ((TMP2 << 2) << 1). */ OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); +OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); +OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 1); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0); - -// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } #endif /* SUPPORT_UNICODE */ @@ -4866,15 +5426,27 @@ else if ((overall_options & PCRE2_USE_OFFSET_LIMIT) != 0) /* Check whether offset limit is set and valid. */ SLJIT_ASSERT(common->match_end_ptr != 0); - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit)); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, offset_limit)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, offset_limit)); + OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); end = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw) PCRE2_UNSET); - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif /* PCRE2_CODE_UNIT_WIDTH == [16|32] */ - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); end2 = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0); OP1(SLJIT_MOV, TMP2, 0, STR_END, 0); @@ -4893,7 +5465,7 @@ if (newlinecheck) OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -4958,12 +5530,12 @@ else if (common->utf) if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); - CMOV(SLJIT_LESS, STR_PTR, TMP2, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400); + SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR); } else { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -5066,6 +5638,8 @@ while (TRUE) case OP_CIRCM: case OP_DOLL: case OP_DOLLM: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: /* Zero width assertions. */ cc++; continue; @@ -5074,6 +5648,8 @@ while (TRUE) case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: cc = bracketend(cc); continue; @@ -5373,7 +5949,12 @@ while (TRUE) #endif { chr = *cc; - othercase[0] = TABLE_GET(chr, common->fcc, chr); +#ifdef SUPPORT_UNICODE + if (common->ucp && chr > 127) + othercase[0] = UCD_OTHERCASE(chr); + else +#endif + othercase[0] = TABLE_GET(chr, common->fcc, chr); } } else @@ -5434,699 +6015,57 @@ CMPTO(SLJIT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00, label); } #endif -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xc0); -return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0x80); -#elif PCRE2_CODE_UNIT_WIDTH == 16 -OP2(SLJIT_AND, reg, 0, reg, 0, SLJIT_IMM, 0xfc00); -return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); -#else -#error "Unknown code width" -#endif -} -#endif - -static sljit_s32 character_to_int32(PCRE2_UCHAR chr) -{ -sljit_u32 value = chr; -#if PCRE2_CODE_UNIT_WIDTH == 8 -#define SSE2_COMPARE_TYPE_INDEX 0 -return (sljit_s32)((value << 24) | (value << 16) | (value << 8) | value); -#elif PCRE2_CODE_UNIT_WIDTH == 16 -#define SSE2_COMPARE_TYPE_INDEX 1 -return (sljit_s32)((value << 16) | value); -#elif PCRE2_CODE_UNIT_WIDTH == 32 -#define SSE2_COMPARE_TYPE_INDEX 2 -return (sljit_s32)(value); -#else -#error "Unsupported unit width" -#endif -} - -static void load_from_mem_sse2(struct sljit_compiler *compiler, sljit_s32 dst_xmm_reg, sljit_s32 src_general_reg) -{ -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -sljit_u8 instruction[5]; -#else -sljit_u8 instruction[4]; -#endif - -SLJIT_ASSERT(dst_xmm_reg < 8); - -/* MOVDQA xmm1, xmm2/m128 */ -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -if (src_general_reg < 8) - { - instruction[0] = 0x66; - instruction[1] = 0x0f; - instruction[2] = 0x6f; - instruction[3] = (dst_xmm_reg << 3) | src_general_reg; - sljit_emit_op_custom(compiler, instruction, 4); - } -else - { - instruction[0] = 0x66; - instruction[1] = 0x41; - instruction[2] = 0x0f; - instruction[3] = 0x6f; - instruction[4] = (dst_xmm_reg << 3) | (src_general_reg & 0x7); - sljit_emit_op_custom(compiler, instruction, 4); - } -#else -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0x6f; -instruction[3] = (dst_xmm_reg << 3) | src_general_reg; -sljit_emit_op_custom(compiler, instruction, 4); -#endif -} - -static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, PCRE2_UCHAR char1, PCRE2_UCHAR char2, - sljit_u32 bit, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) -{ -sljit_u8 instruction[4]; -instruction[0] = 0x66; -instruction[1] = 0x0f; - -if (char1 == char2 || bit != 0) - { - if (bit != 0) - { - /* POR xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0xeb; - instruction[3] = 0xc0 | (dst_ind << 3) | cmp2_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } - - /* PCMPEQB/W/D xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; - instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } -else - { - /* MOVDQA xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0x6f; - instruction[3] = 0xc0 | (tmp_ind << 3) | dst_ind; - sljit_emit_op_custom(compiler, instruction, 4); - - /* PCMPEQB/W/D xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; - instruction[3] = 0xc0 | (dst_ind << 3) | cmp1_ind; - sljit_emit_op_custom(compiler, instruction, 4); - - instruction[3] = 0xc0 | (tmp_ind << 3) | cmp2_ind; - sljit_emit_op_custom(compiler, instruction, 4); +#include "pcre2_jit_simd_inc.h" - /* POR xmm1, xmm2/m128 */ - /* instruction[0] = 0x66; */ - /* instruction[1] = 0x0f; */ - instruction[2] = 0xeb; - instruction[3] = 0xc0 | (dst_ind << 3) | tmp_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } -} +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD -static void fast_forward_first_char2_sse2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forward_char_data *chars, int max) { -DEFINE_COMPILER; -struct sljit_label *start; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -struct sljit_label *restart; -#endif -struct sljit_jump *quit; -struct sljit_jump *partial_quit[2]; -sljit_u8 instruction[8]; -sljit_s32 tmp1_ind = sljit_get_register_index(TMP1); -sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR); -sljit_s32 data_ind = 0; -sljit_s32 tmp_ind = 1; -sljit_s32 cmp1_ind = 2; -sljit_s32 cmp2_ind = 3; -sljit_u32 bit = 0; - -SLJIT_UNUSED_ARG(offset); - -if (char1 != char2) - { - bit = char1 ^ char2; - if (!is_powerof2(bit)) - bit = 0; - } - -partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->mode == PCRE2_JIT_COMPLETE) - add_jump(compiler, &common->failed_match, partial_quit[0]); - -/* First part (unaligned start) */ - -OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); - -SLJIT_ASSERT(tmp1_ind < 8); - -/* MOVD xmm, r/m32 */ -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0x6e; -instruction[3] = 0xc0 | (cmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char1 != char2) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); - - /* MOVD xmm, r/m32 */ - instruction[3] = 0xc0 | (cmp2_ind << 3) | tmp1_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } - -OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); - -/* PSHUFD xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x70; -instruction[3] = 0xc0 | (cmp1_ind << 3) | 2; -instruction[4] = 0; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char1 != char2) - { - /* PSHUFD xmm1, xmm2/m128, imm8 */ - instruction[3] = 0xc0 | (cmp2_ind << 3) | 3; - sljit_emit_op_custom(compiler, instruction, 5); - } - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -restart = LABEL(); -#endif -OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); - -load_from_mem_sse2(compiler, data_ind, str_ptr_ind); -fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 3); -sljit_set_current_flags(compiler, SLJIT_SET_Z); - -quit = JUMP(SLJIT_NOT_ZERO); - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - -start = LABEL(); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); - -partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->mode == PCRE2_JIT_COMPLETE) - add_jump(compiler, &common->failed_match, partial_quit[1]); - -/* Second part (aligned) */ - -load_from_mem_sse2(compiler, 0, str_ptr_ind); -fast_forward_char_pair_sse2_compare(compiler, char1, char2, bit, data_ind, cmp1_ind, cmp2_ind, tmp_ind); - -/* PMOVMSKB reg, xmm */ -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 3); -sljit_set_current_flags(compiler, SLJIT_SET_Z); - -JUMPTO(SLJIT_ZERO, start); - -JUMPHERE(quit); -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - -if (common->mode != PCRE2_JIT_COMPLETE) - { - JUMPHERE(partial_quit[0]); - JUMPHERE(partial_quit[1]); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); - CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); - } -else - add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -if (common->utf && offset > 0) - { - SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); - - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); - - quit = jump_if_utf_char_start(compiler, TMP1); - - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, restart); - - JUMPHERE(quit); - } -#endif -} - -#ifndef _WIN64 - -static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_sse2_offset(void) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -return 15; -#elif PCRE2_CODE_UNIT_WIDTH == 16 -return 7; -#elif PCRE2_CODE_UNIT_WIDTH == 32 -return 3; -#else -#error "Unsupported unit width" -#endif -} - -static void fast_forward_char_pair_sse2(compiler_common *common, sljit_s32 offs1, - PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) -{ -DEFINE_COMPILER; -sljit_u32 bit1 = 0; -sljit_u32 bit2 = 0; -sljit_u32 diff = IN_UCHARS(offs1 - offs2); -sljit_s32 tmp1_ind = sljit_get_register_index(TMP1); -sljit_s32 tmp2_ind = sljit_get_register_index(TMP2); -sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR); -sljit_s32 data1_ind = 0; -sljit_s32 data2_ind = 1; -sljit_s32 tmp_ind = 2; -sljit_s32 cmp1a_ind = 3; -sljit_s32 cmp1b_ind = 4; -sljit_s32 cmp2a_ind = 5; -sljit_s32 cmp2b_ind = 6; -struct sljit_label *start; -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -struct sljit_label *restart; -#endif -struct sljit_jump *jump[2]; - -sljit_u8 instruction[8]; - -SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); -SLJIT_ASSERT(diff <= IN_UCHARS(max_fast_forward_char_pair_sse2_offset())); -SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1); - -/* Initialize. */ -if (common->match_end_ptr != 0) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); - OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); - - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); - CMOV(SLJIT_LESS, STR_END, TMP1, 0); - } - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -/* MOVD xmm, r/m32 */ -instruction[0] = 0x66; -instruction[1] = 0x0f; -instruction[2] = 0x6e; - -if (char1a == char1b) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); -else - { - bit1 = char1a ^ char1b; - if (is_powerof2(bit1)) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); - } - else - { - bit1 = 0; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); - } - } - -instruction[3] = 0xc0 | (cmp1a_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char1a != char1b) - { - instruction[3] = 0xc0 | (cmp1b_ind << 3) | tmp2_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } - -if (char2a == char2b) - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); -else - { - bit2 = char2a ^ char2b; - if (is_powerof2(bit2)) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); - } - else - { - bit2 = 0; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); - } - } - -instruction[3] = 0xc0 | (cmp2a_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -if (char2a != char2b) - { - instruction[3] = 0xc0 | (cmp2b_ind << 3) | tmp2_ind; - sljit_emit_op_custom(compiler, instruction, 4); - } - -/* PSHUFD xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x70; -instruction[4] = 0; - -instruction[3] = 0xc0 | (cmp1a_ind << 3) | cmp1a_ind; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char1a != char1b) - { - instruction[3] = 0xc0 | (cmp1b_ind << 3) | cmp1b_ind; - sljit_emit_op_custom(compiler, instruction, 5); - } - -instruction[3] = 0xc0 | (cmp2a_ind << 3) | cmp2a_ind; -sljit_emit_op_custom(compiler, instruction, 5); - -if (char2a != char2b) - { - instruction[3] = 0xc0 | (cmp2b_ind << 3) | cmp2b_ind; - sljit_emit_op_custom(compiler, instruction, 5); - } - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -restart = LABEL(); -#endif - -OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1 - offs2)); -OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); -OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); -OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, ~0xf); - -load_from_mem_sse2(compiler, data1_ind, str_ptr_ind); - -jump[0] = CMP(SLJIT_EQUAL, STR_PTR, 0, TMP1, 0); - -load_from_mem_sse2(compiler, data2_ind, tmp1_ind); - -/* MOVDQA xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x6f; -instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PSLLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (7 << 3) | tmp_ind; -instruction[4] = diff; -sljit_emit_op_custom(compiler, instruction, 5); - -/* PSRLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -/* instruction[2] = 0x73; */ -instruction[3] = 0xc0 | (3 << 3) | data2_ind; -instruction[4] = 16 - diff; -sljit_emit_op_custom(compiler, instruction, 5); - -/* POR xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xeb; -instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -jump[1] = JUMP(SLJIT_JUMP); - -JUMPHERE(jump[0]); - -/* MOVDQA xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x6f; -instruction[3] = 0xc0 | (data2_ind << 3) | data1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PSLLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (7 << 3) | data2_ind; -instruction[4] = diff; -sljit_emit_op_custom(compiler, instruction, 5); - -JUMPHERE(jump[1]); - -OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xf); - -fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind); -fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind); - -/* PAND xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xdb; -instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -/* Ignore matches before the first STR_PTR. */ -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); -OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, TMP2, 0); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 3); -sljit_set_current_flags(compiler, SLJIT_SET_Z); - -jump[0] = JUMP(SLJIT_NOT_ZERO); - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); - -/* Main loop. */ -instruction[0] = 0x66; -instruction[1] = 0x0f; - -start = LABEL(); - -load_from_mem_sse2(compiler, data2_ind, str_ptr_ind); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -load_from_mem_sse2(compiler, data1_ind, str_ptr_ind); - -/* PSRLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (3 << 3) | data2_ind; -instruction[4] = 16 - diff; -sljit_emit_op_custom(compiler, instruction, 5); - -/* MOVDQA xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x6f; -instruction[3] = 0xc0 | (tmp_ind << 3) | data1_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PSLLDQ xmm1, xmm2/m128, imm8 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0x73; -instruction[3] = 0xc0 | (7 << 3) | tmp_ind; -instruction[4] = diff; -sljit_emit_op_custom(compiler, instruction, 5); - -/* POR xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xeb; -instruction[3] = 0xc0 | (data2_ind << 3) | tmp_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -fast_forward_char_pair_sse2_compare(compiler, char1a, char1b, bit1, data1_ind, cmp1a_ind, cmp1b_ind, tmp_ind); -fast_forward_char_pair_sse2_compare(compiler, char2a, char2b, bit2, data2_ind, cmp2a_ind, cmp2b_ind, tmp_ind); - -/* PAND xmm1, xmm2/m128 */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xdb; -instruction[3] = 0xc0 | (data1_ind << 3) | data2_ind; -sljit_emit_op_custom(compiler, instruction, 4); - -/* PMOVMSKB reg, xmm */ -/* instruction[0] = 0x66; */ -/* instruction[1] = 0x0f; */ -instruction[2] = 0xd7; -instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; -sljit_emit_op_custom(compiler, instruction, 4); - -/* BSF r32, r/m32 */ -instruction[0] = 0x0f; -instruction[1] = 0xbc; -instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; -sljit_emit_op_custom(compiler, instruction, 3); -sljit_set_current_flags(compiler, SLJIT_SET_Z); - -JUMPTO(SLJIT_ZERO, start); - -JUMPHERE(jump[0]); - -OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - -add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); - -if (common->match_end_ptr != 0) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); - -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 -if (common->utf) - { - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); - - jump[0] = jump_if_utf_char_start(compiler, TMP1); - - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, restart); - - add_jump(compiler, &common->failed_match, JUMP(SLJIT_JUMP)); - - JUMPHERE(jump[0]); - } -#endif - -OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); - -if (common->match_end_ptr != 0) - OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); -} - -static BOOL check_fast_forward_char_pair_sse2(compiler_common *common, fast_forward_char_data *chars, int max) -{ -sljit_s32 i, j, priority, count; -sljit_u32 priorities; -PCRE2_UCHAR a1, a2, b1, b2; - -priorities = 0; - -count = 0; -for (i = 0; i < max; i++) - { - if (chars[i].last_count > 2) - { - SLJIT_ASSERT(chars[i].last_count <= 7); - - priorities |= (1 << chars[i].last_count); - count++; - } - } - -if (count < 2) - return FALSE; - -for (priority = 7; priority > 2; priority--) - { - if ((priorities & (1 << priority)) == 0) - continue; + sljit_s32 i, j, max_i = 0, max_j = 0; + sljit_u32 max_pri = 0; + sljit_s32 max_offset = max_fast_forward_char_pair_offset(); + PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri; for (i = max - 1; i >= 1; i--) - if (chars[i].last_count >= priority) + { + if (chars[i].last_count > 2) { - SLJIT_ASSERT(chars[i].count <= 2 && chars[i].count >= 1); - a1 = chars[i].chars[0]; a2 = chars[i].chars[1]; + a_pri = chars[i].last_count; - j = i - max_fast_forward_char_pair_sse2_offset(); + j = i - max_offset; if (j < 0) j = 0; while (j < i) { - if (chars[j].last_count >= priority) + b_pri = chars[j].last_count; + if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri) { b1 = chars[j].chars[0]; b2 = chars[j].chars[1]; if (a1 != b1 && a1 != b2 && a2 != b1 && a2 != b2) { - fast_forward_char_pair_sse2(common, i, a1, a2, j, b1, b2); - return TRUE; + max_pri = a_pri + b_pri; + max_i = i; + max_j = j; } } j++; } } - } + } -return FALSE; -} +if (max_pri == 0) + return FALSE; -#endif +fast_forward_char_pair_simd(common, max_i, chars[max_i].chars[0], chars[max_i].chars[1], max_j, chars[max_j].chars[0], chars[max_j].chars[1]); +return TRUE; +} -#undef SSE2_COMPARE_TYPE_INDEX - -#endif +#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */ static void fast_forward_first_char2(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { @@ -6150,17 +6089,15 @@ if (has_match_end) OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); - CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); + SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); } -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) - -/* SSE2 accelerated first character search. */ +#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD -if (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) +if (JIT_HAS_FAST_FORWARD_CHAR_SIMD) { - fast_forward_first_char2_sse2(common, char1, char2, offset); + fast_forward_char_simd(common, char1, char2, offset); if (offset > 0) OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset)); @@ -6267,8 +6204,8 @@ for (i = 0; i < max; i++) chars[i].last_count = (chars[i].count == 255) ? 0 : 1; } -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) && !(defined _WIN64) -if (sljit_has_cpu_feature(SLJIT_HAS_SSE2) && check_fast_forward_char_pair_sse2(common, chars, max)) +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD +if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && check_fast_forward_char_pair_simd(common, chars, max)) return TRUE; #endif @@ -6353,18 +6290,21 @@ if (common->match_end_ptr != 0) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); - OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); - CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); + SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); } else - OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + { + OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); + add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); + } SLJIT_ASSERT(range_right >= 0); -#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); -#endif +if (!HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table); start = LABEL(); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0)); @@ -6375,11 +6315,11 @@ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right)); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1); #endif -#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); -#else -OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); -#endif +if (!HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0); +else + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, start); @@ -6444,8 +6384,8 @@ oc = first_char; if ((common->re->flags & PCRE2_FIRSTCASELESS) != 0) { oc = TABLE_GET(first_char, common->fcc, first_char); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 - if (first_char > 127 && common->utf) +#if defined SUPPORT_UNICODE + if (first_char > 127 && (common->utf || common->ucp)) oc = UCD_OTHERCASE(first_char); #endif } @@ -6457,9 +6397,9 @@ static SLJIT_INLINE void fast_forward_newline(compiler_common *common) { DEFINE_COMPILER; struct sljit_label *loop; -struct sljit_jump *lastchar; +struct sljit_jump *lastchar = NULL; struct sljit_jump *firstchar; -struct sljit_jump *quit; +struct sljit_jump *quit = NULL; struct sljit_jump *foundcr = NULL; struct sljit_jump *notfoundnl; jump_list *newline = NULL; @@ -6472,60 +6412,143 @@ if (common->match_end_ptr != 0) if (common->nltype == NLTYPE_FIXED && common->newline > 255) { - lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); +#ifdef JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD + if (JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD && common->mode == PCRE2_JIT_COMPLETE) + { + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + } + firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); #endif - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); - loop = LABEL(); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); - OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); - CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); + fast_forward_char_pair_simd(common, 1, common->newline & 0xff, common->newline & 0xff, 0, (common->newline >> 8) & 0xff, (common->newline >> 8) & 0xff); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); + } + else +#endif /* JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD */ + { + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + } + firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); +#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); +#endif + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + + loop = LABEL(); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + quit = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1)); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop); + CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop); + + JUMPHERE(quit); + JUMPHERE(lastchar); + } - JUMPHERE(quit); JUMPHERE(firstchar); - JUMPHERE(lastchar); if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); return; } -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + } +else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); + /* Example: match /^/ to \r\n from offset 1. */ -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); -move_back(common, NULL, FALSE); + +if (common->nltype == NLTYPE_ANY) + move_back(common, NULL, FALSE); +else + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); loop = LABEL(); common->ff_newline_shortcut = loop; -read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE); -lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); -if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) - foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); -check_newlinechar(common, common->nltype, &newline, FALSE); -set_jumps(newline, loop); +#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD +if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common->nltype == NLTYPE_ANYCRLF)) + { + if (common->nltype == NLTYPE_ANYCRLF) + { + fast_forward_char_simd(common, CHAR_CR, CHAR_LF, 0); + if (common->mode != PCRE2_JIT_COMPLETE) + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + quit = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + } + else + { + fast_forward_char_simd(common, common->newline, common->newline, 0); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (common->mode != PCRE2_JIT_COMPLETE) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + } + } + } +else +#endif /* JIT_HAS_FAST_FORWARD_CHAR_SIMD */ + { + read_char(common, common->nlmin, common->nlmax, NULL, READ_CHAR_NEWLINE); + lastchar = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); + if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) + foundcr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); + check_newlinechar(common, common->nltype, &newline, FALSE); + set_jumps(newline, loop); + } if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) { - quit = JUMP(SLJIT_JUMP); - JUMPHERE(foundcr); + if (quit == NULL) + { + quit = JUMP(SLJIT_JUMP); + JUMPHERE(foundcr); + } + notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -6534,7 +6557,9 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) JUMPHERE(notfoundnl); JUMPHERE(quit); } -JUMPHERE(lastchar); + +if (lastchar) + JUMPHERE(lastchar); JUMPHERE(firstchar); if (common->match_end_ptr != 0) @@ -6559,8 +6584,8 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); - CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); + SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); } start = LABEL(); @@ -6586,15 +6611,15 @@ if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &ma OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7); OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits); - if (sljit_get_register_index(TMP3) >= 0) + if (!HAS_VIRTUAL_REGISTERS) { OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0); } else { OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); } JUMPTO(SLJIT_ZERO, start); } @@ -6615,67 +6640,80 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0); } -static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar) +static SLJIT_INLINE jump_list *search_requested_char(compiler_common *common, PCRE2_UCHAR req_char, BOOL caseless, BOOL has_firstchar) { DEFINE_COMPILER; struct sljit_label *loop; struct sljit_jump *toolong; -struct sljit_jump *alreadyfound; +struct sljit_jump *already_found; struct sljit_jump *found; -struct sljit_jump *foundoc = NULL; -struct sljit_jump *notfound; +struct sljit_jump *found_oc = NULL; +jump_list *not_found = NULL; sljit_u32 oc, bit; SLJIT_ASSERT(common->req_char_ptr != 0); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); -OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_CU_MAX); -toolong = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0); -alreadyfound = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); +OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(REQ_CU_MAX) * 100); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr); +toolong = CMP(SLJIT_LESS, TMP2, 0, STR_END, 0); +already_found = CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0); if (has_firstchar) OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); else OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0); -loop = LABEL(); -notfound = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0); - -OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); oc = req_char; if (caseless) { oc = TABLE_GET(req_char, common->fcc, req_char); -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 8 - if (req_char > 127 && common->utf) +#if defined SUPPORT_UNICODE + if (req_char > 127 && (common->utf || common->ucp)) oc = UCD_OTHERCASE(req_char); #endif } -if (req_char == oc) - found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + +#ifdef JIT_HAS_FAST_REQUESTED_CHAR_SIMD +if (JIT_HAS_FAST_REQUESTED_CHAR_SIMD) + { + not_found = fast_requested_char_simd(common, req_char, oc); + } else +#endif { - bit = req_char ^ oc; - if (is_powerof2(bit)) - { - OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); - found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); - } + loop = LABEL(); + add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + + OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0); + + if (req_char == oc) + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); else { - found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); - foundoc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); + bit = req_char ^ oc; + if (is_powerof2(bit)) + { + OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, bit); + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | bit); + } + else + { + found = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char); + found_oc = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, oc); + } } + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); + JUMPTO(SLJIT_JUMP, loop); + + JUMPHERE(found); + if (found_oc) + JUMPHERE(found_oc); } -OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); -JUMPTO(SLJIT_JUMP, loop); -JUMPHERE(found); -if (foundoc) - JUMPHERE(foundoc); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0); -JUMPHERE(alreadyfound); + +JUMPHERE(already_found); JUMPHERE(toolong); -return notfound; +return not_found; } static void do_revertframes(compiler_common *common) @@ -6684,26 +6722,27 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_label *mainloop; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); GET_LOCAL_BASE(TMP1, 0, 0); /* Drop frames until we reach STACK_TOP. */ mainloop = LABEL(); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw)); -jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw)); +OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); +jump = JUMP(SLJIT_SIG_LESS_EQUAL); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -if (sljit_get_register_index(TMP3) < 0) +if (HAS_VIRTUAL_REGISTERS) { - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw)); } else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0); GET_LOCAL_BASE(TMP1, 0, 0); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0); @@ -6711,71 +6750,98 @@ else JUMPTO(SLJIT_JUMP, mainloop); JUMPHERE(jump); -jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0); +sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z); +jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */); /* End of reverting values. */ -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); -OP1(SLJIT_NEG, TMP2, 0, TMP2, 0); +OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); -if (sljit_get_register_index(TMP3) < 0) +if (HAS_VIRTUAL_REGISTERS) { - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); } else { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0); } JUMPTO(SLJIT_JUMP, mainloop); } -static void check_wordboundary(compiler_common *common) +#ifdef SUPPORT_UNICODE +#define UCPCAT(bit) (1 << (bit)) +#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2)) +#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3)) +#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1)) +#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu) +#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No) +#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1) +#endif + +static void check_wordboundary(compiler_common *common, BOOL ucp) { DEFINE_COMPILER; struct sljit_jump *skipread; jump_list *skipread_list = NULL; -jump_list *invalid_utf = NULL; +#ifdef SUPPORT_UNICODE +struct sljit_label *valid_utf; +jump_list *invalid_utf1 = NULL; +#endif /* SUPPORT_UNICODE */ +jump_list *invalid_utf2 = NULL; #if PCRE2_CODE_UNIT_WIDTH != 8 || defined SUPPORT_UNICODE struct sljit_jump *jump; #endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */ +SLJIT_UNUSED_ARG(ucp); SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); /* Get type of the previous char, and put it to TMP3. */ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); skipread = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); -if (common->mode == PCRE2_JIT_COMPLETE) - peek_char_back(common, READ_CHAR_MAX, &invalid_utf); +#ifdef SUPPORT_UNICODE +if (common->invalid_utf) + { + peek_char_back(common, READ_CHAR_MAX, &invalid_utf1); + + if (common->mode != PCRE2_JIT_COMPLETE) + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); + OP1(SLJIT_MOV, TMP2, 0, STR_PTR, 0); + move_back(common, NULL, TRUE); + check_start_used_ptr(common); + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + OP1(SLJIT_MOV, STR_PTR, 0, TMP2, 0); + } + } else +#endif /* SUPPORT_UNICODE */ { - move_back(common, &invalid_utf, FALSE); - check_start_used_ptr(common); - /* No need precise read since match fails anyway. */ - read_char(common, 0, READ_CHAR_MAX, &invalid_utf, READ_CHAR_UPDATE_STR_PTR); + if (common->mode == PCRE2_JIT_COMPLETE) + peek_char_back(common, READ_CHAR_MAX, NULL); + else + { + move_back(common, NULL, TRUE); + check_start_used_ptr(common); + read_char(common, 0, READ_CHAR_MAX, NULL, READ_CHAR_UPDATE_STR_PTR); + } } /* Testing char type. */ #ifdef SUPPORT_UNICODE -if (common->use_ucp) +if (ucp) { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); - jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - JUMPHERE(jump); - OP1(SLJIT_MOV, TMP3, 0, TMP2, 0); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N); + OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO); } else #endif /* SUPPORT_UNICODE */ @@ -6802,22 +6868,19 @@ JUMPHERE(skipread); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0); check_str_end(common, &skipread_list); -peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf); +peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2); /* Testing char type. This is a code duplication. */ #ifdef SUPPORT_UNICODE -if (common->use_ucp) + +valid_utf = LABEL(); + +if (ucp) { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); - jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - JUMPHERE(jump); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); } else #endif /* SUPPORT_UNICODE */ @@ -6846,18 +6909,24 @@ set_jumps(skipread_list, LABEL()); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP2(SLJIT_XOR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, TMP3, 0); -sljit_emit_fast_return(compiler, TMP1, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); #ifdef SUPPORT_UNICODE if (common->invalid_utf) { - SLJIT_ASSERT(invalid_utf != NULL); + set_jumps(invalid_utf1, LABEL()); + + peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, NULL); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR, valid_utf); - set_jumps(invalid_utf, LABEL()); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, -1); - sljit_emit_fast_return(compiler, TMP1, 0); - return; + OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); + + set_jumps(invalid_utf2, LABEL()); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + OP1(SLJIT_MOV, TMP2, 0, TMP3, 0); + OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); } #endif /* SUPPORT_UNICODE */ } @@ -7076,7 +7145,7 @@ j = 0; if (char_list[0] == 0) { i++; - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO); } else @@ -7088,8 +7157,8 @@ while (i < len) j++; else { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]); - CMOV(SLJIT_ZERO, TMP2, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]); + SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2); } i++; } @@ -7102,8 +7171,8 @@ if (j != 0) if ((char_list[i] & 0x100) != 0) { j--; - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff); - CMOV(SLJIT_ZERO, TMP2, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff); + SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2); } } @@ -7128,12 +7197,12 @@ static void check_anynewline(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); -OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) @@ -7141,13 +7210,13 @@ if (common->utf) #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void check_hspace(compiler_common *common) @@ -7155,38 +7224,38 @@ static void check_hspace(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) { #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void check_vspace(compiler_common *common) @@ -7194,12 +7263,12 @@ static void check_vspace(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); -OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) @@ -7207,14 +7276,14 @@ if (common->utf) #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif #endif /* SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == [16|32] */ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); -sljit_emit_fast_return(compiler, RETURN_ADDR, 0); +OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); } static void do_casefulcmp(compiler_common *common) @@ -7225,7 +7294,7 @@ struct sljit_label *label; int char1_reg; int char2_reg; -if (sljit_get_register_index(TMP3) < 0) +if (HAS_VIRTUAL_REGISTERS) { char1_reg = STR_END; char2_reg = STACK_TOP; @@ -7236,7 +7305,7 @@ else char2_reg = RETURN_ADDR; } -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); if (char1_reg == STR_END) @@ -7245,11 +7314,11 @@ if (char1_reg == STR_END) OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0); } -if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) { label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_NOT_ZERO, label); @@ -7257,14 +7326,14 @@ if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_ JUMPHERE(jump); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); } -else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) { OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_NOT_ZERO, label); @@ -7294,7 +7363,7 @@ if (char1_reg == STR_END) OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0); } -sljit_emit_fast_return(compiler, TMP1, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); } static void do_caselesscmp(compiler_common *common) @@ -7307,7 +7376,7 @@ int char2_reg; int lcc_table; int opt_type = 0; -if (sljit_get_register_index(TMP3) < 0) +if (HAS_VIRTUAL_REGISTERS) { char2_reg = STACK_TOP; lcc_table = STACK_LIMIT; @@ -7318,12 +7387,12 @@ else lcc_table = TMP3; } -if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) opt_type = 1; -else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) opt_type = 2; -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0); @@ -7339,8 +7408,8 @@ OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc); if (opt_type == 1) { label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); } else if (opt_type == 2) { @@ -7348,8 +7417,8 @@ else if (opt_type == 2) OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); } else { @@ -7392,7 +7461,7 @@ if (char2_reg == STACK_TOP) } OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); -sljit_emit_fast_return(compiler, TMP1, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); } static PCRE2_SPTR byte_sequence_compare(compiler_common *common, BOOL caseless, PCRE2_SPTR cc, @@ -7550,16 +7619,6 @@ return cc; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 -#define SET_TYPE_OFFSET(value) \ - if ((value) != typeoffset) \ - { \ - if ((value) < typeoffset) \ - OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \ - else \ - OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \ - } \ - typeoffset = (value); - #define SET_CHAR_OFFSET(value) \ if ((value) != charoffset) \ { \ @@ -7572,6 +7631,20 @@ return cc; static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); +#ifdef SUPPORT_UNICODE +#define XCLASS_SAVE_CHAR 0x001 +#define XCLASS_CHAR_SAVED 0x002 +#define XCLASS_HAS_TYPE 0x004 +#define XCLASS_HAS_SCRIPT 0x008 +#define XCLASS_HAS_SCRIPT_EXTENSION 0x010 +#define XCLASS_HAS_BOOL 0x020 +#define XCLASS_HAS_BIDICL 0x040 +#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) +#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200 +#endif /* SUPPORT_UNICODE */ + static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) { DEFINE_COMPILER; @@ -7586,11 +7659,11 @@ BOOL utf = common->utf; #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ #ifdef SUPPORT_UNICODE -BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; -BOOL charsaved = FALSE; +sljit_u32 unicode_status = 0; +sljit_u32 category_list = 0; +sljit_u32 items; int typereg = TMP1; const sljit_u32 *other_cases; -sljit_uw typeoffset; #endif /* SUPPORT_UNICODE */ /* Scanning the necessary info. */ @@ -7607,6 +7680,7 @@ if (cc[-1] & XCL_MAP) while (*cc != XCL_END) { compares++; + if (*cc == XCL_SINGLE) { cc ++; @@ -7614,7 +7688,7 @@ while (*cc != XCL_END) if (c > max) max = c; if (c < min) min = c; #ifdef SUPPORT_UNICODE - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR; #endif /* SUPPORT_UNICODE */ } else if (*cc == XCL_RANGE) @@ -7625,7 +7699,7 @@ while (*cc != XCL_END) GETCHARINCTEST(c, cc); if (c > max) max = c; #ifdef SUPPORT_UNICODE - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR; #endif /* SUPPORT_UNICODE */ } #ifdef SUPPORT_UNICODE @@ -7633,7 +7707,8 @@ while (*cc != XCL_END) { SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); cc++; - if (*cc == PT_CLIST) + + if (*cc == PT_CLIST && cc[-1] == XCL_PROP) { other_cases = PRIV(ucd_caseless_sets) + cc[1]; while (*other_cases != NOTACHAR) @@ -7649,61 +7724,127 @@ while (*cc != XCL_END) min = 0; } + items = 0; + switch(*cc) { case PT_ANY: /* Any either accepts everything or ignored. */ if (cc[-1] == XCL_PROP) - { - compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); - if (list == backtracks) - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - return; - } + items = UCPCAT_ALL; + else + compares--; break; case PT_LAMP: + items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt); + break; + case PT_GC: + items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]); + break; + case PT_PC: + items = UCPCAT(cc[1]); + break; + + case PT_WORD: + items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N; + break; + case PT_ALNUM: - needstype = TRUE; + items = UCPCAT_L | UCPCAT_N; break; + case PT_SCX: + unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION; + if (cc[-1] == XCL_NOTPROP) + { + unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP; + break; + } + compares++; + /* Fall through */ + case PT_SC: - needsscript = TRUE; + unicode_status |= XCLASS_HAS_SCRIPT; break; case PT_SPACE: case PT_PXSPACE: - case PT_WORD: case PT_PXGRAPH: case PT_PXPRINT: case PT_PXPUNCT: - needstype = TRUE; - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE; break; case PT_CLIST: case PT_UCNC: - needschar = TRUE; + case PT_PXXDIGIT: + unicode_status |= XCLASS_SAVE_CHAR; + break; + + case PT_BOOL: + unicode_status |= XCLASS_HAS_BOOL; + break; + + case PT_BIDICL: + unicode_status |= XCLASS_HAS_BIDICL; break; default: SLJIT_UNREACHABLE(); break; } + + if (items > 0) + { + if (cc[-1] == XCL_NOTPROP) + items ^= UCPCAT_ALL; + category_list |= items; + unicode_status |= XCLASS_HAS_TYPE; + compares--; + } + cc += 2; } #endif /* SUPPORT_UNICODE */ } + +#ifdef SUPPORT_UNICODE +if (category_list == UCPCAT_ALL) + { + /* All characters are accepted, same as dotall. */ + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list == backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } + +if (compares == 0 && category_list == 0) + { + /* No characters are accepted, same as (*F) or dotall. */ + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list != backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } +#else /* !SUPPORT_UNICODE */ SLJIT_ASSERT(compares > 0); +#endif /* SUPPORT_UNICODE */ /* We are not necessary in utf mode even in 8 bit mode. */ cc = ccbegin; if ((cc[-1] & XCL_NOT) != 0) read_char(common, min, max, backtracks, READ_CHAR_UPDATE_STR_PTR); else + { +#ifdef SUPPORT_UNICODE + read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0); +#else /* !SUPPORT_UNICODE */ read_char(common, min, max, NULL, 0); +#endif /* SUPPORT_UNICODE */ + } if ((cc[-1] & XCL_HASPROP) == 0) { @@ -7716,7 +7857,7 @@ if ((cc[-1] & XCL_HASPROP) == 0) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); } @@ -7735,7 +7876,7 @@ else if ((cc[-1] & XCL_MAP) != 0) { OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); #ifdef SUPPORT_UNICODE - charsaved = TRUE; + unicode_status |= XCLASS_CHAR_SAVED; #endif /* SUPPORT_UNICODE */ if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) { @@ -7749,7 +7890,7 @@ else if ((cc[-1] & XCL_MAP) != 0) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -7763,9 +7904,9 @@ else if ((cc[-1] & XCL_MAP) != 0) } #ifdef SUPPORT_UNICODE -if (needstype || needsscript) +if (unicode_status & XCLASS_NEEDS_UCD) { - if (needschar && !charsaved) + if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR) OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); #if PCRE2_CODE_UNIT_WIDTH == 32 @@ -7785,24 +7926,19 @@ if (needstype || needsscript) OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - /* Before anything else, we deal with scripts. */ - if (needsscript) - { -// PH hacking -//fprintf(stderr, "~~B\n"); - - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0); + ccbegin = cc; - // OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); + if (category_list != 0) + compares++; - ccbegin = cc; + if (unicode_status & XCLASS_HAS_BIDICL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT); while (*cc != XCL_END) { @@ -7821,7 +7957,7 @@ if (needstype || needsscript) { SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); cc++; - if (*cc == PT_SC) + if (*cc == PT_BIDICL) { compares--; invertcmp = (compares == 0 && list != backtracks); @@ -7837,39 +7973,181 @@ if (needstype || needsscript) cc = ccbegin; } - if (needschar) - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + if (unicode_status & XCLASS_HAS_BOOL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_BOOL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; - if (needstype) + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + cc += 2; + } + } + + cc = ccbegin; + } + + if (unicode_status & XCLASS_HAS_SCRIPT) { - if (!needschar) + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + + while (*cc != XCL_END) { -// PH hacking -//fprintf(stderr, "~~C\n"); - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP1, 0); + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + switch (*cc) + { + case PT_SCX: + if (cc[-1] == XCL_NOTPROP) + break; + /* Fall through */ + + case PT_SC: + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; - OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1])); + } + cc += 2; + } + } - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 0); + cc = ccbegin; + } + + if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); -// OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3); + if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP) + { + if (unicode_status & XCLASS_HAS_TYPE) + { + if (unicode_status & XCLASS_SAVE_CHAR) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0); + unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0; + } + else + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); + unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR; + } + } + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); } - else + + while (*cc != XCL_END) { -// PH hacking -//fprintf(stderr, "~~D\n"); - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_SCX) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); + jump = NULL; + if (cc[-1] == XCL_NOTPROP) + { + jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]); + if (invertcmp) + { + add_jump(compiler, backtracks, jump); + jump = NULL; + } + invertcmp ^= 0x1; + } - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + + if (jump != NULL) + JUMPHERE(jump); + } + cc += 2; + } + } + + if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR) + OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); + cc = ccbegin; + } - OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + if (unicode_status & XCLASS_SAVE_CHAR) + OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + + if (unicode_status & XCLASS_HAS_TYPE) + { + if (unicode_status & XCLASS_SAVE_CHAR) typereg = RETURN_ADDR; + + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0); + + if (category_list > 0) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); } } } @@ -7878,9 +8156,6 @@ if (needstype || needsscript) /* Generating code. */ charoffset = 0; numberofcmps = 0; -#ifdef SUPPORT_UNICODE -typeoffset = 0; -#endif /* SUPPORT_UNICODE */ while (*cc != XCL_END) { @@ -7895,13 +8170,13 @@ while (*cc != XCL_END) if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); numberofcmps = 0; @@ -7921,13 +8196,13 @@ while (*cc != XCL_END) if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); numberofcmps = 0; @@ -7948,65 +8223,33 @@ while (*cc != XCL_END) switch(*cc) { case PT_ANY: - if (!invertcmp) - jump = JUMP(SLJIT_JUMP); - break; - case PT_LAMP: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - case PT_GC: - c = PRIV(ucp_typerange)[(int)cc[1] * 2]; - SET_TYPE_OFFSET(c); - jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); - break; - case PT_PC: - jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); - break; - case PT_SC: + case PT_SCX: + case PT_BOOL: + case PT_BIDICL: + case PT_WORD: + case PT_ALNUM: compares++; - /* Do nothing. */ + /* Already handled. */ break; case PT_SPACE: case PT_PXSPACE: SET_CHAR_OFFSET(9); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - SET_TYPE_OFFSET(ucp_Zl); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_WORD: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - /* Fall through. */ - - case PT_ALNUM: - SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - SET_TYPE_OFFSET(ucp_Nd); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; @@ -8028,7 +8271,7 @@ while (*cc != XCL_END) OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); } - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); other_cases += 2; } @@ -8041,103 +8284,135 @@ while (*cc != XCL_END) OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); } - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); other_cases += 3; } else { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); } while (*other_cases != NOTACHAR) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); } jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_UCNC: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); SET_CHAR_OFFSET(0xa0); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); SET_CHAR_OFFSET(0); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_PXGRAPH: - /* C and Z groups are the farthest two groups. */ - SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); + jump = JUMP(SLJIT_ZERO); + c = charoffset; /* In case of ucp_Cf, we overwrite the result. */ SET_CHAR_OFFSET(0x2066); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + /* Restore charoffset. */ + SET_CHAR_OFFSET(c); + JUMPHERE(jump); jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); break; case PT_PXPRINT: - /* C and Z groups are the farthest two groups. */ - SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); - OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL); - - jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); + jump = JUMP(SLJIT_ZERO); + c = charoffset; /* In case of ucp_Cf, we overwrite the result. */ SET_CHAR_OFFSET(0x2066); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + /* Restore charoffset. */ + SET_CHAR_OFFSET(c); + JUMPHERE(jump); jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); break; case PT_PXPUNCT: - SET_TYPE_OFFSET(ucp_Sc); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); SET_CHAR_OFFSET(0); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f); OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL); - SET_TYPE_OFFSET(ucp_Pc); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_PXXDIGIT: + SET_CHAR_OFFSET(CHAR_A); + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(CHAR_0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff10); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10); + + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff21); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff41); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff10); + + JUMPHERE(jump); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; @@ -8153,6 +8428,7 @@ while (*cc != XCL_END) add_jump(compiler, compares > 0 ? list : backtracks, jump); } +SLJIT_ASSERT(compares == 0); if (found != NULL) set_jumps(found, LABEL()); } @@ -8165,40 +8441,46 @@ if (found != NULL) static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks) { DEFINE_COMPILER; -int length; struct sljit_jump *jump[4]; -#ifdef SUPPORT_UNICODE -struct sljit_label *label; -#endif /* SUPPORT_UNICODE */ switch(type) { case OP_SOD: - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; case OP_SOM: - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + } + else + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0)); return cc; case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: - add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL)); #ifdef SUPPORT_UNICODE if (common->invalid_utf) { - OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_SIG_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0); - add_jump(compiler, backtracks, JUMP(SLJIT_SIG_LESS)); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0)); return cc; } #endif /* SUPPORT_UNICODE */ sljit_set_current_flags(compiler, SLJIT_SET_Z); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO)); return cc; case OP_EODN: @@ -8213,9 +8495,9 @@ switch(type) else { jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); check_partial(common, TRUE); @@ -8238,7 +8520,7 @@ switch(type) OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0); jump[2] = JUMP(SLJIT_GREATER); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */); /* Equal. */ @@ -8267,18 +8549,25 @@ switch(type) JUMPHERE(jump[3]); } JUMPHERE(jump[0]); - check_partial(common, FALSE); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); return cc; case OP_EOD: add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); return cc; case OP_DOLL: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + } + else + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); if (!common->endonly) compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); @@ -8291,9 +8580,14 @@ switch(type) case OP_DOLLM: jump[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + } + else + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); check_partial(common, FALSE); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); @@ -8327,19 +8621,39 @@ switch(type) return cc; case OP_CIRC: - OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); - add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + } + else + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); + } return cc; case OP_CIRCM: - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + /* TMP2 might be used by peek_char_back. */ + if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + } + else + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + } + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); @@ -8362,32 +8676,6 @@ switch(type) } JUMPHERE(jump[0]); return cc; - - case OP_REVERSE: - length = GET(cc, 0); - if (length == 0) - return cc + LINK_SIZE; - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -#ifdef SUPPORT_UNICODE - if (common->utf) - { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length); - label = LABEL(); - add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0)); - move_back(common, backtracks, FALSE); - OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); - } - else -#endif - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0)); - } - check_start_used_ptr(common); - return cc + LINK_SIZE; } SLJIT_UNREACHABLE(); return cc; @@ -8402,12 +8690,12 @@ static PCRE2_SPTR SLJIT_FUNC do_extuni_utf(jit_arguments *args, PCRE2_SPTR cc) PCRE2_SPTR start_subject = args->begin; PCRE2_SPTR end_subject = args->end; int lgb, rgb, ricount; -PCRE2_SPTR prevcc, startcc, bptr; +PCRE2_SPTR prevcc, endcc, bptr; BOOL first = TRUE; uint32_t c; prevcc = cc; -startcc = NULL; +endcc = NULL; do { GETCHARINC(c, cc); @@ -8416,7 +8704,7 @@ do if (first) { lgb = rgb; - startcc = cc; + endcc = cc; first = FALSE; continue; } @@ -8427,7 +8715,7 @@ do /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = prevcc; @@ -8439,7 +8727,7 @@ do BACKCHAR(bptr); GETCHAR(c, bptr); - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; @@ -8455,25 +8743,27 @@ do lgb != ucp_gbExtended_Pictographic) lgb = rgb; - prevcc = startcc; - startcc = cc; + prevcc = endcc; + endcc = cc; } while (cc < end_subject); -return startcc; +return endcc; } +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + static PCRE2_SPTR SLJIT_FUNC do_extuni_utf_invalid(jit_arguments *args, PCRE2_SPTR cc) { PCRE2_SPTR start_subject = args->begin; PCRE2_SPTR end_subject = args->end; int lgb, rgb, ricount; -PCRE2_SPTR prevcc, startcc, bptr; +PCRE2_SPTR prevcc, endcc, bptr; BOOL first = TRUE; uint32_t c; prevcc = cc; -startcc = NULL; +endcc = NULL; do { GETCHARINC_INVALID(c, cc, end_subject, break); @@ -8482,7 +8772,7 @@ do if (first) { lgb = rgb; - startcc = cc; + endcc = cc; first = FALSE; continue; } @@ -8493,7 +8783,7 @@ do /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = prevcc; @@ -8503,7 +8793,7 @@ do { GETCHARBACK_INVALID(c, bptr, start_subject, break); - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; @@ -8520,16 +8810,14 @@ do lgb != ucp_gbExtended_Pictographic) lgb = rgb; - prevcc = startcc; - startcc = cc; + prevcc = endcc; + endcc = cc; } while (cc < end_subject); -return startcc; +return endcc; } -#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ - static PCRE2_SPTR SLJIT_FUNC do_extuni_no_utf(jit_arguments *args, PCRE2_SPTR cc) { PCRE2_SPTR start_subject = args->begin; @@ -8538,10 +8826,13 @@ int lgb, rgb, ricount; PCRE2_SPTR bptr; uint32_t c; -GETCHARINC(c, cc); +/* Patch by PH */ +/* GETCHARINC(c, cc); */ +c = *cc++; + #if PCRE2_CODE_UNIT_WIDTH == 32 if (c >= 0x110000) - return NULL; + return cc; #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ lgb = UCD_GRAPHBREAK(c); @@ -8560,7 +8851,7 @@ while (cc < end_subject) /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = cc - 1; @@ -8575,7 +8866,7 @@ while (cc < end_subject) break; #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break; + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; } @@ -8625,7 +8916,7 @@ switch(type) #endif read_char8_type(common, backtracks, type == OP_NOT_DIGIT); /* Flip the starting bit in the negative case. */ - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit); add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8639,7 +8930,7 @@ switch(type) else #endif read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space); add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8653,7 +8944,7 @@ switch(type) else #endif read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word); add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8683,35 +8974,14 @@ switch(type) if (check_str_ptr) detect_partial_match(common, backtracks); #ifdef SUPPORT_UNICODE - if (common->utf) + if (common->utf && common->invalid_utf) { - if (common->invalid_utf) - { - read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); - return cc; - } - -#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if PCRE2_CODE_UNIT_WIDTH == 8 - jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -#elif PCRE2_CODE_UNIT_WIDTH == 16 - jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - JUMPHERE(jump[0]); + read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); return cc; -#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */ } #endif /* SUPPORT_UNICODE */ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + skip_valid_char(common); return cc; case OP_ANYBYTE: @@ -8795,13 +9065,15 @@ switch(type) OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); #if PCRE2_CODE_UNIT_WIDTH != 32 - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, - common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); if (common->invalid_utf) add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_extuni_no_utf)); - add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); + if (common->invalid_utf) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); #endif OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); @@ -8861,8 +9133,8 @@ switch(type) if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); - CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc); + SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); } else @@ -8981,7 +9253,7 @@ switch(type) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 @@ -9197,18 +9469,13 @@ if (common->utf && *cc == OP_REFI) CMPTO(SLJIT_EQUAL, TMP1, 0, char1_reg, 0, loop); -// PH hacking -//fprintf(stderr, "~~E\n"); - OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); - + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 2); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3); - - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records)); @@ -9224,7 +9491,7 @@ if (common->utf && *cc == OP_REFI) caseless_loop = LABEL(); OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t)); - OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0); JUMPTO(SLJIT_EQUAL, loop); JUMPTO(SLJIT_LESS, caseless_loop); @@ -9386,14 +9653,16 @@ if (!minimize) if (ref) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + if (ref) { - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + if (!common->unset_backref) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); } else { - compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + compile_dnref_search(common, ccbegin, &backtrack->own_backtracks); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); @@ -9406,7 +9675,7 @@ if (!minimize) label = LABEL(); if (!ref) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); - compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); + compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE); if (min > 1 || max > 1) { @@ -9468,12 +9737,13 @@ else { if (ref) { - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + if (!common->unset_backref) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); } else { - compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + compile_dnref_search(common, ccbegin, &backtrack->own_backtracks); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); @@ -9482,11 +9752,11 @@ else BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); if (max > 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); if (!ref) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); -compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); +compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); if (min > 1) @@ -9561,12 +9831,12 @@ if (entry->entry_label == NULL) else JUMPTO(SLJIT_FAST_CALL, entry->entry_label); /* Leave if the match is failed. */ -add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); +add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); return cc + 1 + LINK_SIZE; } -static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) { PCRE2_SPTR begin; PCRE2_SIZE *ovector; @@ -9633,7 +9903,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT) sljit_sw value1; sljit_sw value2; sljit_sw value3; -sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw); +sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw); PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); @@ -9683,23 +9953,123 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); /* SLJIT_R0 = arguments */ OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); free_stack(common, callout_arg_size); /* Check return value. */ -OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32)); +OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER)); if (common->abort_label == NULL) - add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */); + add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */); else - JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label); + JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label); return cc + callout_length; } #undef CALLOUT_ARG_SIZE #undef CALLOUT_ARG_OFFSET +static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack = NULL; +jump_list **reverse_failed; +unsigned int lmin, lmax; +#ifdef SUPPORT_UNICODE +struct sljit_jump *jump; +struct sljit_label *label; +#endif + +SLJIT_ASSERT(parent->top == NULL); + +if (*cc == OP_REVERSE) + { + reverse_failed = &parent->own_backtracks; + lmin = GET2(cc, 1); + lmax = lmin; + cc += 1 + IMM2_SIZE; + + SLJIT_ASSERT(lmin > 0); + } +else + { + SLJIT_ASSERT(*cc == OP_VREVERSE); + PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL); + + reverse_failed = &backtrack->own_backtracks; + lmin = GET2(cc, 1); + lmax = GET2(cc, 1 + IMM2_SIZE); + cc += 1 + 2 * IMM2_SIZE; + + SLJIT_ASSERT(lmin < lmax); + } + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } +else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + +#ifdef SUPPORT_UNICODE +if (common->utf) + { + if (lmin > 0) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin); + label = LABEL(); + add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0)); + move_back(common, reverse_failed, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + + if (lmin < lmax) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); + + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin); + label = LABEL(); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + move_back(common, reverse_failed, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); + } + } +else +#endif + { + if (lmin > 0) + { + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin)); + add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0)); + } + + if (lmin < lmax) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); + + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0); + SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR); + + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); + } + } + +check_start_used_ptr(common); + +if (lmin < lmax) + BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL(); + +return cc; +} + static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc) { while (TRUE) @@ -9718,6 +10088,8 @@ while (TRUE) case OP_DOLLM: case OP_CALLOUT: case OP_ALT: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: cc += PRIV(OP_lengths)[*cc]; break; @@ -9737,13 +10109,15 @@ int framesize; int extrasize; BOOL local_quit_available = FALSE; BOOL needs_control_head; +BOOL end_block_size = 0; +BOOL has_vreverse; int private_data_ptr; backtrack_common altbacktrack; PCRE2_SPTR ccbegin; PCRE2_UCHAR opcode; PCRE2_UCHAR bra = OP_BRA; jump_list *tmp = NULL; -jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks; +jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks; jump_list **found; /* Saving previous accept variables. */ BOOL save_local_quit_available = common->local_quit_available; @@ -9766,6 +10140,7 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) bra = *cc; cc++; } + private_data_ptr = PRIVATE_DATA(cc); SLJIT_ASSERT(private_data_ptr != 0); framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); @@ -9785,12 +10160,17 @@ if (bra == OP_BRAMINZERO) brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); } +if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin)) + end_block_size = 3; + if (framesize < 0) { extrasize = 1; if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE)) extrasize = 0; + extrasize += end_block_size; + if (needs_control_head) extrasize++; @@ -9808,18 +10188,19 @@ if (framesize < 0) if (needs_control_head) { - SLJIT_ASSERT(extrasize == 2); + SLJIT_ASSERT(extrasize == end_block_size + 2); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0); } } else { - extrasize = needs_control_head ? 3 : 2; + extrasize = (needs_control_head ? 3 : 2) + end_block_size; + + OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0); allocate_stack(common, framesize + extrasize); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); if (needs_control_head) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); @@ -9827,16 +10208,22 @@ else if (needs_control_head) { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); } else - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0); init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize); } +if (end_block_size > 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0); + } + memset(&altbacktrack, 0, sizeof(backtrack_common)); if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)) { @@ -9855,13 +10242,19 @@ while (1) common->accept_label = NULL; common->accept = NULL; altbacktrack.top = NULL; - altbacktrack.topbacktracks = NULL; + altbacktrack.own_backtracks = NULL; if (*ccbegin == OP_ALT && extrasize > 0) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); altbacktrack.cc = ccbegin; - compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack); + ccbegin += 1 + LINK_SIZE; + + has_vreverse = (*ccbegin == OP_VREVERSE); + if (*ccbegin == OP_REVERSE || has_vreverse) + ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack); + + compile_matchingpath(common, ccbegin, cc, &altbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { if (local_quit_available) @@ -9877,6 +10270,13 @@ while (1) common->accept = save_accept; return NULL; } + + if (has_vreverse) + { + SLJIT_ASSERT(altbacktrack.top != NULL); + add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } + common->accept_label = LABEL(); if (common->accept != NULL) set_jumps(common->accept, common->accept_label); @@ -9889,6 +10289,9 @@ while (1) else if (extrasize > 0) free_stack(common, extrasize); + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1)); + if (needs_control_head) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); } @@ -9898,12 +10301,20 @@ while (1) { /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2)); + if (needs_control_head) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); } else { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1)); + if (needs_control_head) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2)); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); @@ -9917,7 +10328,7 @@ while (1) if (conditional) { if (extrasize > 0) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1))); } else if (bra == OP_BRAZERO) { @@ -9956,7 +10367,7 @@ while (1) common->accept = save_accept; return NULL; } - set_jumps(altbacktrack.topbacktracks, LABEL()); + set_jumps(altbacktrack.own_backtracks, LABEL()); if (*cc != OP_ALT) break; @@ -9989,8 +10400,11 @@ if (common->positive_assertion_quit != NULL) JUMPHERE(jump); } +if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1)); if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { @@ -10003,8 +10417,8 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) /* The topmost item should be 0. */ if (bra == OP_BRAZERO) { - if (extrasize == 2) - free_stack(common, 1); + if (extrasize >= 2) + free_stack(common, extrasize - 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } else if (extrasize > 0) @@ -10038,8 +10452,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) /* Keep the STR_PTR on the top of the stack. */ if (bra == OP_BRAZERO) { + /* This allocation is always successful. */ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - if (extrasize == 2) + if (extrasize >= 2) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); } else if (bra == OP_BRAMINZERO) @@ -10059,8 +10474,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) else { /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ - OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); - if (extrasize == 2) + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw)); + + if (extrasize == 2 + end_block_size) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); if (bra == OP_BRAMINZERO) @@ -10068,7 +10484,8 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) } else { - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0); + SLJIT_ASSERT(extrasize == 3 + end_block_size); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0); } } @@ -10091,7 +10508,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } - set_jumps(backtrack->common.topbacktracks, LABEL()); + set_jumps(backtrack->common.own_backtracks, LABEL()); } } else @@ -10104,8 +10521,8 @@ else if (bra != OP_BRA) { - if (extrasize == 2) - free_stack(common, 1); + if (extrasize >= 2) + free_stack(common, extrasize - 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } else if (extrasize > 0) @@ -10136,9 +10553,9 @@ else if (bra != OP_BRA) { - SLJIT_ASSERT(found == &backtrack->common.topbacktracks); - set_jumps(backtrack->common.topbacktracks, LABEL()); - backtrack->common.topbacktracks = NULL; + SLJIT_ASSERT(found == &backtrack->common.own_backtracks); + set_jumps(backtrack->common.own_backtracks, LABEL()); + backtrack->common.own_backtracks = NULL; } } @@ -10247,7 +10664,7 @@ static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr #endif /* SUPPORT_UNICODE */ -static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent) +static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent) { DEFINE_COMPILER; @@ -10255,14 +10672,14 @@ SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); #ifdef SUPPORT_UNICODE -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, - common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run)); #else -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run)); #endif OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); -add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +add_jump(compiler, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); } /* @@ -10336,6 +10753,7 @@ PCRE2_UCHAR ket; assert_backtrack *assert; BOOL has_alternatives; BOOL needs_control_head = FALSE; +BOOL has_vreverse = FALSE; struct sljit_jump *jump; struct sljit_jump *skip; struct sljit_label *rmax_label = NULL; @@ -10400,7 +10818,7 @@ if (opcode == OP_CBRA || opcode == OP_SCBRA) BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr; matchingpath += IMM2_SIZE; } -else if (opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_ONCE || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) { /* Other brackets simply allocate the next entry. */ private_data_ptr = PRIVATE_DATA(ccbegin); @@ -10585,13 +11003,31 @@ else if (opcode == OP_CBRA || opcode == OP_SCBRA) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); } } -else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) +else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1)) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + allocate_stack(common, 4); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0); + + has_vreverse = (*matchingpath == OP_VREVERSE); + if (*matchingpath == OP_REVERSE || has_vreverse) + matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack); + } +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) { /* Saving the previous value. */ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + + if (*matchingpath == OP_REVERSE) + matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack); } else if (has_alternatives) { @@ -10711,11 +11147,28 @@ compile_matchingpath(common, matchingpath, cc, backtrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; -if (opcode == OP_ONCE) - match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); +switch (opcode) + { + case OP_ASSERTBACK_NA: + if (has_vreverse) + { + SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1)); + add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } -if (opcode == OP_SCRIPT_RUN) - match_script_run_common(common, private_data_ptr, backtrack); + if (PRIVATE_DATA(ccbegin + 1)) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + break; + case OP_ASSERT_NA: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + break; + case OP_ONCE: + match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); + break; + case OP_SCRIPT_RUN: + match_script_run_common(common, private_data_ptr, backtrack); + break; + } stacksize = 0; if (repeat_type == OP_MINUPTO) @@ -10759,10 +11212,23 @@ if (ket != OP_KET || bra != OP_BRA) if (offset != 0) stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); +/* Skip and count the other alternatives. */ +i = 1; +while (*cc == OP_ALT) + { + cc += GET(cc, 1); + i++; + } + if (has_alternatives) { if (opcode != OP_ONCE) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + { + if (i <= 3) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); + else + BACKTRACK_AS(bracket_backtrack)->u.matching_put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + } if (ket != OP_KETRMAX) BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL(); } @@ -10851,9 +11317,6 @@ if (bra == OP_BRAMINZERO) if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO) count_match(common); -/* Skip the other alternatives. */ -while (*cc == OP_ALT) - cc += GET(cc, 1); cc += 1 + LINK_SIZE; if (opcode == OP_ONCE) @@ -10904,7 +11367,7 @@ switch(opcode) case OP_CBRAPOS: case OP_SCBRAPOS: offset = GET2(cc, 1 + LINK_SIZE); - /* This case cannot be optimized in the same was as + /* This case cannot be optimized in the same way as normal capturing brackets. */ SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); cbraprivptr = OVECTOR_PRIV(offset); @@ -11021,7 +11484,7 @@ loop = LABEL(); while (*cc != OP_KETRPOS) { backtrack->top = NULL; - backtrack->topbacktracks = NULL; + backtrack->own_backtracks = NULL; cc += GET(cc, 1); compile_matchingpath(common, ccbegin, cc, backtrack); @@ -11102,7 +11565,7 @@ while (*cc != OP_KETRPOS) compile_backtrackingpath(common, backtrack->top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; - set_jumps(backtrack->topbacktracks, LABEL()); + set_jumps(backtrack->own_backtracks, LABEL()); if (framesize < 0) { @@ -11134,13 +11597,13 @@ while (*cc != OP_KETRPOS) /* We don't have to restore the control head in case of a failed match. */ -backtrack->topbacktracks = NULL; +backtrack->own_backtracks = NULL; if (!zero) { if (framesize < 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); else /* TMP2 is set to [private_data_ptr] above. */ - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0)); } /* None of them matched. */ @@ -11301,8 +11764,8 @@ backtrack_common *backtrack; PCRE2_UCHAR opcode; PCRE2_UCHAR type; sljit_u32 max = 0, exact; -BOOL fast_fail; -sljit_s32 fast_str_ptr; +sljit_s32 early_fail_ptr = PRIVATE_DATA(cc + 1); +sljit_s32 early_fail_type; BOOL charpos_enabled; PCRE2_UCHAR charpos_char; unsigned int charpos_othercasebit; @@ -11314,23 +11777,29 @@ struct sljit_label *label; int private_data_ptr = PRIVATE_DATA(cc); int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; -int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); int tmp_base, tmp_offset; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +BOOL use_tmp; +#endif PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); -fast_str_ptr = PRIVATE_DATA(cc + 1); -fast_fail = TRUE; +early_fail_type = (early_fail_ptr & 0x7); +early_fail_ptr >>= 3; -SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr); +/* During recursion, these optimizations are disabled. */ +if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL) + { + early_fail_ptr = 0; + early_fail_type = type_skip; + } -if (cc == common->fast_forward_bc_ptr) - fast_fail = FALSE; -else if (common->fast_fail_start_ptr == 0) - fast_str_ptr = 0; +SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0 + || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr)); -SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0 - || (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr)); +if (early_fail_type == type_fail) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); @@ -11345,13 +11814,11 @@ else tmp_offset = POSSESSIVE0; } -if (fast_fail && fast_str_ptr != 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); - /* Handle fixed part first. */ if (exact > 1) { - SLJIT_ASSERT(fast_str_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0); + if (common->mode == PCRE2_JIT_COMPLETE #ifdef SUPPORT_UNICODE && !common->utf @@ -11359,10 +11826,10 @@ if (exact > 1) && type != OP_ANYNL && type != OP_EXTUNI) { OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, label); } @@ -11370,24 +11837,37 @@ if (exact > 1) { OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, label); } } else if (exact == 1) - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); + +if (early_fail_type == type_fail_range) + { + /* Range end first, followed by range start. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); + + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0); + } switch(opcode) { case OP_STAR: case OP_UPTO: - SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR); + SLJIT_ASSERT(early_fail_ptr == 0 || opcode == OP_STAR); if (type == OP_ANYNL || type == OP_EXTUNI) { SLJIT_ASSERT(private_data_ptr == 0); - SLJIT_ASSERT(fast_str_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0); allocate_stack(common, 2); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); @@ -11406,180 +11886,231 @@ switch(opcode) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); } - /* We cannot use TMP3 because of this allocate_stack. */ + /* We cannot use TMP3 because of allocate_stack. */ allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); JUMPTO(SLJIT_JUMP, label); if (jump != NULL) JUMPHERE(jump); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; } - else +#ifdef SUPPORT_UNICODE + else if (type == OP_ALLANY && !common->invalid_utf) +#else + else if (type == OP_ALLANY) +#endif { - charpos_enabled = FALSE; - charpos_char = 0; - charpos_othercasebit = 0; - - if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) + if (opcode == OP_STAR) { - charpos_enabled = TRUE; + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset0, STR_END, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } #ifdef SUPPORT_UNICODE - charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); + else if (!common->utf) +#else + else #endif - if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1)) + { + if (private_data_ptr == 0) + allocate_stack(common, 2); + + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + + if (common->mode == PCRE2_JIT_COMPLETE) { - charpos_othercasebit = char_get_othercase_bit(common, end + 1); - if (charpos_othercasebit == 0) - charpos_enabled = FALSE; + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); } - - if (charpos_enabled) + else { - charpos_char = end[1]; - /* Consumpe the OP_CHAR opcode. */ - end += 2; + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + JUMPHERE(jump); + } + + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); + break; + } + } + + charpos_enabled = FALSE; + charpos_char = 0; + charpos_othercasebit = 0; + + if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) + { +#ifdef SUPPORT_UNICODE + charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); +#else + charpos_enabled = TRUE; +#endif + if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1)) + { + charpos_othercasebit = char_get_othercase_bit(common, end + 1); + if (charpos_othercasebit == 0) + charpos_enabled = FALSE; + } + + if (charpos_enabled) + { + charpos_char = end[1]; + /* Consume the OP_CHAR opcode. */ + end += 2; #if PCRE2_CODE_UNIT_WIDTH == 8 - SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); + SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); #elif PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 - SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); - if ((charpos_othercasebit & 0x100) != 0) - charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; + SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); + if ((charpos_othercasebit & 0x100) != 0) + charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; #endif - if (charpos_othercasebit != 0) - charpos_char |= charpos_othercasebit; + if (charpos_othercasebit != 0) + charpos_char |= charpos_othercasebit; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; - BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; - } + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; + BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; } + } - if (charpos_enabled) - { - if (opcode == OP_UPTO) - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); + if (charpos_enabled) + { + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); - /* Search the first instance of charpos_char. */ - jump = JUMP(SLJIT_JUMP); - label = LABEL(); - if (opcode == OP_UPTO) - { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); - } - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); - if (fast_str_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); - JUMPHERE(jump); + /* Search the first instance of charpos_char. */ + jump = JUMP(SLJIT_JUMP); + label = LABEL(); + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO)); + } + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + JUMPHERE(jump); - detect_partial_match(common, &backtrack->topbacktracks); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (charpos_othercasebit != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); + detect_partial_match(common, &backtrack->own_backtracks); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); - if (private_data_ptr == 0) - allocate_stack(common, 2); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); - if (opcode == OP_UPTO) - { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); - } + if (private_data_ptr == 0) + allocate_stack(common, 2); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); - /* Search the last instance of charpos_char. */ - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &no_match, FALSE); - if (fast_str_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); - detect_partial_match(common, &no_match); - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); - if (charpos_othercasebit != 0) - OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); - if (opcode == OP_STAR) - { - CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - } - else - { - jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - JUMPHERE(jump); - } + if (opcode == OP_UPTO) + { + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } - if (opcode == OP_UPTO) - { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); - } - else - JUMPTO(SLJIT_JUMP, label); + /* Search the last instance of charpos_char. */ + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + detect_partial_match(common, &no_match); + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); + if (charpos_othercasebit != 0) + OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); - set_jumps(no_match, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + if (opcode == OP_STAR) + { + CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + JUMPTO(SLJIT_JUMP, label); } -#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - else if (common->utf) + else { - if (private_data_ptr == 0) - allocate_stack(common, 2); - + jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); + JUMPHERE(jump); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } - if (opcode == OP_UPTO) - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + set_jumps(no_match, LABEL()); + OP2(SLJIT_ADD, STR_PTR, 0, base, offset0, SLJIT_IMM, IN_UCHARS(1)); + OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + } + else + { + if (private_data_ptr == 0) + allocate_stack(common, 2); - label = LABEL(); - compile_char1_matchingpath(common, type, cc, &no_match, TRUE); - OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); + OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + use_tmp = (!HAS_VIRTUAL_REGISTERS && opcode == OP_STAR); + SLJIT_ASSERT(!use_tmp || tmp_base == TMP3); - if (opcode == OP_UPTO) - { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); - } - else - JUMPTO(SLJIT_JUMP, label); + if (common->utf) + OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0); +#endif + if (opcode == OP_UPTO) + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); - set_jumps(no_match, LABEL()); - OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); - if (fast_str_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); - } + detect_partial_match(common, &no_match); + label = LABEL(); + compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + OP1(SLJIT_MOV, use_tmp ? TMP3 : base, use_tmp ? 0 : offset0, STR_PTR, 0); #endif - else + + if (opcode == OP_UPTO) { - if (private_data_ptr == 0) - allocate_stack(common, 2); + OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + } - OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); - if (opcode == OP_UPTO) - OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + detect_partial_match_to(common, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - label = LABEL(); - detect_partial_match(common, &no_match); - compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); - if (opcode == OP_UPTO) + set_jumps(no_char1_match, LABEL()); +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 + if (common->utf) + { + set_jumps(no_match, LABEL()); + if (use_tmp) { - OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0); + OP1(SLJIT_MOV, base, offset0, TMP3, 0); } else - JUMPTO(SLJIT_JUMP, label); - - set_jumps(no_char1_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); + } + else +#endif + { OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_match, LABEL()); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); - if (fast_str_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); } + + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); } + BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); break; @@ -11588,12 +12119,12 @@ switch(opcode) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); - if (fast_str_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); break; case OP_MINUPTO: - SLJIT_ASSERT(fast_str_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 2); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); @@ -11603,7 +12134,7 @@ switch(opcode) case OP_QUERY: case OP_MINQUERY: - SLJIT_ASSERT(fast_str_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0); if (private_data_ptr == 0) allocate_stack(common, 1); OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); @@ -11616,63 +12147,112 @@ switch(opcode) break; case OP_POSSTAR: +#if defined SUPPORT_UNICODE + if (type == OP_ALLANY && !common->invalid_utf) +#else + if (type == OP_ALLANY) +#endif + { + OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_END, 0); + break; + } + #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) + if (type == OP_EXTUNI || common->utf) { OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); + detect_partial_match(common, &no_match); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); - JUMPTO(SLJIT_JUMP, label); + detect_partial_match_to(common, label); + set_jumps(no_match, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); - if (fast_str_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); + if (early_fail_ptr != 0) + { + if (!HAS_VIRTUAL_REGISTERS && tmp_base == TMP3) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, TMP3, 0); + else + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + } break; } #endif - label = LABEL(); + detect_partial_match(common, &no_match); + label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); - JUMPTO(SLJIT_JUMP, label); + detect_partial_match_to(common, label); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_char1_match, LABEL()); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_match, LABEL()); - if (fast_str_ptr != 0) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); + if (early_fail_ptr != 0) + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); break; case OP_POSUPTO: - SLJIT_ASSERT(fast_str_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0); #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); + + detect_partial_match(common, &no_match); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &no_match, TRUE); + compile_char1_matchingpath(common, type, cc, &no_match, FALSE); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + detect_partial_match_to(common, label); + set_jumps(no_match, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); break; } #endif + + if (type == OP_ALLANY) + { + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(max)); + + if (common->mode == PCRE2_JIT_COMPLETE) + { + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); + } + else + { + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, STR_END, 0); + process_partial_match(common); + JUMPHERE(jump); + } + break; + } + OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); - label = LABEL(); + detect_partial_match(common, &no_match); + label = LABEL(); compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); + add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); + detect_partial_match_to(common, label); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + set_jumps(no_char1_match, LABEL()); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); set_jumps(no_match, LABEL()); break; case OP_POSQUERY: - SLJIT_ASSERT(fast_str_ptr == 0); + SLJIT_ASSERT(early_fail_ptr == 0); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); compile_char1_matchingpath(common, type, cc, &no_match, TRUE); OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); @@ -11698,12 +12278,12 @@ PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); if (*cc == OP_FAIL) { - add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); + add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP)); return cc + 1; } if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0) - add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) { @@ -11719,21 +12299,29 @@ if (common->accept_label == NULL) add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0))); else CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label); -OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); -OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO)); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); + } +else + OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options)); + +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO)); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); if (common->accept_label == NULL) add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO)); else JUMPTO(SLJIT_ZERO, common->accept_label); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); + +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, str)); if (common->accept_label == NULL) add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); else CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP)); return cc + 1; } @@ -11778,10 +12366,11 @@ if (opcode == OP_SKIP) if (opcode == OP_COMMIT_ARG || opcode == OP_PRUNE_ARG || opcode == OP_THEN_ARG) { - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); } return ccend; @@ -11852,8 +12441,9 @@ while (cc < ccend) case OP_DOLLM: case OP_CIRC: case OP_CIRCM: - case OP_REVERSE: - cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); break; case OP_NOT_DIGIT: @@ -11875,7 +12465,7 @@ while (cc < ccend) case OP_EXTUNI: case OP_NOT: case OP_NOTI: - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; case OP_SET_SOM: @@ -11890,9 +12480,9 @@ while (cc < ccend) case OP_CHAR: case OP_CHARI: if (common->mode == PCRE2_JIT_COMPLETE) - cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; case OP_STAR: @@ -11968,7 +12558,7 @@ while (cc < ccend) if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE) cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 @@ -11976,7 +12566,7 @@ while (cc < ccend) if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; #endif @@ -11986,7 +12576,7 @@ while (cc < ccend) cc = compile_ref_iterator_matchingpath(common, cc, parent); else { - compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); cc += 1 + IMM2_SIZE; } break; @@ -11997,8 +12587,8 @@ while (cc < ccend) cc = compile_ref_iterator_matchingpath(common, cc, parent); else { - compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); - compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); cc += 1 + 2 * IMM2_SIZE; } break; @@ -12038,6 +12628,8 @@ while (cc < ccend) count_match(common); break; + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -12072,11 +12664,12 @@ while (cc < ccend) SLJIT_ASSERT(common->mark_ptr != 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); allocate_stack(common, common->has_skip_arg ? 5 : 1); - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + if (HAS_VIRTUAL_REGISTERS) + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? TMP1 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0); if (common->has_skip_arg) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); @@ -12162,7 +12755,7 @@ PCRE2_SPTR end; int private_data_ptr = PRIVATE_DATA(cc); int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; -int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); @@ -12273,7 +12866,7 @@ switch(opcode) break; } -set_jumps(current->topbacktracks, LABEL()); +set_jumps(current->own_backtracks, LABEL()); } static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -12288,7 +12881,7 @@ type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; if ((type & 0x1) == 0) { /* Maximize case. */ - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); @@ -12297,7 +12890,7 @@ if ((type & 0x1) == 0) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); -set_jumps(current->topbacktracks, LABEL()); +set_jumps(current->own_backtracks, LABEL()); free_stack(common, ref ? 2 : 3); } @@ -12318,7 +12911,7 @@ if (!CURRENT_AS(recurse_backtrack)->inlined_pattern) else compile_backtrackingpath(common, current->top); -set_jumps(current->topbacktracks, LABEL()); +set_jumps(current->own_backtracks, LABEL()); } static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -12337,13 +12930,13 @@ if (*cc == OP_BRAZERO) if (bra == OP_BRAZERO) { - SLJIT_ASSERT(current->topbacktracks == NULL); + SLJIT_ASSERT(current->own_backtracks == NULL); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } if (CURRENT_AS(assert_backtrack)->framesize < 0) { - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); if (bra == OP_BRAZERO) { @@ -12375,10 +12968,10 @@ if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0); - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); } else - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); if (bra == OP_BRAZERO) { @@ -12403,16 +12996,16 @@ PCRE2_SPTR ccprev; PCRE2_UCHAR bra = OP_BRA; PCRE2_UCHAR ket; assert_backtrack *assert; -sljit_uw *next_update_addr = NULL; BOOL has_alternatives; BOOL needs_control_head = FALSE; +BOOL has_vreverse; struct sljit_jump *brazero = NULL; -struct sljit_jump *alt1 = NULL; -struct sljit_jump *alt2 = NULL; +struct sljit_jump *next_alt = NULL; struct sljit_jump *once = NULL; struct sljit_jump *cond = NULL; struct sljit_label *rmin_label = NULL; struct sljit_label *exact_label = NULL; +struct sljit_put_label *put_label = NULL; if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { @@ -12561,7 +13154,7 @@ else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND) free_stack(common, 1); alt_max = 2; - alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); } } else if (has_alternatives) @@ -12569,26 +13162,21 @@ else if (has_alternatives) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); - if (alt_max > 4) + if (alt_max > 3) { - /* Table jump if alt_max is greater than 4. */ - next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw)); - if (SLJIT_UNLIKELY(next_update_addr == NULL)) - return; - sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr); - add_label_addr(common, next_update_addr++); + sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); + + SLJIT_ASSERT(CURRENT_AS(bracket_backtrack)->u.matching_put_label); + sljit_set_put_label(CURRENT_AS(bracket_backtrack)->u.matching_put_label, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); } else - { - if (alt_max == 4) - alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); - alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); - } + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); } COMPILE_BACKTRACKINGPATH(current->top); -if (current->topbacktracks) - set_jumps(current->topbacktracks, LABEL()); +if (current->own_backtracks) + set_jumps(current->own_backtracks, LABEL()); if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) { @@ -12620,18 +13208,29 @@ if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) if (has_alternatives) { - alt_count = sizeof(sljit_uw); + alt_count = 1; do { current->top = NULL; - current->topbacktracks = NULL; - current->nextbacktracks = NULL; + current->own_backtracks = NULL; + current->simple_backtracks = NULL; /* Conditional blocks always have an additional alternative, even if it is empty. */ if (*cc == OP_ALT) { ccprev = cc + 1 + LINK_SIZE; cc += GET(cc, 1); - if (opcode != OP_COND && opcode != OP_SCOND) + + has_vreverse = FALSE; + if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA) + { + SLJIT_ASSERT(private_data_ptr != 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + has_vreverse = (*ccprev == OP_VREVERSE); + if (*ccprev == OP_REVERSE || has_vreverse) + ccprev = compile_reverse_matchingpath(common, ccprev, current); + } + else if (opcode != OP_COND && opcode != OP_SCOND) { if (opcode != OP_ONCE) { @@ -12643,12 +13242,30 @@ if (has_alternatives) else OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); } + compile_matchingpath(common, ccprev, cc, current); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; - if (opcode == OP_SCRIPT_RUN) - match_script_run_common(common, private_data_ptr, current); + switch (opcode) + { + case OP_ASSERTBACK_NA: + if (has_vreverse) + { + SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1)); + add_jump(compiler, ¤t->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } + + if (PRIVATE_DATA(ccbegin + 1)) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + break; + case OP_ASSERT_NA: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + break; + case OP_SCRIPT_RUN: + match_script_run_common(common, private_data_ptr, current); + break; + } } /* Instructions after the current alternative is successfully matched. */ @@ -12699,7 +13316,12 @@ if (has_alternatives) stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); if (opcode != OP_ONCE) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); + { + if (alt_max <= 3) + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); + else + put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(stacksize)); + } if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) { @@ -12712,30 +13334,27 @@ if (has_alternatives) if (opcode != OP_ONCE) { - if (alt_max > 4) - add_label_addr(common, next_update_addr++); - else + if (alt_max <= 3) { - if (alt_count != 2 * sizeof(sljit_uw)) + JUMPHERE(next_alt); + alt_count++; + if (alt_count < alt_max) { - JUMPHERE(alt1); - if (alt_max == 3 && alt_count == sizeof(sljit_uw)) - alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); - } - else - { - JUMPHERE(alt2); - if (alt_max == 4) - alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw)); + SLJIT_ASSERT(alt_count == 2 && alt_max == 3); + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1); } } - alt_count += sizeof(sljit_uw); + else + { + sljit_set_put_label(put_label, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); + } } COMPILE_BACKTRACKINGPATH(current->top); - if (current->topbacktracks) - set_jumps(current->topbacktracks, LABEL()); - SLJIT_ASSERT(!current->nextbacktracks); + if (current->own_backtracks) + set_jumps(current->own_backtracks, LABEL()); + SLJIT_ASSERT(!current->simple_backtracks); } while (*cc == OP_ALT); @@ -12777,7 +13396,16 @@ if (offset != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } } -else if (opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) +else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1)) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0); + free_stack(common, 4); + } +else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); @@ -12863,12 +13491,19 @@ static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *co DEFINE_COMPILER; int offset; struct sljit_jump *jump; +PCRE2_SPTR cc; +/* No retry on backtrack, just drop everything. */ if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) { - if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS) + cc = current->cc; + + if (*cc == OP_BRAPOSZERO) + cc++; + + if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS) { - offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1; + offset = (GET2(cc, 1 + LINK_SIZE)) << 1; OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); @@ -12878,7 +13513,7 @@ if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) if (common->capture_last_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); } - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); return; } @@ -12887,10 +13522,10 @@ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtra add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw)); -if (current->topbacktracks) +if (current->own_backtracks) { jump = JUMP(SLJIT_JUMP); - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); /* Drop the stack frame. */ free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); JUMPHERE(jump); @@ -12903,8 +13538,8 @@ static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *co assert_backtrack backtrack; current->top = NULL; -current->topbacktracks = NULL; -current->nextbacktracks = NULL; +current->own_backtracks = NULL; +current->simple_backtracks = NULL; if (current->cc[1] > OP_ASSERTBACK_NOT) { /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */ @@ -12919,7 +13554,7 @@ else /* Manual call of compile_assert_matchingpath. */ compile_assert_matchingpath(common, current->cc, &backtrack, FALSE); } -SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks); +SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks); } static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -12970,7 +13605,7 @@ if (opcode == OP_SKIP_ARG) SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); @@ -12984,6 +13619,23 @@ else add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP)); } +static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *label; + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); +jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3)); +skip_valid_char(common); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); +JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath); + +label = LABEL(); +sljit_set_label(jump, label); +set_jumps(current->own_backtracks, label); +} + static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; @@ -13024,8 +13676,8 @@ then_trap_backtrack *save_then_trap = common->then_trap; while (current) { - if (current->nextbacktracks != NULL) - set_jumps(current->nextbacktracks, LABEL()); + if (current->simple_backtracks != NULL) + set_jumps(current->simple_backtracks, LABEL()); switch(*current->cc) { case OP_SET_SOM: @@ -13125,6 +13777,8 @@ while (current) compile_assert_backtrackingpath(common, current); break; + case OP_ASSERT_NA: + case OP_ASSERTBACK_NA: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRA: @@ -13189,7 +13843,11 @@ while (current) case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); + break; + + case OP_VREVERSE: + compile_vreverse_backtrackingpath(common, current); break; case OP_THEN_TRAP: @@ -13212,18 +13870,15 @@ DEFINE_COMPILER; PCRE2_SPTR cc = common->start + common->currententry->start; PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE); -BOOL needs_control_head; -BOOL has_quit; -BOOL has_accept; -int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept); +uint32_t recurse_flags = 0; +int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags); int alt_count, alt_max, local_size; backtrack_common altbacktrack; jump_list *match = NULL; -sljit_uw *next_update_addr = NULL; -struct sljit_jump *alt1 = NULL; -struct sljit_jump *alt2 = NULL; +struct sljit_jump *next_alt = NULL; struct sljit_jump *accept_exit = NULL; struct sljit_label *quit; +struct sljit_put_label *put_label = NULL; /* Recurse captures then. */ common->then_trap = NULL; @@ -13238,7 +13893,7 @@ SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head common->currententry->entry_label = LABEL(); set_jumps(common->currententry->entry_calls, common->currententry->entry_label); -sljit_emit_fast_enter(compiler, TMP2, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0); count_match(common); local_size = (alt_max > 1) ? 2 : 1; @@ -13250,12 +13905,12 @@ allocate_stack(common, private_data_size + local_size); /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0); -copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags); /* This variable is saved and restored all time when we enter or exit from a recursive context. */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0); -if (needs_control_head) +if (recurse_flags & recurse_flag_control_head_found) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); if (alt_max > 1) @@ -13271,7 +13926,7 @@ cc += GET(cc, 1); while (1) { altbacktrack.top = NULL; - altbacktrack.topbacktracks = NULL; + altbacktrack.own_backtracks = NULL; if (altbacktrack.cc != ccbegin) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); @@ -13280,11 +13935,16 @@ while (1) if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; - allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1); + allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); - if (alt_max > 1 || has_accept) - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); + if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) + { + if (alt_max > 3) + put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); + else + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); + } add_jump(compiler, &match, JUMP(SLJIT_JUMP)); @@ -13295,65 +13955,55 @@ while (1) common->currententry->backtrack_label = LABEL(); set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label); - sljit_emit_fast_enter(compiler, TMP1, 0); + sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0); - if (has_accept) - accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_max * sizeof (sljit_sw)); + if (recurse_flags & recurse_flag_accept_found) + accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0); - copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags); if (alt_max > 1) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); free_stack(common, 2); - if (alt_max > 4) + if (alt_max > 3) { - /* Table jump if alt_max is greater than 4. */ - next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw)); - if (SLJIT_UNLIKELY(next_update_addr == NULL)) - return; - sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr); - add_label_addr(common, next_update_addr++); + sljit_emit_ijump(compiler, SLJIT_JUMP, TMP1, 0); + sljit_set_put_label(put_label, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); } else - { - if (alt_max == 4) - alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); - alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); - } + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); } else - free_stack(common, has_accept ? 2 : 1); + free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1); + } + else if (alt_max > 3) + { + sljit_set_put_label(put_label, LABEL()); + sljit_emit_op0(compiler, SLJIT_ENDBR); } - else if (alt_max > 4) - add_label_addr(common, next_update_addr++); else { - if (alt_count != 2 * sizeof(sljit_uw)) + JUMPHERE(next_alt); + if (alt_count + 1 < alt_max) { - JUMPHERE(alt1); - if (alt_max == 3 && alt_count == sizeof(sljit_uw)) - alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); - } - else - { - JUMPHERE(alt2); - if (alt_max == 4) - alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw)); + SLJIT_ASSERT(alt_count == 1 && alt_max == 3); + next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 1); } } - alt_count += sizeof(sljit_uw); + alt_count++; compile_backtrackingpath(common, altbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; - set_jumps(altbacktrack.topbacktracks, LABEL()); + set_jumps(altbacktrack.own_backtracks, LABEL()); if (*cc != OP_ALT) break; @@ -13366,24 +14016,24 @@ while (1) quit = LABEL(); -copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); free_stack(common, private_data_size + local_size); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); -sljit_emit_fast_return(compiler, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); if (common->quit != NULL) { - SLJIT_ASSERT(has_quit); + SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found); set_jumps(common->quit, LABEL()); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); - copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags); JUMPTO(SLJIT_JUMP, quit); } -if (has_accept) +if (recurse_flags & recurse_flag_accept_found) { JUMPHERE(accept_exit); free_stack(common, 2); @@ -13391,17 +14041,17 @@ if (has_accept) /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0); - copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); free_stack(common, private_data_size + local_size); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); - sljit_emit_fast_return(compiler, TMP2, 0); + OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); } if (common->accept != NULL) { - SLJIT_ASSERT(has_accept); + SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found); set_jumps(common->accept, LABEL()); @@ -13409,18 +14059,18 @@ if (common->accept != NULL) OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0); allocate_stack(common, 2); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, alt_count); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1); } set_jumps(match, LABEL()); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); -copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); -sljit_emit_fast_return(compiler, TMP2, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); } #undef COMPILE_BACKTRACKINGPATH @@ -13444,7 +14094,6 @@ executable_functions *functions; void *executable_func; sljit_uw executable_size; sljit_uw total_length; -label_addr_list *label_addr; struct sljit_label *mainloop_label = NULL; struct sljit_label *continue_match_label; struct sljit_label *empty_match_found_label = NULL; @@ -13453,12 +14102,20 @@ struct sljit_label *reset_match_label; struct sljit_label *quit_label; struct sljit_jump *jump; struct sljit_jump *minlength_check_failed = NULL; -struct sljit_jump *reqbyte_notfound = NULL; struct sljit_jump *empty_match = NULL; struct sljit_jump *end_anchor_failed = NULL; +jump_list *reqcu_not_found = NULL; SLJIT_ASSERT(tables); +#if HAS_VIRTUAL_REGISTERS == 1 +SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0); +#elif HAS_VIRTUAL_REGISTERS == 0 +SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0); +#else +#error "Invalid value for HAS_VIRTUAL_REGISTERS" +#endif + memset(&rootbacktrack, 0, sizeof(backtrack_common)); memset(common, 0, sizeof(compiler_common)); common->re = re; @@ -13475,7 +14132,8 @@ common->read_only_data_head = NULL; common->fcc = tables + fcc_offset; common->lcc = (sljit_sw)(tables + lcc_offset); common->mode = mode; -common->might_be_empty = re->minlength == 0; +common->might_be_empty = (re->minlength == 0) || (re->flags & PCRE2_MATCH_EMPTY); +common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY); common->nltype = NLTYPE_FIXED; switch(re->newline_convention) { @@ -13512,7 +14170,7 @@ common->alt_circumflex = (re->overall_options & PCRE2_ALT_CIRCUMFLEX) != 0; #ifdef SUPPORT_UNICODE /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ common->utf = (re->overall_options & PCRE2_UTF) != 0; -common->use_ucp = (re->overall_options & PCRE2_UCP) != 0; +common->ucp = (re->overall_options & PCRE2_UCP) != 0; if (common->utf) { if (common->nltype == NLTYPE_ANY) @@ -13614,7 +14272,7 @@ SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); total_length = ccend - common->start; -common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data); +common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data); if (!common->private_data_ptrs) { SLJIT_FREE(common->optimized_cbracket, allocator_data); @@ -13623,16 +14281,15 @@ if (!common->private_data_ptrs) memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); + +if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) + detect_early_fail(common, common->start, &private_data_size, 0, 0); + set_private_data_ptrs(common, &private_data_size, ccend); -if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0) - { - if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back) - detect_fast_fail(common, common->start, &private_data_size, 4); - } -SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr); +SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); -if (private_data_size > SLJIT_MAX_LOCAL_SIZE) +if (private_data_size > 65536) { SLJIT_FREE(common->private_data_ptrs, allocator_data); SLJIT_FREE(common->optimized_cbracket, allocator_data); @@ -13646,7 +14303,7 @@ if (common->has_then) set_then_offsets(common, common->start, NULL); } -compiler = sljit_create_compiler(allocator_data); +compiler = sljit_create_compiler(allocator_data, NULL); if (!compiler) { SLJIT_FREE(common->optimized_cbracket, allocator_data); @@ -13655,8 +14312,9 @@ if (!compiler) } common->compiler = compiler; -/* Main pcre_jit_exec entry. */ -sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size); +/* Main pcre2_jit_exec entry. */ +SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0); +sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size); /* Register init. */ reset_ovector(common, (re->top_bracket + 1) * 2); @@ -13674,8 +14332,8 @@ OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_sta OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); -if (common->fast_fail_start_ptr < common->fast_fail_end_ptr) - reset_fast_fail(common); +if (common->early_fail_start_ptr < common->early_fail_end_ptr) + reset_early_fail(common); if (mode == PCRE2_JIT_PARTIAL_SOFT) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); @@ -13712,7 +14370,7 @@ if (mode == PCRE2_JIT_COMPLETE && re->minlength > 0 && (re->overall_options & PC minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0); } if (common->req_char_ptr != 0) - reqbyte_notfound = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0); + reqcu_not_found = search_requested_char(common, (PCRE2_UCHAR)(re->last_codeunit), (re->flags & PCRE2_LASTCASELESS) != 0, (re->flags & PCRE2_FIRSTSET) != 0); /* Store the current STR_PTR in OVECTOR(0). */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); @@ -13721,7 +14379,7 @@ OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); if (common->capture_last_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, 0); if (common->fast_forward_bc_ptr != NULL) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3, STR_PTR, 0); if (common->start_ptr != OVECTOR(0)) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); @@ -13742,7 +14400,7 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) sljit_free_compiler(compiler); SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); - PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } @@ -13768,6 +14426,8 @@ if (common->abort != NULL) set_jumps(common->abort, common->abort_label); if (minlength_check_failed != NULL) SET_LABEL(minlength_check_failed, common->abort_label); + +sljit_emit_op0(compiler, SLJIT_SKIP_FRAMES_BEFORE_RETURN); sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); if (common->failed_match != NULL) @@ -13796,7 +14456,7 @@ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) sljit_free_compiler(compiler); SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); - PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } @@ -13820,7 +14480,7 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0 && common->match_end_ptr != 0) } OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), - (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr); + (common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1) >> 3) : common->start_ptr); if ((re->overall_options & PCRE2_ANCHORED) == 0) { @@ -13845,8 +14505,8 @@ if ((re->overall_options & PCRE2_ANCHORED) == 0) } /* No more remaining characters. */ -if (reqbyte_notfound != NULL) - JUMPHERE(reqbyte_notfound); +if (reqcu_not_found != NULL) + set_jumps(reqcu_not_found, LABEL()); if (mode == PCRE2_JIT_PARTIAL_SOFT) CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); @@ -13861,9 +14521,9 @@ if (common->might_be_empty) JUMPHERE(empty_match); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); JUMPTO(SLJIT_ZERO, empty_match_found_label); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); @@ -13871,25 +14531,45 @@ if (common->might_be_empty) } common->fast_forward_bc_ptr = NULL; -common->fast_fail_start_ptr = 0; -common->fast_fail_end_ptr = 0; +common->early_fail_start_ptr = 0; +common->early_fail_end_ptr = 0; common->currententry = common->entries; common->local_quit_available = TRUE; quit_label = common->quit_label; -while (common->currententry != NULL) +if (common->currententry != NULL) { - /* Might add new entries. */ - compile_recurse(common); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + /* A free bit for each private data. */ + common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3; + SLJIT_ASSERT(common->recurse_bitset_size > 0); + common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);; + + if (common->recurse_bitset != NULL) { + do + { + /* Might add new entries. */ + compile_recurse(common); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + break; + flush_stubs(common); + common->currententry = common->currententry->next; + } + while (common->currententry != NULL); + + SLJIT_FREE(common->recurse_bitset, allocator_data); + } + + if (common->currententry != NULL) + { + /* The common->recurse_bitset has been freed. */ + SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL); + sljit_free_compiler(compiler); SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); - PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } - flush_stubs(common); - common->currententry = common->currententry->next; } common->local_quit_available = FALSE; common->quit_label = quit_label; @@ -13898,7 +14578,7 @@ common->quit_label = quit_label; /* This is a (really) rare case. */ set_jumps(common->stackalloc, LABEL()); /* RETURN_ADDR is not a saved register. */ -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); @@ -13908,14 +14588,14 @@ OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE); OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack)); OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0); -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize)); jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0); OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); -sljit_emit_fast_return(compiler, TMP1, 0); +OP_SRC(SLJIT_FAST_RETURN, TMP1, 0); /* Allocation failed. */ JUMPHERE(jump); @@ -13936,7 +14616,12 @@ if (common->revertframes != NULL) if (common->wordboundary != NULL) { set_jumps(common->wordboundary, LABEL()); - check_wordboundary(common); + check_wordboundary(common, FALSE); + } +if (common->ucp_wordboundary != NULL) + { + set_jumps(common->ucp_wordboundary, LABEL()); + check_wordboundary(common, TRUE); } if (common->anynewline != NULL) { @@ -13963,10 +14648,17 @@ if (common->caselesscmp != NULL) set_jumps(common->caselesscmp, LABEL()); do_caselesscmp(common); } -if (common->reset_match != NULL) +if (common->reset_match != NULL || common->restart_match != NULL) { + if (common->restart_match != NULL) + { + set_jumps(common->restart_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + } + set_jumps(common->reset_match, LABEL()); do_reset_match(common, (re->top_bracket + 1) * 2); + /* The value of restart_match is in TMP1. */ CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); JUMPTO(SLJIT_JUMP, reset_match_label); @@ -14028,16 +14720,11 @@ SLJIT_FREE(common->private_data_ptrs, allocator_data); executable_func = sljit_generate_code(compiler); executable_size = sljit_get_generated_code_size(compiler); -label_addr = common->label_addrs; -while (label_addr != NULL) - { - *label_addr->update_addr = sljit_get_label_addr(label_addr->label); - label_addr = label_addr->next; - } sljit_free_compiler(compiler); + if (executable_func == NULL) { - PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } @@ -14051,8 +14738,8 @@ else { /* This case is highly unlikely since we just recently freed a lot of memory. Not impossible though. */ - sljit_free_code(executable_func); - PRIV(jit_free_rodata)(common->read_only_data_head, compiler->allocator_data); + sljit_free_code(executable_func, NULL); + PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } memset(functions, 0, sizeof(executable_functions)); @@ -14097,18 +14784,11 @@ Returns: 0: success or (*NOJIT) was used PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_jit_compile(pcre2_code *code, uint32_t options) { -#ifndef SUPPORT_JIT - -(void)code; -(void)options; -return PCRE2_ERROR_JIT_BADOPTION; - -#else /* SUPPORT_JIT */ - pcre2_real_code *re = (pcre2_real_code *)code; +#ifdef SUPPORT_JIT executable_functions *functions; -uint32_t excluded_options; -int result; +static int executable_allocator_is_working = -1; +#endif if (code == NULL) return PCRE2_ERROR_NULL; @@ -14116,30 +14796,100 @@ if (code == NULL) if ((options & ~PUBLIC_JIT_COMPILE_OPTIONS) != 0) return PCRE2_ERROR_JIT_BADOPTION; -if ((re->flags & PCRE2_NOJIT) != 0) return 0; +/* Support for invalid UTF was first introduced in JIT, with the option +PCRE2_JIT_INVALID_UTF. Later, support was added to the interpreter, and the +compile-time option PCRE2_MATCH_INVALID_UTF was created. This is now the +preferred feature, with the earlier option deprecated. However, for backward +compatibility, if the earlier option is set, it forces the new option so that +if JIT matching falls back to the interpreter, there is still support for +invalid UTF. However, if this function has already been successfully called +without PCRE2_JIT_INVALID_UTF and without PCRE2_MATCH_INVALID_UTF (meaning that +non-invalid-supporting JIT code was compiled), give an error. + +If in the future support for PCRE2_JIT_INVALID_UTF is withdrawn, the following +actions are needed: + + 1. Remove the definition from pcre2.h.in and from the list in + PUBLIC_JIT_COMPILE_OPTIONS above. + + 2. Replace PCRE2_JIT_INVALID_UTF with a local flag in this module. + + 3. Replace PCRE2_JIT_INVALID_UTF in pcre2_jit_test.c. + + 4. Delete the following short block of code. The setting of "re" and + "functions" can be moved into the JIT-only block below, but if that is + done, (void)re and (void)functions will be needed in the non-JIT case, to + avoid compiler warnings. +*/ +#ifdef SUPPORT_JIT functions = (executable_functions *)re->executable_jit; +#endif + +if ((options & PCRE2_JIT_INVALID_UTF) != 0) + { + if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) == 0) + { +#ifdef SUPPORT_JIT + if (functions != NULL) return PCRE2_ERROR_JIT_BADOPTION; +#endif + re->overall_options |= PCRE2_MATCH_INVALID_UTF; + } + } + +/* The above tests are run with and without JIT support. This means that +PCRE2_JIT_INVALID_UTF propagates back into the regex options (ensuring +interpreter support) even in the absence of JIT. But now, if there is no JIT +support, give an error return. */ + +#ifndef SUPPORT_JIT +return PCRE2_ERROR_JIT_BADOPTION; +#else /* SUPPORT_JIT */ + +/* There is JIT support. Do the necessary. */ + +if ((re->flags & PCRE2_NOJIT) != 0) return 0; + +if (executable_allocator_is_working == -1) + { + /* Checks whether the executable allocator is working. This check + might run multiple times in multi-threaded environments, but the + result should not be affected by it. */ + void *ptr = SLJIT_MALLOC_EXEC(32, NULL); + if (ptr != NULL) + { + SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL); + executable_allocator_is_working = 1; + } + else executable_allocator_is_working = 0; + } + +if (!executable_allocator_is_working) + return PCRE2_ERROR_NOMEMORY; + +if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) + options |= PCRE2_JIT_INVALID_UTF; if ((options & PCRE2_JIT_COMPLETE) != 0 && (functions == NULL || functions->executable_funcs[0] == NULL)) { - excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); - result = jit_compile(code, options & ~excluded_options); + uint32_t excluded_options = (PCRE2_JIT_PARTIAL_SOFT | PCRE2_JIT_PARTIAL_HARD); + int result = jit_compile(code, options & ~excluded_options); if (result != 0) return result; } if ((options & PCRE2_JIT_PARTIAL_SOFT) != 0 && (functions == NULL || functions->executable_funcs[1] == NULL)) { - excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD); - result = jit_compile(code, options & ~excluded_options); + uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_HARD); + int result = jit_compile(code, options & ~excluded_options); if (result != 0) return result; } if ((options & PCRE2_JIT_PARTIAL_HARD) != 0 && (functions == NULL || functions->executable_funcs[2] == NULL)) { - excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT); - result = jit_compile(code, options & ~excluded_options); + uint32_t excluded_options = (PCRE2_JIT_COMPLETE | PCRE2_JIT_PARTIAL_SOFT); + int result = jit_compile(code, options & ~excluded_options); if (result != 0) return result; } |