diff options
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_jit_compile.c')
-rw-r--r-- | src/3rdparty/pcre2/src/pcre2_jit_compile.c | 2340 |
1 files changed, 1500 insertions, 840 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_jit_compile.c b/src/3rdparty/pcre2/src/pcre2_jit_compile.c index f3a26aeee0..050063ec6d 100644 --- a/src/3rdparty/pcre2/src/pcre2_jit_compile.c +++ b/src/3rdparty/pcre2/src/pcre2_jit_compile.c @@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel This module by Zoltan Herczeg Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2019 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -43,6 +43,12 @@ POSSIBILITY OF SUCH DAMAGE. #include "config.h" #endif +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#include <sanitizer/msan_interface.h> +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + #include "pcre2_internal.h" #ifdef SUPPORT_JIT @@ -236,12 +242,21 @@ code generator. It is allocated by compile_matchingpath, and contains the arguments for compile_backtrackingpath. Must be the first member of its descendants. */ typedef struct backtrack_common { - /* Concatenation stack. */ + /* Backtracking path of an opcode, which falls back + to our opcode, if it cannot resume matching. */ struct backtrack_common *prev; - jump_list *nextbacktracks; - /* Internal stack (for component operators). */ + /* Backtracks for opcodes without backtracking path. + These opcodes are between 'prev' and the current + opcode, and they never resume the match. */ + jump_list *simple_backtracks; + /* Internal backtracking list for block constructs + which contains other opcodes, such as brackets, + asserts, conditionals, etc. */ struct backtrack_common *top; - jump_list *topbacktracks; + /* Backtracks used internally by the opcode. For component + opcodes, this list is also used by those opcodes without + backtracking path which follows the 'top' backtrack. */ + jump_list *own_backtracks; /* Opcode pointer. */ PCRE2_SPTR cc; } backtrack_common; @@ -338,6 +353,12 @@ typedef struct recurse_backtrack { BOOL inlined_pattern; } recurse_backtrack; +typedef struct vreverse_backtrack { + backtrack_common common; + /* Return to the matching path. */ + struct sljit_label *matchingpath; +} vreverse_backtrack; + #define OP_THEN_TRAP OP_TABLE_LENGTH typedef struct then_trap_backtrack { @@ -404,7 +425,9 @@ typedef struct compiler_common { sljit_s32 match_end_ptr; /* Points to the marked string. */ sljit_s32 mark_ptr; - /* Recursive control verb management chain. */ + /* Head of the recursive control verb management chain. + Each item must have a previous offset and type + (see control_types) values. See do_search_mark. */ sljit_s32 control_head_ptr; /* Points to the last matched capture block index. */ sljit_s32 capture_last_ptr; @@ -413,6 +436,9 @@ typedef struct compiler_common { /* Locals used by fast fail optimization. */ sljit_s32 early_fail_start_ptr; sljit_s32 early_fail_end_ptr; + /* Variables used by recursive call generator. */ + sljit_s32 recurse_bitset_size; + uint8_t *recurse_bitset; /* Flipped and lower case tables. */ const sljit_u8 *fcc; @@ -471,12 +497,15 @@ typedef struct compiler_common { jump_list *stackalloc; jump_list *revertframes; jump_list *wordboundary; + jump_list *ucp_wordboundary; jump_list *anynewline; jump_list *hspace; jump_list *vspace; jump_list *casefulcmp; jump_list *caselesscmp; jump_list *reset_match; + /* Same as reset_match, but resets the STR_PTR as well. */ + jump_list *restart_match; BOOL unset_backref; BOOL alt_circumflex; #ifdef SUPPORT_UNICODE @@ -539,7 +568,7 @@ typedef struct compare_context { #undef CMP /* Used for accessing the elements of the stack. */ -#define STACK(i) ((i) * (int)sizeof(sljit_sw)) +#define STACK(i) ((i) * SSIZE_OF(sw)) #ifdef SLJIT_PREF_SHIFT_REG #if SLJIT_PREF_SHIFT_REG == SLJIT_R2 @@ -587,8 +616,8 @@ to characters. The vector data is divided into two groups: the first group contains the start / end character pointers, and the second is the start pointers when the end of the capturing group has not yet reached. */ #define OVECTOR_START (common->ovector_start) -#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) -#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) +#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw)) +#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw)) #define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -613,6 +642,8 @@ the start pointers when the end of the capturing group has not yet reached. */ sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) #define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) +#define OP2U(op, src1, src1w, src2, src2w) \ + sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w)) #define OP_SRC(op, src, srcw) \ sljit_emit_op_src(compiler, (op), (src), (srcw)) #define LABEL() \ @@ -631,8 +662,8 @@ the start pointers when the end of the capturing group has not yet reached. */ sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) #define OP_FLAGS(op, dst, dstw, type) \ sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type)) -#define CMOV(type, dst_reg, src, srcw) \ - sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw)) +#define SELECT(type, dst_reg, src1, src1w, src2_reg) \ + sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg)) #define GET_LOCAL_BASE(dst, dstw, offset) \ sljit_get_local_base(compiler, (dst), (dstw), (offset)) @@ -852,6 +883,21 @@ SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS); return count; } +static BOOL find_vreverse(PCRE2_SPTR cc) +{ + SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA); + + do + { + if (cc[1 + LINK_SIZE] == OP_VREVERSE) + return TRUE; + cc += GET(cc, 1); + } + while (*cc == OP_ALT); + + return FALSE; +} + /* Functions whose might need modification for all new supported opcodes: next_opcode check_opcode_types @@ -922,6 +968,7 @@ switch(*cc) case OP_KETRMIN: case OP_KETRPOS: case OP_REVERSE: + case OP_VREVERSE: case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: @@ -958,6 +1005,8 @@ switch(*cc) case OP_ASSERT_ACCEPT: case OP_CLOSE: case OP_SKIPZERO: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: return cc + PRIV(OP_lengths)[*cc]; case OP_CHAR: @@ -1226,34 +1275,41 @@ while (cc < ccend) return TRUE; } -#define EARLY_FAIL_ENHANCE_MAX (1 + 3) +#define EARLY_FAIL_ENHANCE_MAX (3 + 3) /* -start: - 0 - skip / early fail allowed - 1 - only early fail with range allowed - >1 - (start - 1) early fail is processed + Start represent the number of allowed early fail enhancements + + The 0-2 values has a special meaning: + 0 - skip is allowed for all iterators + 1 - fail is allowed for all iterators + 2 - fail is allowed for greedy iterators + 3 - only ranged early fail is allowed + >3 - (start - 3) number of remaining ranged early fails allowed -return: current number of iterators enhanced with fast fail +return: the updated value of start */ -static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start) +static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, + int *private_data_start, sljit_s32 depth, int start) { PCRE2_SPTR begin = cc; PCRE2_SPTR next_alt; PCRE2_SPTR end; PCRE2_SPTR accelerated_start; int result = 0; -int count; -BOOL fast_forward_allowed = TRUE; +int count, prev_count; SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA); SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0); SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX); +next_alt = cc + GET(cc, 1); +if (*next_alt == OP_ALT && start < 1) + start = 1; + do { count = start; - next_alt = cc + GET(cc, 1); cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0); while (TRUE) @@ -1273,6 +1329,8 @@ do case OP_CIRCM: case OP_DOLL: case OP_DOLLM: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: /* Zero width assertions. */ cc++; continue; @@ -1290,21 +1348,22 @@ do case OP_HSPACE: case OP_NOT_VSPACE: case OP_VSPACE: - fast_forward_allowed = FALSE; + if (count < 1) + count = 1; cc++; continue; case OP_ANYNL: case OP_EXTUNI: - fast_forward_allowed = FALSE; - if (count == 0) - count = 1; + if (count < 3) + count = 3; cc++; continue; case OP_NOTPROP: case OP_PROP: - fast_forward_allowed = FALSE; + if (count < 1) + count = 1; cc += 1 + 2; continue; @@ -1312,17 +1371,22 @@ do case OP_CHARI: case OP_NOT: case OP_NOTI: - fast_forward_allowed = FALSE; + if (count < 1) + count = 1; cc += 2; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); #endif continue; - case OP_TYPESTAR: case OP_TYPEMINSTAR: - case OP_TYPEPLUS: case OP_TYPEMINPLUS: + if (count == 2) + count = 3; + /* Fall through */ + + case OP_TYPESTAR: + case OP_TYPEPLUS: case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: /* The type or prop opcode is skipped in the next iteration. */ @@ -1334,14 +1398,18 @@ do break; } - if (count == 0) + if (count < 3) + count = 3; + continue; + + case OP_TYPEEXACT: + if (count < 1) count = 1; - fast_forward_allowed = FALSE; + cc += 1 + IMM2_SIZE; continue; case OP_TYPEUPTO: case OP_TYPEMINUPTO: - case OP_TYPEEXACT: case OP_TYPEPOSUPTO: cc += IMM2_SIZE; /* Fall through */ @@ -1350,37 +1418,40 @@ do case OP_TYPEMINQUERY: case OP_TYPEPOSQUERY: /* The type or prop opcode is skipped in the next iteration. */ - fast_forward_allowed = FALSE; - if (count == 0) - count = 1; + if (count < 3) + count = 3; cc += 1; continue; - case OP_STAR: case OP_MINSTAR: - case OP_PLUS: case OP_MINPLUS: + case OP_MINSTARI: + case OP_MINPLUSI: + case OP_NOTMINSTAR: + case OP_NOTMINPLUS: + case OP_NOTMINSTARI: + case OP_NOTMINPLUSI: + if (count == 2) + count = 3; + /* Fall through */ + + case OP_STAR: + case OP_PLUS: case OP_POSSTAR: case OP_POSPLUS: case OP_STARI: - case OP_MINSTARI: case OP_PLUSI: - case OP_MINPLUSI: case OP_POSSTARI: case OP_POSPLUSI: case OP_NOTSTAR: - case OP_NOTMINSTAR: case OP_NOTPLUS: - case OP_NOTMINPLUS: case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTSTARI: - case OP_NOTMINSTARI: case OP_NOTPLUSI: - case OP_NOTMINPLUSI: case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: accelerated_start = cc; @@ -1390,9 +1461,17 @@ do #endif break; + case OP_EXACT: + if (count < 1) + count = 1; + cc += 2 + IMM2_SIZE; +#ifdef SUPPORT_UNICODE + if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); +#endif + continue; + case OP_UPTO: case OP_MINUPTO: - case OP_EXACT: case OP_POSUPTO: case OP_UPTOI: case OP_MINUPTOI: @@ -1421,9 +1500,8 @@ do case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTPOSQUERYI: - fast_forward_allowed = FALSE; - if (count == 0) - count = 1; + if (count < 3) + count = 3; cc += 2; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -1443,10 +1521,14 @@ do switch (*cc) { - case OP_CRSTAR: case OP_CRMINSTAR: - case OP_CRPLUS: case OP_CRMINPLUS: + if (count == 2) + count = 3; + /* Fall through */ + + case OP_CRSTAR: + case OP_CRPLUS: case OP_CRPOSSTAR: case OP_CRPOSPLUS: cc++; @@ -1455,43 +1537,60 @@ do case OP_CRRANGE: case OP_CRMINRANGE: case OP_CRPOSRANGE: + if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE)) + { + /* Exact repeat. */ + cc += 1 + 2 * IMM2_SIZE; + if (count < 1) + count = 1; + continue; + } + cc += 2 * IMM2_SIZE; /* Fall through */ case OP_CRQUERY: case OP_CRMINQUERY: case OP_CRPOSQUERY: cc++; - if (count == 0) - count = 1; - /* Fall through */ + if (count < 3) + count = 3; + continue; + default: - accelerated_start = NULL; - fast_forward_allowed = FALSE; + /* No repeat. */ + if (count < 1) + count = 1; continue; } break; - case OP_ONCE: case OP_BRA: case OP_CBRA: - end = cc + GET(cc, 1); + prev_count = count; + if (count < 1) + count = 1; - fast_forward_allowed = FALSE; if (depth >= 4) break; - end = bracketend(cc) - (1 + LINK_SIZE); - if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)) + if (count < 3 && cc[GET(cc, 1)] == OP_ALT) + count = 3; + + end = bracketend(cc); + if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)) break; - count = detect_early_fail(common, cc, private_data_start, depth + 1, count); + prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count); + + if (prev_count > count) + count = prev_count; if (PRIVATE_DATA(cc) != 0) common->private_data_ptrs[begin - common->start] = 1; if (count < EARLY_FAIL_ENHANCE_MAX) { - cc = end + (1 + LINK_SIZE); + cc = end; continue; } break; @@ -1504,55 +1603,52 @@ do continue; } - if (accelerated_start != NULL) + if (accelerated_start == NULL) + break; + + if (count == 0) { - if (count == 0) - { - count++; + common->fast_forward_bc_ptr = accelerated_start; + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip; + *private_data_start += sizeof(sljit_sw); + count = 4; + } + else if (count < 3) + { + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail; - if (fast_forward_allowed && *next_alt == OP_KET) - { - common->fast_forward_bc_ptr = accelerated_start; - common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip; - *private_data_start += sizeof(sljit_sw); - } - else - { - common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail; + if (common->early_fail_start_ptr == 0) + common->early_fail_start_ptr = *private_data_start; - if (common->early_fail_start_ptr == 0) - common->early_fail_start_ptr = *private_data_start; + *private_data_start += sizeof(sljit_sw); + common->early_fail_end_ptr = *private_data_start; - *private_data_start += sizeof(sljit_sw); - common->early_fail_end_ptr = *private_data_start; + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return EARLY_FAIL_ENHANCE_MAX; - if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) - return EARLY_FAIL_ENHANCE_MAX; - } - } - else - { - common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range; + count = 4; + } + else + { + common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range; - if (common->early_fail_start_ptr == 0) - common->early_fail_start_ptr = *private_data_start; + if (common->early_fail_start_ptr == 0) + common->early_fail_start_ptr = *private_data_start; - *private_data_start += 2 * sizeof(sljit_sw); - common->early_fail_end_ptr = *private_data_start; + *private_data_start += 2 * sizeof(sljit_sw); + common->early_fail_end_ptr = *private_data_start; - if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) - return EARLY_FAIL_ENHANCE_MAX; - } + if (*private_data_start > SLJIT_MAX_LOCAL_SIZE) + return EARLY_FAIL_ENHANCE_MAX; - /* Cannot be part of a repeat. */ - common->private_data_ptrs[begin - common->start] = 1; count++; - - if (count < EARLY_FAIL_ENHANCE_MAX) - continue; } - break; + /* Cannot be part of a repeat. */ + common->private_data_ptrs[begin - common->start] = 1; + + if (count >= EARLY_FAIL_ENHANCE_MAX) + break; } if (*cc != OP_ALT && *cc != OP_KET) @@ -1560,8 +1656,8 @@ do else if (result < count) result = count; - fast_forward_allowed = FALSE; cc = next_alt; + next_alt = cc + GET(cc, 1); } while (*cc == OP_ALT); @@ -1616,7 +1712,7 @@ if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0) /* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/ * Skip the check of the second part. */ -if (PRIVATE_DATA(end - LINK_SIZE) == 0) +if (PRIVATE_DATA(end - LINK_SIZE) != 0) return TRUE; next = end; @@ -1785,7 +1881,6 @@ while (cc < ccend) case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: case OP_ASSERT_NA: - case OP_ASSERTBACK_NA: case OP_ONCE: case OP_SCRIPT_RUN: case OP_BRAPOS: @@ -1797,6 +1892,19 @@ while (cc < ccend) bracketlen = 1 + LINK_SIZE; break; + case OP_ASSERTBACK_NA: + common->private_data_ptrs[cc - common->start] = private_data_ptr; + private_data_ptr += sizeof(sljit_sw); + + if (find_vreverse(cc)) + { + common->private_data_ptrs[cc + 1 - common->start] = 1; + private_data_ptr += sizeof(sljit_sw); + } + + bracketlen = 1 + LINK_SIZE; + break; + case OP_CBRAPOS: case OP_SCBRAPOS: common->private_data_ptrs[cc - common->start] = private_data_ptr; @@ -2096,6 +2204,9 @@ while (cc < ccend) case OP_CALLOUT: case OP_CALLOUT_STR: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); break; @@ -2141,9 +2252,9 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setsom_found = TRUE; } cc += 1; @@ -2158,9 +2269,9 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setmark_found = TRUE; } cc += 1 + 2 + cc[1]; @@ -2171,27 +2282,27 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setsom_found = TRUE; } if (common->mark_ptr != 0 && !setmark_found) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); setmark_found = TRUE; } if (common->capture_last_ptr != 0 && !capture_last_found) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); capture_last_found = TRUE; } cc += 1 + LINK_SIZE; @@ -2205,20 +2316,20 @@ while (cc < ccend) { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); capture_last_found = TRUE; } offset = (GET2(cc, 1 + LINK_SIZE)) << 1; OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset)); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0); - stackpos -= (int)sizeof(sljit_sw); + stackpos -= SSIZE_OF(sw); cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -2251,7 +2362,7 @@ int i; for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) { SLJIT_ASSERT(status->tmp_regs[i] >= 0); - SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]); + SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]); status->store_bases[i] = -1; } @@ -2271,7 +2382,7 @@ SLJIT_ASSERT(load_base > 0 && store_base > 0); if (status->store_bases[next_tmp_reg] == -1) { /* Preserve virtual registers. */ - if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0) + if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0) OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0); } else @@ -2300,7 +2411,7 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0); /* Restore virtual registers. */ - if (sljit_get_register_index(saved_tmp_reg) < 0) + if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0) OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0); } @@ -2310,22 +2421,47 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++) #undef RECURSE_TMP_REG_COUNT -static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, - BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept) +static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index) +{ +uint8_t *byte; +uint8_t mask; + +SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0); + +bit_index >>= SLJIT_WORD_SHIFT; + +SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size); + +mask = 1 << (bit_index & 0x7); +byte = common->recurse_bitset + (bit_index >> 3); + +if (*byte & mask) + return FALSE; + +*byte |= mask; +return TRUE; +} + +enum get_recurse_flags { + recurse_flag_quit_found = (1 << 0), + recurse_flag_accept_found = (1 << 1), + recurse_flag_setsom_found = (1 << 2), + recurse_flag_setmark_found = (1 << 3), + recurse_flag_control_head_found = (1 << 4), +}; + +static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags) { int length = 1; -int size; +int size, offset; PCRE2_SPTR alternative; -BOOL quit_found = FALSE; -BOOL accept_found = FALSE; -BOOL setsom_found = FALSE; -BOOL setmark_found = FALSE; -BOOL capture_last_found = FALSE; -BOOL control_head_found = FALSE; +uint32_t recurse_flags = 0; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD SLJIT_ASSERT(common->control_head_ptr != 0); -control_head_found = TRUE; +recurse_flags |= recurse_flag_control_head_found; #endif /* Calculate the sum of the private machine words. */ @@ -2336,24 +2472,26 @@ while (cc < ccend) { case OP_SET_SOM: SLJIT_ASSERT(common->has_set_som); - setsom_found = TRUE; + recurse_flags |= recurse_flag_setsom_found; cc += 1; break; case OP_RECURSE: if (common->has_set_som) - setsom_found = TRUE; + recurse_flags |= recurse_flag_setsom_found; if (common->mark_ptr != 0) - setmark_found = TRUE; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; + recurse_flags |= recurse_flag_setmark_found; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; cc += 1 + LINK_SIZE; break; case OP_KET: - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0) { - length++; + if (recurse_check_bit(common, offset)) + length++; SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); cc += PRIVATE_DATA(cc + 1); } @@ -2372,39 +2510,55 @@ while (cc < ccend) case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - length++; SLJIT_ASSERT(PRIVATE_DATA(cc) != 0); + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - length += 2; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - length += 2 + 2; - if (common->capture_last_ptr != 0) - capture_last_found = TRUE; + offset = GET2(cc, 1 + LINK_SIZE); + if (recurse_check_bit(common, OVECTOR(offset << 1))) + { + SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1))); + length += 2; + } + if (recurse_check_bit(common, OVECTOR_PRIV(offset))) + length++; + if (recurse_check_bit(common, PRIVATE_DATA(cc))) + length++; + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) + length++; cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_COND: /* Might be a hidden SCOND. */ alternative = cc + GET(cc, 1); - if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) + if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc))) length++; cc += 1 + LINK_SIZE; break; CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length++; cc += 2; #ifdef SUPPORT_UNICODE @@ -2413,8 +2567,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 2; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2422,8 +2580,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2431,20 +2593,29 @@ while (cc < ccend) break; CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length++; cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc) != 0) + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) + { + SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw))); length += 2; + } cc += 1 + IMM2_SIZE; break; @@ -2456,7 +2627,9 @@ while (cc < ccend) #else size = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif - if (PRIVATE_DATA(cc) != 0) + + offset = PRIVATE_DATA(cc); + if (offset != 0 && recurse_check_bit(common, offset)) length += get_class_iterator_size(cc + size); cc += size; break; @@ -2466,12 +2639,11 @@ while (cc < ccend) case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); - if (!setmark_found) - setmark_found = TRUE; + recurse_flags |= recurse_flag_setmark_found; if (common->control_head_ptr != 0) - control_head_found = TRUE; + recurse_flags |= recurse_flag_control_head_found; if (*cc != OP_MARK) - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc += 1 + 2 + cc[1]; break; @@ -2479,26 +2651,24 @@ while (cc < ccend) case OP_PRUNE: case OP_SKIP: case OP_COMMIT: - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc++; break; case OP_SKIP_ARG: - quit_found = TRUE; + recurse_flags |= recurse_flag_quit_found; cc += 1 + 2 + cc[1]; break; case OP_THEN: SLJIT_ASSERT(common->control_head_ptr != 0); - quit_found = TRUE; - if (!control_head_found) - control_head_found = TRUE; + recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found; cc++; break; case OP_ACCEPT: case OP_ASSERT_ACCEPT: - accept_found = TRUE; + recurse_flags |= recurse_flag_accept_found; cc++; break; @@ -2510,21 +2680,17 @@ while (cc < ccend) } SLJIT_ASSERT(cc == ccend); -if (control_head_found) - length++; -if (capture_last_found) +if (recurse_flags & recurse_flag_control_head_found) length++; -if (quit_found) +if (recurse_flags & recurse_flag_quit_found) { - if (setsom_found) + if (recurse_flags & recurse_flag_setsom_found) length++; - if (setmark_found) + if (recurse_flags & recurse_flag_setmark_found) length++; } -*needs_control_head = control_head_found; -*has_quit = quit_found; -*has_accept = accept_found; +*result_flags = recurse_flags; return length; } @@ -2537,7 +2703,7 @@ enum copy_recurse_data_types { }; static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, - int type, int stackptr, int stacktop, BOOL has_quit) + int type, int stackptr, int stacktop, uint32_t recurse_flags) { delayed_mem_copy_status status; PCRE2_SPTR alternative; @@ -2546,14 +2712,12 @@ sljit_sw shared_srcw[3]; sljit_sw kept_shared_srcw[2]; int private_count, shared_count, kept_shared_count; int from_sp, base_reg, offset, i; -BOOL setsom_found = FALSE; -BOOL setmark_found = FALSE; -BOOL capture_last_found = FALSE; -BOOL control_head_found = FALSE; + +memset(common->recurse_bitset, 0, common->recurse_bitset_size); #if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD SLJIT_ASSERT(common->control_head_ptr != 0); -control_head_found = TRUE; +recurse_check_bit(common, common->control_head_ptr); #endif switch (type) @@ -2641,45 +2805,42 @@ while (cc < ccend) { case OP_SET_SOM: SLJIT_ASSERT(common->has_set_som); - if (has_quit && !setsom_found) + if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0))) { kept_shared_srcw[0] = OVECTOR(0); kept_shared_count = 1; - setsom_found = TRUE; } cc += 1; break; case OP_RECURSE: - if (has_quit) + if (recurse_flags & recurse_flag_quit_found) { - if (common->has_set_som && !setsom_found) + if (common->has_set_som && recurse_check_bit(common, OVECTOR(0))) { kept_shared_srcw[0] = OVECTOR(0); kept_shared_count = 1; - setsom_found = TRUE; } - if (common->mark_ptr != 0 && !setmark_found) + if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr)) { kept_shared_srcw[kept_shared_count] = common->mark_ptr; kept_shared_count++; - setmark_found = TRUE; } } - if (common->capture_last_ptr != 0 && !capture_last_found) + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) { shared_srcw[0] = common->capture_last_ptr; shared_count = 1; - capture_last_found = TRUE; } cc += 1 + LINK_SIZE; break; case OP_KET: - if (PRIVATE_DATA(cc) != 0) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0) { - private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0); cc += PRIVATE_DATA(cc + 1); } @@ -2698,50 +2859,66 @@ while (cc < ccend) case OP_SBRA: case OP_SBRAPOS: case OP_SCOND: - private_count = 1; private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; cc += 1 + LINK_SIZE; break; case OP_CBRA: case OP_SCBRA: - offset = (GET2(cc, 1 + LINK_SIZE)) << 1; - shared_srcw[0] = OVECTOR(offset); - shared_srcw[1] = OVECTOR(offset + 1); - shared_count = 2; + offset = GET2(cc, 1 + LINK_SIZE); + shared_srcw[0] = OVECTOR(offset << 1); + if (recurse_check_bit(common, shared_srcw[0])) + { + shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1])); + shared_count = 2; + } - if (common->capture_last_ptr != 0 && !capture_last_found) + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) { - shared_srcw[2] = common->capture_last_ptr; - shared_count = 3; - capture_last_found = TRUE; + shared_srcw[shared_count] = common->capture_last_ptr; + shared_count++; } - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + if (common->optimized_cbracket[offset] == 0) { - private_count = 1; - private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + private_srcw[0] = OVECTOR_PRIV(offset); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; } + cc += 1 + LINK_SIZE + IMM2_SIZE; break; case OP_CBRAPOS: case OP_SCBRAPOS: - offset = (GET2(cc, 1 + LINK_SIZE)) << 1; - shared_srcw[0] = OVECTOR(offset); - shared_srcw[1] = OVECTOR(offset + 1); - shared_count = 2; + offset = GET2(cc, 1 + LINK_SIZE); + shared_srcw[0] = OVECTOR(offset << 1); + if (recurse_check_bit(common, shared_srcw[0])) + { + shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1])); + shared_count = 2; + } - if (common->capture_last_ptr != 0 && !capture_last_found) + if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr)) { - shared_srcw[2] = common->capture_last_ptr; - shared_count = 3; - capture_last_found = TRUE; + shared_srcw[shared_count] = common->capture_last_ptr; + shared_count++; } - private_count = 2; private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE)); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; + + offset = OVECTOR_PRIV(offset); + if (recurse_check_bit(common, offset)) + { + private_srcw[private_count] = offset; + private_count++; + } cc += 1 + LINK_SIZE + IMM2_SIZE; break; @@ -2750,18 +2927,17 @@ while (cc < ccend) alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { - private_count = 1; private_srcw[0] = PRIVATE_DATA(cc); + if (recurse_check_bit(common, private_srcw[0])) + private_count = 1; } cc += 1 + LINK_SIZE; break; CASE_ITERATOR_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - { + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); - } cc += 2; #ifdef SUPPORT_UNICODE if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); @@ -2769,11 +2945,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 2; #ifdef SUPPORT_UNICODE @@ -2782,11 +2959,12 @@ while (cc < ccend) break; CASE_ITERATOR_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw); + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 2 + IMM2_SIZE; #ifdef SUPPORT_UNICODE @@ -2795,30 +2973,30 @@ while (cc < ccend) break; CASE_ITERATOR_TYPE_PRIVATE_DATA_1 - if (PRIVATE_DATA(cc)) - { + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); - } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2A - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 1; break; CASE_ITERATOR_TYPE_PRIVATE_DATA_2B - if (PRIVATE_DATA(cc)) + private_srcw[0] = PRIVATE_DATA(cc); + if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0])) { private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); } cc += 1 + IMM2_SIZE; break; @@ -2832,23 +3010,28 @@ while (cc < ccend) i = 1 + 32 / (int)sizeof(PCRE2_UCHAR); #endif if (PRIVATE_DATA(cc) != 0) + { + private_count = 1; + private_srcw[0] = PRIVATE_DATA(cc); switch(get_class_iterator_size(cc + i)) { case 1: - private_count = 1; - private_srcw[0] = PRIVATE_DATA(cc); break; case 2: - private_count = 2; - private_srcw[0] = PRIVATE_DATA(cc); - private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + if (recurse_check_bit(common, private_srcw[0])) + { + private_count = 2; + private_srcw[1] = private_srcw[0] + sizeof(sljit_sw); + SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1])); + } break; default: SLJIT_UNREACHABLE(); break; } + } cc += i; break; @@ -2857,28 +3040,25 @@ while (cc < ccend) case OP_PRUNE_ARG: case OP_THEN_ARG: SLJIT_ASSERT(common->mark_ptr != 0); - if (has_quit && !setmark_found) + if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr)) { kept_shared_srcw[0] = common->mark_ptr; kept_shared_count = 1; - setmark_found = TRUE; } - if (common->control_head_ptr != 0 && !control_head_found) + if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr)) { private_srcw[0] = common->control_head_ptr; private_count = 1; - control_head_found = TRUE; } cc += 1 + 2 + cc[1]; break; case OP_THEN: SLJIT_ASSERT(common->control_head_ptr != 0); - if (!control_head_found) + if (recurse_check_bit(common, common->control_head_ptr)) { private_srcw[0] = common->control_head_ptr; private_count = 1; - control_head_found = TRUE; } cc++; break; @@ -2886,7 +3066,7 @@ while (cc < ccend) default: cc = next_opcode(common, cc); SLJIT_ASSERT(cc != NULL); - break; + continue; } if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global) @@ -2968,8 +3148,16 @@ if (*cc == OP_COND || *cc == OP_SCOND) has_alternatives = FALSE; cc = next_opcode(common, cc); + if (has_alternatives) + { + if (*cc == OP_REVERSE) + cc += 1 + IMM2_SIZE; + else if (*cc == OP_VREVERSE) + cc += 1 + 2 * IMM2_SIZE; + current_offset = common->then_offsets + (cc - common->start); + } while (cc < end) { @@ -2978,7 +3166,18 @@ while (cc < end) else { if (*cc == OP_ALT && has_alternatives) - current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start); + { + cc += 1 + LINK_SIZE; + + if (*cc == OP_REVERSE) + cc += 1 + IMM2_SIZE; + else if (*cc == OP_VREVERSE) + cc += 1 + 2 * IMM2_SIZE; + + current_offset = common->then_offsets + (cc - common->start); + continue; + } + if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL) *current_offset = 1; cc = next_opcode(common, cc); @@ -3002,7 +3201,7 @@ return (value & (value - 1)) == 0; static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label) { -while (list) +while (list != NULL) { /* sljit_set_label is clever enough to do nothing if either the jump or the label is NULL. */ @@ -3065,7 +3264,7 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size) DEFINE_COMPILER; SLJIT_ASSERT(size > 0); -OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); +OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); #ifdef DESTROY_REGISTERS OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345); OP1(SLJIT_MOV, TMP3, 0, TMP1, 0); @@ -3081,7 +3280,7 @@ static SLJIT_INLINE void free_stack(compiler_common *common, int size) DEFINE_COMPILER; SLJIT_ASSERT(size > 0); -OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw)); +OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw)); } static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size) @@ -3121,12 +3320,12 @@ if (length < 8) } else { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS) { GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1); loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)); OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, loop); } @@ -3160,7 +3359,7 @@ if (size == sizeof(sljit_sw)) return; } -if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER)) +if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER)) { OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0); src = TMP3; @@ -3182,8 +3381,8 @@ OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size); loop = LABEL(); OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0); -OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0); +OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0); CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop); if (uncleared_size >= sizeof(sljit_sw)) @@ -3210,12 +3409,12 @@ if (length < 8) } else { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS) { GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw)); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2); loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, loop); } @@ -3307,7 +3506,7 @@ else OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE)); } -has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS; +has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS; GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0)); OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); @@ -3315,7 +3514,7 @@ OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUME loop = LABEL(); if (has_pre) - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)); else { OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0); @@ -3338,14 +3537,14 @@ JUMPTO(SLJIT_NOT_ZERO, loop); /* Calculate the return value, which is the maximum ovector value. */ if (topbracket > 1) { - if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS) + if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS) { GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1); /* OVECTOR(0) is never equal to SLJIT_S2. */ loop = LABEL(); - sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))); + sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))); OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); @@ -3358,7 +3557,7 @@ if (topbracket > 1) /* OVECTOR(0) is never equal to SLJIT_S2. */ loop = LABEL(); OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0); - OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw)); + OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1); CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop); OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0); @@ -3738,10 +3937,10 @@ if (common->invalid_utf) else { OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); } } #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ @@ -3977,11 +4176,11 @@ if (common->utf) { if (options & READ_CHAR_UPDATE_STR_PTR) OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); if (options & READ_CHAR_UPDATE_STR_PTR) - CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0); + SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR); if (max >= 0xd800) - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1); } else { @@ -4005,16 +4204,47 @@ if (common->invalid_utf) else { OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); } } #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ #endif /* SUPPORT_UNICODE */ } +static void skip_valid_char(compiler_common *common) +{ +DEFINE_COMPILER; +#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) +struct sljit_jump *jump; +#endif + +#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16) + if (common->utf) + { + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +#if PCRE2_CODE_UNIT_WIDTH == 8 + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#elif PCRE2_CODE_UNIT_WIDTH == 16 + jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800); + OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ + JUMPHERE(jump); + return; + } +#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); +} + #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8 static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass) @@ -4056,6 +4286,7 @@ if (negated) if (common->invalid_utf) { + OP1(SLJIT_MOV, TMP1, 0, TMP2, 0); add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL)); add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0); @@ -4162,8 +4393,8 @@ if (common->utf && negated) if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS) { OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); - CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); + SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR); } else { @@ -4197,9 +4428,6 @@ TMP2 is not used. Otherwise TMP2 must contain the start of the subject buffer, and it is destroyed. Does not modify STR_PTR for invalid character sequences. */ DEFINE_COMPILER; -SLJIT_UNUSED_ARG(backtracks); -SLJIT_UNUSED_ARG(must_be_valid); - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_jump *jump; #endif @@ -4248,7 +4476,7 @@ if (common->utf) /* Skip low surrogate if necessary. */ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -4265,7 +4493,7 @@ if (common->invalid_utf && !must_be_valid) return; } - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -4273,6 +4501,10 @@ if (common->invalid_utf && !must_be_valid) } #endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */ #endif /* SUPPORT_UNICODE */ + +SLJIT_UNUSED_ARG(backtracks); +SLJIT_UNUSED_ARG(must_be_valid); + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); } @@ -4319,14 +4551,14 @@ of the character (>= 0xc0). Return char value in TMP1. */ DEFINE_COMPILER; struct sljit_jump *jump; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); /* Searching for the first zero. */ -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); /* Two byte sequence. */ OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000); @@ -4339,7 +4571,7 @@ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6); OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000); jump = JUMP(SLJIT_NOT_ZERO); /* Three byte sequence. */ OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000); @@ -4365,9 +4597,9 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_jump *compare; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20); jump = JUMP(SLJIT_NOT_ZERO); /* Two byte sequence. */ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); @@ -4407,7 +4639,7 @@ struct sljit_label *three_byte_entry; struct sljit_label *exit_invalid_label; struct sljit_jump *exit_invalid[11]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2); @@ -4426,7 +4658,7 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); @@ -4441,14 +4673,14 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1); exit_invalid[2] = NULL; } else exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000); jump = JUMP(SLJIT_NOT_ZERO); three_byte_entry = LABEL(); @@ -4456,8 +4688,8 @@ three_byte_entry = LABEL(); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1); exit_invalid[3] = NULL; } else @@ -4467,8 +4699,8 @@ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); exit_invalid[4] = NULL; } else @@ -4484,8 +4716,8 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1); exit_invalid[5] = NULL; } else @@ -4494,8 +4726,8 @@ else OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1); exit_invalid[6] = NULL; } else @@ -4516,7 +4748,7 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800); jump = JUMP(SLJIT_NOT_ZERO); OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); @@ -4531,8 +4763,8 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); exit_invalid[10] = NULL; } else @@ -4563,7 +4795,7 @@ struct sljit_label *skip_start; struct sljit_label *three_byte_exit; struct sljit_jump *jump[5]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); if (common->nltype != NLTYPE_ANY) { @@ -4572,8 +4804,8 @@ if (common->nltype != NLTYPE_ANY) /* All newlines are ascii, just skip intermediate octets. */ jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); loop = LABEL(); - if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS) - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS) + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); else { OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); @@ -4654,7 +4886,7 @@ struct sljit_label *exit_ok_label; struct sljit_label *exit_invalid_label; struct sljit_jump *exit_invalid[7]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3)); exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0); @@ -4745,7 +4977,7 @@ static void do_utfpeakcharback(compiler_common *common) DEFINE_COMPILER; struct sljit_jump *jump[2]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2)); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0); @@ -4788,7 +5020,7 @@ struct sljit_label *three_byte_entry; struct sljit_label *exit_invalid_label; struct sljit_jump *exit_invalid[8]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3)); exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0); @@ -4824,8 +5056,8 @@ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1); exit_invalid[2] = NULL; } else @@ -4834,8 +5066,8 @@ else OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800); - CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800); + SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1); exit_invalid[3] = NULL; } else @@ -4859,8 +5091,8 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); if (has_cmov) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000); - CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000); + SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1); exit_invalid[5] = NULL; } else @@ -4920,7 +5152,7 @@ undefined for invalid characters. */ DEFINE_COMPILER; struct sljit_jump *exit_invalid[3]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); /* TMP2 contains the high surrogate. */ exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); @@ -4953,7 +5185,7 @@ char value in TMP1. */ DEFINE_COMPILER; struct sljit_jump *exit_invalid[2]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); /* TMP2 contains the high surrogate. */ exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); @@ -4962,7 +5194,7 @@ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00); OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00); -OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400); +OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000); OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -4982,7 +5214,7 @@ static void do_utfmoveback_invalid(compiler_common *common) DEFINE_COMPILER; struct sljit_jump *exit_invalid[3]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400); exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0); @@ -5011,7 +5243,7 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_jump *exit_invalid[3]; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); @@ -5060,7 +5292,7 @@ SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0); SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12); -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); #if PCRE2_CODE_UNIT_WIDTH == 32 if (!common->utf) @@ -5100,7 +5332,7 @@ SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0); SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12); -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); #if PCRE2_CODE_UNIT_WIDTH == 32 if (!common->utf) @@ -5233,7 +5465,7 @@ if (newlinecheck) OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -5298,12 +5530,12 @@ else if (common->utf) if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); - CMOV(SLJIT_LESS, STR_PTR, TMP2, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400); + SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR); } else { - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS); OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); @@ -5406,6 +5638,8 @@ while (TRUE) case OP_CIRCM: case OP_DOLL: case OP_DOLLM: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: /* Zero width assertions. */ cc++; continue; @@ -5789,6 +6023,7 @@ static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forw { sljit_s32 i, j, max_i = 0, max_j = 0; sljit_u32 max_pri = 0; + sljit_s32 max_offset = max_fast_forward_char_pair_offset(); PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri; for (i = max - 1; i >= 1; i--) @@ -5799,14 +6034,14 @@ static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forw a2 = chars[i].chars[1]; a_pri = chars[i].last_count; - j = i - max_fast_forward_char_pair_offset(); + j = i - max_offset; if (j < 0) j = 0; while (j < i) { b_pri = chars[j].last_count; - if (b_pri > 2 && a_pri + b_pri >= max_pri) + if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri) { b1 = chars[j].chars[0]; b2 = chars[j].chars[1]; @@ -5854,8 +6089,8 @@ if (has_match_end) OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); - CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); + SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); } #ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD @@ -6057,8 +6292,8 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max)); add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); - CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); + SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); } else { @@ -6194,7 +6429,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255) firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -6222,7 +6457,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255) firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -6287,8 +6522,8 @@ if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); if (common->mode != PCRE2_JIT_COMPLETE) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); - CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); } } } @@ -6313,7 +6548,7 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF) notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL); OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); #if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT); @@ -6349,8 +6584,8 @@ if (common->match_end_ptr != 0) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0); OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0); - CMOV(SLJIT_GREATER, STR_END, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0); + SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END); } start = LABEL(); @@ -6379,12 +6614,12 @@ if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &ma if (!HAS_VIRTUAL_REGISTERS) { OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0); } else { OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); } JUMPTO(SLJIT_ZERO, start); } @@ -6487,26 +6722,27 @@ DEFINE_COMPILER; struct sljit_jump *jump; struct sljit_label *mainloop; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); GET_LOCAL_BASE(TMP1, 0, 0); /* Drop frames until we reach STACK_TOP. */ mainloop = LABEL(); -OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw)); -jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0); +OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw)); +OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); +jump = JUMP(SLJIT_SIG_LESS_EQUAL); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); if (HAS_VIRTUAL_REGISTERS) { - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw)); } else { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0); GET_LOCAL_BASE(TMP1, 0, 0); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0); @@ -6514,28 +6750,39 @@ else JUMPTO(SLJIT_JUMP, mainloop); JUMPHERE(jump); -jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0); +sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z); +jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */); /* End of reverting values. */ OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0); JUMPHERE(jump); -OP1(SLJIT_NEG, TMP2, 0, TMP2, 0); +OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); if (HAS_VIRTUAL_REGISTERS) { - OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); } else { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw))); - OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw)); + OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw))); + OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw)); OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0); } JUMPTO(SLJIT_JUMP, mainloop); } -static void check_wordboundary(compiler_common *common) +#ifdef SUPPORT_UNICODE +#define UCPCAT(bit) (1 << (bit)) +#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2)) +#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3)) +#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1)) +#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu) +#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No) +#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1) +#endif + +static void check_wordboundary(compiler_common *common, BOOL ucp) { DEFINE_COMPILER; struct sljit_jump *skipread; @@ -6549,9 +6796,10 @@ jump_list *invalid_utf2 = NULL; struct sljit_jump *jump; #endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */ +SLJIT_UNUSED_ARG(ucp); SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16); -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); /* Get type of the previous char, and put it to TMP3. */ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); @@ -6588,19 +6836,12 @@ else /* Testing char type. */ #ifdef SUPPORT_UNICODE -if (common->ucp) +if (ucp) { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); - jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - JUMPHERE(jump); - OP1(SLJIT_MOV, TMP3, 0, TMP2, 0); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N); + OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO); } else #endif /* SUPPORT_UNICODE */ @@ -6634,18 +6875,12 @@ peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2); valid_utf = LABEL(); -if (common->ucp) +if (ucp) { - OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1); - jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE); add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - JUMPHERE(jump); + OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); } else #endif /* SUPPORT_UNICODE */ @@ -6910,7 +7145,7 @@ j = 0; if (char_list[0] == 0) { i++; - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO); } else @@ -6922,8 +7157,8 @@ while (i < len) j++; else { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]); - CMOV(SLJIT_ZERO, TMP2, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]); + SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2); } i++; } @@ -6936,8 +7171,8 @@ if (j != 0) if ((char_list[i] & 0x100) != 0) { j--; - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff); - CMOV(SLJIT_ZERO, TMP2, TMP1, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff); + SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2); } } @@ -6962,12 +7197,12 @@ static void check_anynewline(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); -OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) @@ -6975,7 +7210,7 @@ if (common->utf) #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif @@ -6989,31 +7224,31 @@ static void check_hspace(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) { #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif @@ -7028,12 +7263,12 @@ static void check_vspace(compiler_common *common) /* Check whether TMP1 contains a newline character. TMP2 destroyed. */ DEFINE_COMPILER; -sljit_emit_fast_enter(compiler, RETURN_ADDR, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0); OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a); -OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); -OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); +OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 #if PCRE2_CODE_UNIT_WIDTH == 8 if (common->utf) @@ -7041,7 +7276,7 @@ if (common->utf) #endif OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a); #if PCRE2_CODE_UNIT_WIDTH == 8 } #endif @@ -7070,7 +7305,7 @@ else char2_reg = RETURN_ADDR; } -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); if (char1_reg == STR_END) @@ -7079,11 +7314,11 @@ if (char1_reg == STR_END) OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0); } -if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) { label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_NOT_ZERO, label); @@ -7091,14 +7326,14 @@ if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_ JUMPHERE(jump); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); } -else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) { OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1)); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0); OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1)); JUMPTO(SLJIT_NOT_ZERO, label); @@ -7152,12 +7387,12 @@ else lcc_table = TMP3; } -if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) opt_type = 1; -else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) +else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS) opt_type = 2; -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0); @@ -7173,8 +7408,8 @@ OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc); if (opt_type == 1) { label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); } else if (opt_type == 2) { @@ -7182,8 +7417,8 @@ else if (opt_type == 2) OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); label = LABEL(); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); - sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)); + sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)); } else { @@ -7384,16 +7619,6 @@ return cc; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 -#define SET_TYPE_OFFSET(value) \ - if ((value) != typeoffset) \ - { \ - if ((value) < typeoffset) \ - OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \ - else \ - OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \ - } \ - typeoffset = (value); - #define SET_CHAR_OFFSET(value) \ if ((value) != charoffset) \ { \ @@ -7406,6 +7631,20 @@ return cc; static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr); +#ifdef SUPPORT_UNICODE +#define XCLASS_SAVE_CHAR 0x001 +#define XCLASS_CHAR_SAVED 0x002 +#define XCLASS_HAS_TYPE 0x004 +#define XCLASS_HAS_SCRIPT 0x008 +#define XCLASS_HAS_SCRIPT_EXTENSION 0x010 +#define XCLASS_HAS_BOOL 0x020 +#define XCLASS_HAS_BIDICL 0x040 +#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL) +#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100 +#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200 +#endif /* SUPPORT_UNICODE */ + static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks) { DEFINE_COMPILER; @@ -7420,11 +7659,11 @@ BOOL utf = common->utf; #endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */ #ifdef SUPPORT_UNICODE -BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE; -BOOL charsaved = FALSE; +sljit_u32 unicode_status = 0; +sljit_u32 category_list = 0; +sljit_u32 items; int typereg = TMP1; const sljit_u32 *other_cases; -sljit_uw typeoffset; #endif /* SUPPORT_UNICODE */ /* Scanning the necessary info. */ @@ -7441,6 +7680,7 @@ if (cc[-1] & XCL_MAP) while (*cc != XCL_END) { compares++; + if (*cc == XCL_SINGLE) { cc ++; @@ -7448,7 +7688,7 @@ while (*cc != XCL_END) if (c > max) max = c; if (c < min) min = c; #ifdef SUPPORT_UNICODE - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR; #endif /* SUPPORT_UNICODE */ } else if (*cc == XCL_RANGE) @@ -7459,7 +7699,7 @@ while (*cc != XCL_END) GETCHARINCTEST(c, cc); if (c > max) max = c; #ifdef SUPPORT_UNICODE - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR; #endif /* SUPPORT_UNICODE */ } #ifdef SUPPORT_UNICODE @@ -7467,7 +7707,8 @@ while (*cc != XCL_END) { SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); cc++; - if (*cc == PT_CLIST) + + if (*cc == PT_CLIST && cc[-1] == XCL_PROP) { other_cases = PRIV(ucd_caseless_sets) + cc[1]; while (*other_cases != NOTACHAR) @@ -7483,54 +7724,114 @@ while (*cc != XCL_END) min = 0; } + items = 0; + switch(*cc) { case PT_ANY: /* Any either accepts everything or ignored. */ if (cc[-1] == XCL_PROP) - { - compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); - if (list == backtracks) - add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); - return; - } + items = UCPCAT_ALL; + else + compares--; break; case PT_LAMP: + items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt); + break; + case PT_GC: + items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]); + break; + case PT_PC: + items = UCPCAT(cc[1]); + break; + + case PT_WORD: + items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N; + break; + case PT_ALNUM: - needstype = TRUE; + items = UCPCAT_L | UCPCAT_N; break; + case PT_SCX: + unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION; + if (cc[-1] == XCL_NOTPROP) + { + unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP; + break; + } + compares++; + /* Fall through */ + case PT_SC: - needsscript = TRUE; + unicode_status |= XCLASS_HAS_SCRIPT; break; case PT_SPACE: case PT_PXSPACE: - case PT_WORD: case PT_PXGRAPH: case PT_PXPRINT: case PT_PXPUNCT: - needstype = TRUE; - needschar = TRUE; + unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE; break; case PT_CLIST: case PT_UCNC: - needschar = TRUE; + case PT_PXXDIGIT: + unicode_status |= XCLASS_SAVE_CHAR; + break; + + case PT_BOOL: + unicode_status |= XCLASS_HAS_BOOL; + break; + + case PT_BIDICL: + unicode_status |= XCLASS_HAS_BIDICL; break; default: SLJIT_UNREACHABLE(); break; } + + if (items > 0) + { + if (cc[-1] == XCL_NOTPROP) + items ^= UCPCAT_ALL; + category_list |= items; + unicode_status |= XCLASS_HAS_TYPE; + compares--; + } + cc += 2; } #endif /* SUPPORT_UNICODE */ } + +#ifdef SUPPORT_UNICODE +if (category_list == UCPCAT_ALL) + { + /* All characters are accepted, same as dotall. */ + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list == backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } + +if (compares == 0 && category_list == 0) + { + /* No characters are accepted, same as (*F) or dotall. */ + compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE); + if (list != backtracks) + add_jump(compiler, backtracks, JUMP(SLJIT_JUMP)); + return; + } +#else /* !SUPPORT_UNICODE */ SLJIT_ASSERT(compares > 0); +#endif /* SUPPORT_UNICODE */ /* We are not necessary in utf mode even in 8 bit mode. */ cc = ccbegin; @@ -7539,7 +7840,7 @@ if ((cc[-1] & XCL_NOT) != 0) else { #ifdef SUPPORT_UNICODE - read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0); + read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0); #else /* !SUPPORT_UNICODE */ read_char(common, min, max, NULL, 0); #endif /* SUPPORT_UNICODE */ @@ -7556,7 +7857,7 @@ if ((cc[-1] & XCL_HASPROP) == 0) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO)); } @@ -7575,7 +7876,7 @@ else if ((cc[-1] & XCL_MAP) != 0) { OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); #ifdef SUPPORT_UNICODE - charsaved = TRUE; + unicode_status |= XCLASS_CHAR_SAVED; #endif /* SUPPORT_UNICODE */ if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list)) { @@ -7589,7 +7890,7 @@ else if ((cc[-1] & XCL_MAP) != 0) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO)); #if PCRE2_CODE_UNIT_WIDTH == 8 @@ -7603,9 +7904,9 @@ else if ((cc[-1] & XCL_MAP) != 0) } #ifdef SUPPORT_UNICODE -if (needstype || needsscript) +if (unicode_status & XCLASS_NEEDS_UCD) { - if (needschar && !charsaved) + if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR) OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0); #if PCRE2_CODE_UNIT_WIDTH == 32 @@ -7625,17 +7926,19 @@ if (needstype || needsscript) OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2)); OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1); + OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); + OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); - /* Before anything else, we deal with scripts. */ - if (needsscript) - { - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + ccbegin = cc; - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + if (category_list != 0) + compares++; - ccbegin = cc; + if (unicode_status & XCLASS_HAS_BIDICL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT); while (*cc != XCL_END) { @@ -7654,7 +7957,7 @@ if (needstype || needsscript) { SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); cc++; - if (*cc == PT_SC) + if (*cc == PT_BIDICL) { compares--; invertcmp = (compares == 0 && list != backtracks); @@ -7668,61 +7971,191 @@ if (needstype || needsscript) } cc = ccbegin; + } + + if (unicode_status & XCLASS_HAS_BOOL) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); - if (needstype) + while (*cc != XCL_END) { - /* TMP2 has already been shifted by 2 */ - if (!needschar) + if (*cc == XCL_SINGLE) { - OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0); - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); - - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); } else { - OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0); - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_BOOL) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - typereg = RETURN_ADDR; + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + cc += 2; } } - else if (needschar) - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + + cc = ccbegin; } - else if (needstype) + + if (unicode_status & XCLASS_HAS_SCRIPT) { - OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3); - OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2); + OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); - if (!needschar) + while (*cc != XCL_END) { - OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0); + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + switch (*cc) + { + case PT_SCX: + if (cc[-1] == XCL_NOTPROP) + break; + /* Fall through */ - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + case PT_SC: + compares--; + invertcmp = (compares == 0 && list != backtracks); + if (cc[-1] == XCL_NOTPROP) + invertcmp ^= 0x1; + + add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1])); + } + cc += 2; + } } - else + + cc = ccbegin; + } + + if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION) + { + OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass)); + OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK); + OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2); + + if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP) { - OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0); + if (unicode_status & XCLASS_HAS_TYPE) + { + if (unicode_status & XCLASS_SAVE_CHAR) + { + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0); + unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0; + } + else + { + OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); + unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR; + } + } + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script)); + } - OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); - OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); - typereg = RETURN_ADDR; + while (*cc != XCL_END) + { + if (*cc == XCL_SINGLE) + { + cc ++; + GETCHARINCTEST(c, cc); + } + else if (*cc == XCL_RANGE) + { + cc ++; + GETCHARINCTEST(c, cc); + GETCHARINCTEST(c, cc); + } + else + { + SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP); + cc++; + if (*cc == PT_SCX) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + + jump = NULL; + if (cc[-1] == XCL_NOTPROP) + { + jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]); + if (invertcmp) + { + add_jump(compiler, backtracks, jump); + jump = NULL; + } + invertcmp ^= 0x1; + } + + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f)); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + + if (jump != NULL) + JUMPHERE(jump); + } + cc += 2; + } } + + if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0) + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); + else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR) + OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); + cc = ccbegin; } - else if (needschar) + + if (unicode_status & XCLASS_SAVE_CHAR) OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0); + + if (unicode_status & XCLASS_HAS_TYPE) + { + if (unicode_status & XCLASS_SAVE_CHAR) + typereg = RETURN_ADDR; + + OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype)); + OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0); + + if (category_list > 0) + { + compares--; + invertcmp = (compares == 0 && list != backtracks); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list); + add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp)); + } + } } #endif /* SUPPORT_UNICODE */ /* Generating code. */ charoffset = 0; numberofcmps = 0; -#ifdef SUPPORT_UNICODE -typeoffset = 0; -#endif /* SUPPORT_UNICODE */ while (*cc != XCL_END) { @@ -7737,13 +8170,13 @@ while (*cc != XCL_END) if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); numberofcmps = 0; @@ -7763,13 +8196,13 @@ while (*cc != XCL_END) if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE)) { - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); numberofcmps++; } else if (numberofcmps > 0) { - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset)); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); numberofcmps = 0; @@ -7790,65 +8223,33 @@ while (*cc != XCL_END) switch(*cc) { case PT_ANY: - if (!invertcmp) - jump = JUMP(SLJIT_JUMP); - break; - case PT_LAMP: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset); - OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - case PT_GC: - c = PRIV(ucp_typerange)[(int)cc[1] * 2]; - SET_TYPE_OFFSET(c); - jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c); - break; - case PT_PC: - jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset); - break; - case PT_SC: + case PT_SCX: + case PT_BOOL: + case PT_BIDICL: + case PT_WORD: + case PT_ALNUM: compares++; - /* Do nothing. */ + /* Already handled. */ break; case PT_SPACE: case PT_PXSPACE: SET_CHAR_OFFSET(9); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - SET_TYPE_OFFSET(ucp_Zl); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); - jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); - break; - - case PT_WORD: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset)); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - /* Fall through. */ - - case PT_ALNUM: - SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll); - OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); - SET_TYPE_OFFSET(ucp_Nd); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; @@ -7870,7 +8271,7 @@ while (*cc != XCL_END) OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); } - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); other_cases += 2; } @@ -7883,103 +8284,135 @@ while (*cc != XCL_END) OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset); OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]); } - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset)); OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); other_cases += 3; } else { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); } while (*other_cases != NOTACHAR) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset)); OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL); } jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_UCNC: - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset)); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); SET_CHAR_OFFSET(0xa0); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset)); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); SET_CHAR_OFFSET(0); - OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; case PT_PXGRAPH: - /* C and Z groups are the farthest two groups. */ - SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); + jump = JUMP(SLJIT_ZERO); + c = charoffset; /* In case of ucp_Cf, we overwrite the result. */ SET_CHAR_OFFSET(0x2066); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + /* Restore charoffset. */ + SET_CHAR_OFFSET(c); + JUMPHERE(jump); jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); break; case PT_PXPRINT: - /* C and Z groups are the farthest two groups. */ - SET_TYPE_OFFSET(ucp_Ll); - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER); - - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll); - OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); - jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf)); + jump = JUMP(SLJIT_ZERO); + c = charoffset; /* In case of ucp_Cf, we overwrite the result. */ SET_CHAR_OFFSET(0x2066); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066); OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL); + /* Restore charoffset. */ + SET_CHAR_OFFSET(c); + JUMPHERE(jump); jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0); break; case PT_PXPUNCT: - SET_TYPE_OFFSET(ucp_Sc); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc); - OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So)); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO); SET_CHAR_OFFSET(0); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f); OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL); - SET_TYPE_OFFSET(ucp_Pc); - OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc); - OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL); + OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps)); + OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO); + jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); + break; + + case PT_PXXDIGIT: + SET_CHAR_OFFSET(CHAR_A); + OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A); + OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(CHAR_0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff10); + jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10); + + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff21); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff41); + OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41); + OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL); + + SET_CHAR_OFFSET(0xff10); + + JUMPHERE(jump); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0); jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp); break; @@ -7995,6 +8428,7 @@ while (*cc != XCL_END) add_jump(compiler, compares > 0 ? list : backtracks, jump); } +SLJIT_ASSERT(compares == 0); if (found != NULL) set_jumps(found, LABEL()); } @@ -8007,11 +8441,7 @@ if (found != NULL) static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks) { DEFINE_COMPILER; -int length; struct sljit_jump *jump[4]; -#ifdef SUPPORT_UNICODE -struct sljit_label *label; -#endif /* SUPPORT_UNICODE */ switch(type) { @@ -8039,16 +8469,18 @@ switch(type) case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY: - add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL)); + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL)); #ifdef SUPPORT_UNICODE if (common->invalid_utf) { - add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0)); + add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0)); return cc; } #endif /* SUPPORT_UNICODE */ sljit_set_current_flags(compiler, SLJIT_SET_Z); - add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO)); + add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO)); return cc; case OP_EODN: @@ -8063,9 +8495,9 @@ switch(type) else { jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0); - OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0); OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff); OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL)); check_partial(common, TRUE); @@ -8088,7 +8520,7 @@ switch(type) OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR); OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2)); - OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0); jump[2] = JUMP(SLJIT_GREATER); add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */); /* Equal. */ @@ -8131,11 +8563,11 @@ switch(type) if (HAS_VIRTUAL_REGISTERS) { OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); } else - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); if (!common->endonly) compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks); @@ -8151,11 +8583,11 @@ switch(type) if (HAS_VIRTUAL_REGISTERS) { OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); } else - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); check_partial(common, FALSE); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); @@ -8194,15 +8626,15 @@ switch(type) OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } else { OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0)); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); } return cc; @@ -8213,15 +8645,15 @@ switch(type) OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); } else { OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0); - OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); + OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL); } - add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO32)); + add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO)); jump[0] = JUMP(SLJIT_JUMP); JUMPHERE(jump[1]); @@ -8244,36 +8676,6 @@ switch(type) } JUMPHERE(jump[0]); return cc; - - case OP_REVERSE: - length = GET(cc, 0); - if (length == 0) - return cc + LINK_SIZE; - if (HAS_VIRTUAL_REGISTERS) - { - OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); - } - else - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); -#ifdef SUPPORT_UNICODE - if (common->utf) - { - OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length); - label = LABEL(); - add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0)); - move_back(common, backtracks, FALSE); - OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); - JUMPTO(SLJIT_NOT_ZERO, label); - } - else -#endif - { - OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length)); - add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0)); - } - check_start_used_ptr(common); - return cc + LINK_SIZE; } SLJIT_UNREACHABLE(); return cc; @@ -8313,7 +8715,7 @@ do /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = prevcc; @@ -8325,7 +8727,7 @@ do BACKCHAR(bptr); GETCHAR(c, bptr); - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; @@ -8381,7 +8783,7 @@ do /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = prevcc; @@ -8391,7 +8793,7 @@ do { GETCHARBACK_INVALID(c, bptr, start_subject, break); - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; @@ -8430,7 +8832,7 @@ c = *cc++; #if PCRE2_CODE_UNIT_WIDTH == 32 if (c >= 0x110000) - return NULL; + return cc; #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ lgb = UCD_GRAPHBREAK(c); @@ -8449,7 +8851,7 @@ while (cc < end_subject) /* Not breaking between Regional Indicators is allowed only if there are an even number of preceding RIs. */ - if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator) + if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator) { ricount = 0; bptr = cc - 1; @@ -8464,7 +8866,7 @@ while (cc < end_subject) break; #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */ - if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break; + if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break; ricount++; } @@ -8514,7 +8916,7 @@ switch(type) #endif read_char8_type(common, backtracks, type == OP_NOT_DIGIT); /* Flip the starting bit in the negative case. */ - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit); add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8528,7 +8930,7 @@ switch(type) else #endif read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space); add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8542,7 +8944,7 @@ switch(type) else #endif read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word); add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO)); return cc; @@ -8572,35 +8974,14 @@ switch(type) if (check_str_ptr) detect_partial_match(common, backtracks); #ifdef SUPPORT_UNICODE - if (common->utf) + if (common->utf && common->invalid_utf) { - if (common->invalid_utf) - { - read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); - return cc; - } - -#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16 - OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); -#if PCRE2_CODE_UNIT_WIDTH == 8 - jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0); - OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -#elif PCRE2_CODE_UNIT_WIDTH == 16 - jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800); - OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00); - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800); - OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL); - OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); -#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */ - JUMPHERE(jump[0]); + read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR); return cc; -#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */ } #endif /* SUPPORT_UNICODE */ - OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + + skip_valid_char(common); return cc; case OP_ANYBYTE: @@ -8684,14 +9065,14 @@ switch(type) OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); #if PCRE2_CODE_UNIT_WIDTH != 32 - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, - common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); if (common->invalid_utf) add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); #else - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, - common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf)); - if (!common->utf || common->invalid_utf) + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf)); + if (common->invalid_utf) add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); #endif @@ -8752,8 +9133,8 @@ switch(type) if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { - OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc); - CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c); + OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc); + SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1); add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c)); } else @@ -8872,7 +9253,7 @@ switch(type) OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3); OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc); OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0); add_jump(compiler, backtracks, JUMP(SLJIT_ZERO)); #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8 @@ -9110,7 +9491,7 @@ if (common->utf && *cc == OP_REFI) caseless_loop = LABEL(); OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t)); - OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0); + OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0); JUMPTO(SLJIT_EQUAL, loop); JUMPTO(SLJIT_LESS, caseless_loop); @@ -9272,14 +9653,16 @@ if (!minimize) if (ref) OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); + if (ref) { - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + if (!common->unset_backref) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); } else { - compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + compile_dnref_search(common, ccbegin, &backtrack->own_backtracks); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); @@ -9292,7 +9675,7 @@ if (!minimize) label = LABEL(); if (!ref) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); - compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE); + compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE); if (min > 1 || max > 1) { @@ -9354,12 +9737,13 @@ else { if (ref) { - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); + if (!common->unset_backref) + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1))); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1)); } else { - compile_dnref_search(common, ccbegin, &backtrack->topbacktracks); + compile_dnref_search(common, ccbegin, &backtrack->own_backtracks); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0); zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw)); @@ -9368,11 +9752,11 @@ else BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL(); if (max > 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max)); if (!ref) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); -compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE); +compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); if (min > 1) @@ -9447,12 +9831,12 @@ if (entry->entry_label == NULL) else JUMPTO(SLJIT_FAST_CALL, entry->entry_label); /* Leave if the match is failed. */ -add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); +add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0)); BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL(); return cc + 1 + LINK_SIZE; } -static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) +static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector) { PCRE2_SPTR begin; PCRE2_SIZE *ovector; @@ -9519,7 +9903,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT) sljit_sw value1; sljit_sw value2; sljit_sw value3; -sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw); +sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw); PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); @@ -9569,23 +9953,123 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0); /* SLJIT_R0 = arguments */ OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); free_stack(common, callout_arg_size); /* Check return value. */ -OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32)); +OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER)); if (common->abort_label == NULL) - add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */); + add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */); else - JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->abort_label); + JUMPTO(SLJIT_NOT_EQUAL /* SIG_LESS */, common->abort_label); return cc + callout_length; } #undef CALLOUT_ARG_SIZE #undef CALLOUT_ARG_OFFSET +static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent) +{ +DEFINE_COMPILER; +backtrack_common *backtrack = NULL; +jump_list **reverse_failed; +unsigned int lmin, lmax; +#ifdef SUPPORT_UNICODE +struct sljit_jump *jump; +struct sljit_label *label; +#endif + +SLJIT_ASSERT(parent->top == NULL); + +if (*cc == OP_REVERSE) + { + reverse_failed = &parent->own_backtracks; + lmin = GET2(cc, 1); + lmax = lmin; + cc += 1 + IMM2_SIZE; + + SLJIT_ASSERT(lmin > 0); + } +else + { + SLJIT_ASSERT(*cc == OP_VREVERSE); + PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL); + + reverse_failed = &backtrack->own_backtracks; + lmin = GET2(cc, 1); + lmax = GET2(cc, 1 + IMM2_SIZE); + cc += 1 + 2 * IMM2_SIZE; + + SLJIT_ASSERT(lmin < lmax); + } + +if (HAS_VIRTUAL_REGISTERS) + { + OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin)); + } +else + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin)); + +#ifdef SUPPORT_UNICODE +if (common->utf) + { + if (lmin > 0) + { + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin); + label = LABEL(); + add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0)); + move_back(common, reverse_failed, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + } + + if (lmin < lmax) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); + + OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin); + label = LABEL(); + jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0); + move_back(common, reverse_failed, FALSE); + OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1); + JUMPTO(SLJIT_NOT_ZERO, label); + + JUMPHERE(jump); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); + } + } +else +#endif + { + if (lmin > 0) + { + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin)); + add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0)); + } + + if (lmin < lmax) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0); + + OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin)); + OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0); + SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR); + + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); + } + } + +check_start_used_ptr(common); + +if (lmin < lmax) + BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL(); + +return cc; +} + static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc) { while (TRUE) @@ -9604,6 +10088,8 @@ while (TRUE) case OP_DOLLM: case OP_CALLOUT: case OP_ALT: + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: cc += PRIV(OP_lengths)[*cc]; break; @@ -9623,13 +10109,15 @@ int framesize; int extrasize; BOOL local_quit_available = FALSE; BOOL needs_control_head; +BOOL end_block_size = 0; +BOOL has_vreverse; int private_data_ptr; backtrack_common altbacktrack; PCRE2_SPTR ccbegin; PCRE2_UCHAR opcode; PCRE2_UCHAR bra = OP_BRA; jump_list *tmp = NULL; -jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks; +jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks; jump_list **found; /* Saving previous accept variables. */ BOOL save_local_quit_available = common->local_quit_available; @@ -9652,6 +10140,7 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) bra = *cc; cc++; } + private_data_ptr = PRIVATE_DATA(cc); SLJIT_ASSERT(private_data_ptr != 0); framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head); @@ -9671,12 +10160,17 @@ if (bra == OP_BRAMINZERO) brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0); } +if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin)) + end_block_size = 3; + if (framesize < 0) { extrasize = 1; if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE)) extrasize = 0; + extrasize += end_block_size; + if (needs_control_head) extrasize++; @@ -9694,18 +10188,19 @@ if (framesize < 0) if (needs_control_head) { - SLJIT_ASSERT(extrasize == 2); + SLJIT_ASSERT(extrasize == end_block_size + 2); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0); } } else { - extrasize = needs_control_head ? 3 : 2; + extrasize = (needs_control_head ? 3 : 2) + end_block_size; + + OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0); allocate_stack(common, framesize + extrasize); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0); if (needs_control_head) OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); @@ -9713,16 +10208,22 @@ else if (needs_control_head) { - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0); - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); } else - OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0); init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize); } +if (end_block_size > 0) + { + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0); + OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0); + } + memset(&altbacktrack, 0, sizeof(backtrack_common)); if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)) { @@ -9741,13 +10242,19 @@ while (1) common->accept_label = NULL; common->accept = NULL; altbacktrack.top = NULL; - altbacktrack.topbacktracks = NULL; + altbacktrack.own_backtracks = NULL; if (*ccbegin == OP_ALT && extrasize > 0) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); altbacktrack.cc = ccbegin; - compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack); + ccbegin += 1 + LINK_SIZE; + + has_vreverse = (*ccbegin == OP_VREVERSE); + if (*ccbegin == OP_REVERSE || has_vreverse) + ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack); + + compile_matchingpath(common, ccbegin, cc, &altbacktrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) { if (local_quit_available) @@ -9763,6 +10270,13 @@ while (1) common->accept = save_accept; return NULL; } + + if (has_vreverse) + { + SLJIT_ASSERT(altbacktrack.top != NULL); + add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } + common->accept_label = LABEL(); if (common->accept != NULL) set_jumps(common->accept, common->accept_label); @@ -9775,6 +10289,9 @@ while (1) else if (extrasize > 0) free_stack(common, extrasize); + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1)); + if (needs_control_head) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); } @@ -9784,12 +10301,20 @@ while (1) { /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw)); + + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2)); + if (needs_control_head) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1)); } else { OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1)); + if (needs_control_head) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2)); add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); @@ -9803,7 +10328,7 @@ while (1) if (conditional) { if (extrasize > 0) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1)); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1))); } else if (bra == OP_BRAZERO) { @@ -9842,7 +10367,7 @@ while (1) common->accept = save_accept; return NULL; } - set_jumps(altbacktrack.topbacktracks, LABEL()); + set_jumps(altbacktrack.own_backtracks, LABEL()); if (*cc != OP_ALT) break; @@ -9875,8 +10400,11 @@ if (common->positive_assertion_quit != NULL) JUMPHERE(jump); } +if (end_block_size > 0) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + if (needs_control_head) - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1)); if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) { @@ -9889,8 +10417,8 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) /* The topmost item should be 0. */ if (bra == OP_BRAZERO) { - if (extrasize == 2) - free_stack(common, 1); + if (extrasize >= 2) + free_stack(common, extrasize - 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } else if (extrasize > 0) @@ -9924,8 +10452,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) /* Keep the STR_PTR on the top of the stack. */ if (bra == OP_BRAZERO) { + /* This allocation is always successful. */ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw)); - if (extrasize == 2) + if (extrasize >= 2) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); } else if (bra == OP_BRAMINZERO) @@ -9945,8 +10474,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) else { /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */ - OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw)); - if (extrasize == 2) + OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw)); + + if (extrasize == 2 + end_block_size) { OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); if (bra == OP_BRAMINZERO) @@ -9954,7 +10484,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) } else { - SLJIT_ASSERT(extrasize == 3); + SLJIT_ASSERT(extrasize == 3 + end_block_size); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1)); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0); } @@ -9978,7 +10508,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } - set_jumps(backtrack->common.topbacktracks, LABEL()); + set_jumps(backtrack->common.own_backtracks, LABEL()); } } else @@ -9991,8 +10521,8 @@ else if (bra != OP_BRA) { - if (extrasize == 2) - free_stack(common, 1); + if (extrasize >= 2) + free_stack(common, extrasize - 1); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0); } else if (extrasize > 0) @@ -10023,9 +10553,9 @@ else if (bra != OP_BRA) { - SLJIT_ASSERT(found == &backtrack->common.topbacktracks); - set_jumps(backtrack->common.topbacktracks, LABEL()); - backtrack->common.topbacktracks = NULL; + SLJIT_ASSERT(found == &backtrack->common.own_backtracks); + set_jumps(backtrack->common.own_backtracks, LABEL()); + backtrack->common.own_backtracks = NULL; } } @@ -10134,7 +10664,7 @@ static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr #endif /* SUPPORT_UNICODE */ -static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent) +static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent) { DEFINE_COMPILER; @@ -10142,14 +10672,14 @@ SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); #ifdef SUPPORT_UNICODE -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, - common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, + common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run)); #else -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run)); #endif OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0); -add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); +add_jump(compiler, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0)); } /* @@ -10223,6 +10753,7 @@ PCRE2_UCHAR ket; assert_backtrack *assert; BOOL has_alternatives; BOOL needs_control_head = FALSE; +BOOL has_vreverse = FALSE; struct sljit_jump *jump; struct sljit_jump *skip; struct sljit_label *rmax_label = NULL; @@ -10472,6 +11003,21 @@ else if (opcode == OP_CBRA || opcode == OP_SCBRA) OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); } } +else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1)) + { + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + allocate_stack(common, 4); + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0); + OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0); + + has_vreverse = (*matchingpath == OP_VREVERSE); + if (*matchingpath == OP_REVERSE || has_vreverse) + matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack); + } else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) { /* Saving the previous value. */ @@ -10479,6 +11025,9 @@ else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SC allocate_stack(common, 1); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); + + if (*matchingpath == OP_REVERSE) + matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack); } else if (has_alternatives) { @@ -10598,14 +11147,28 @@ compile_matchingpath(common, matchingpath, cc, backtrack); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; -if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); - -if (opcode == OP_ONCE) - match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); +switch (opcode) + { + case OP_ASSERTBACK_NA: + if (has_vreverse) + { + SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1)); + add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } -if (opcode == OP_SCRIPT_RUN) - match_script_run_common(common, private_data_ptr, backtrack); + if (PRIVATE_DATA(ccbegin + 1)) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + break; + case OP_ASSERT_NA: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + break; + case OP_ONCE: + match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); + break; + case OP_SCRIPT_RUN: + match_script_run_common(common, private_data_ptr, backtrack); + break; + } stacksize = 0; if (repeat_type == OP_MINUPTO) @@ -10804,7 +11367,7 @@ switch(opcode) case OP_CBRAPOS: case OP_SCBRAPOS: offset = GET2(cc, 1 + LINK_SIZE); - /* This case cannot be optimized in the same was as + /* This case cannot be optimized in the same way as normal capturing brackets. */ SLJIT_ASSERT(common->optimized_cbracket[offset] == 0); cbraprivptr = OVECTOR_PRIV(offset); @@ -10921,7 +11484,7 @@ loop = LABEL(); while (*cc != OP_KETRPOS) { backtrack->top = NULL; - backtrack->topbacktracks = NULL; + backtrack->own_backtracks = NULL; cc += GET(cc, 1); compile_matchingpath(common, ccbegin, cc, backtrack); @@ -11002,7 +11565,7 @@ while (*cc != OP_KETRPOS) compile_backtrackingpath(common, backtrack->top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return NULL; - set_jumps(backtrack->topbacktracks, LABEL()); + set_jumps(backtrack->own_backtracks, LABEL()); if (framesize < 0) { @@ -11034,13 +11597,13 @@ while (*cc != OP_KETRPOS) /* We don't have to restore the control head in case of a failed match. */ -backtrack->topbacktracks = NULL; +backtrack->own_backtracks = NULL; if (!zero) { if (framesize < 0) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0)); else /* TMP2 is set to [private_data_ptr] above. */ - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0)); } /* None of them matched. */ @@ -11214,7 +11777,7 @@ struct sljit_label *label; int private_data_ptr = PRIVATE_DATA(cc); int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; -int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); int tmp_base, tmp_offset; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 BOOL use_tmp; @@ -11226,7 +11789,7 @@ early_fail_type = (early_fail_ptr & 0x7); early_fail_ptr >>= 3; /* During recursion, these optimizations are disabled. */ -if (common->early_fail_start_ptr == 0) +if (common->early_fail_start_ptr == 0 && common->fast_forward_bc_ptr == NULL) { early_fail_ptr = 0; early_fail_type = type_skip; @@ -11236,7 +11799,7 @@ SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0 || (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr)); if (early_fail_type == type_fail) - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr)); cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); @@ -11263,10 +11826,10 @@ if (exact > 1) && type != OP_ANYNL && type != OP_EXTUNI) { OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, label); } @@ -11274,25 +11837,25 @@ if (exact > 1) { OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); label = LABEL(); - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); JUMPTO(SLJIT_NOT_ZERO, label); } } else if (exact == 1) - { - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE); - if (early_fail_type == type_fail_range) - { - OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); - OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw)); - OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); - OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); - add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); +if (early_fail_type == type_fail_range) + { + /* Range end first, followed by range start. */ + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw)); + OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0); + OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0); + add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0)); - OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0); - } + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0); } switch(opcode) @@ -11368,8 +11931,8 @@ switch(opcode) if (common->mode == PCRE2_JIT_COMPLETE) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); - CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); } else { @@ -11437,14 +12000,14 @@ switch(opcode) if (opcode == OP_UPTO) { OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); - add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); + add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO)); } - compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); + compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE); if (early_fail_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0); JUMPHERE(jump); - detect_partial_match(common, &backtrack->topbacktracks); + detect_partial_match(common, &backtrack->own_backtracks); OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); if (charpos_othercasebit != 0) OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); @@ -11598,7 +12161,7 @@ switch(opcode) } #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 - if (common->utf) + if (type == OP_EXTUNI || common->utf) { OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); detect_partial_match(common, &no_match); @@ -11661,8 +12224,8 @@ switch(opcode) if (common->mode == PCRE2_JIT_COMPLETE) { - OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); - CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0); + SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR); } else { @@ -11715,12 +12278,12 @@ PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); if (*cc == OP_FAIL) { - add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); + add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP)); return cc + 1; } if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0) - add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); + add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0)); if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty) { @@ -11745,9 +12308,9 @@ if (HAS_VIRTUAL_REGISTERS) else OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options)); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO)); -OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO)); +OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); if (common->accept_label == NULL) add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO)); else @@ -11758,7 +12321,7 @@ if (common->accept_label == NULL) add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0)); else CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label); -add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP)); +add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP)); return cc + 1; } @@ -11878,8 +12441,9 @@ while (cc < ccend) case OP_DOLLM: case OP_CIRC: case OP_CIRCM: - case OP_REVERSE: - cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + case OP_NOT_UCP_WORD_BOUNDARY: + case OP_UCP_WORD_BOUNDARY: + cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); break; case OP_NOT_DIGIT: @@ -11901,7 +12465,7 @@ while (cc < ccend) case OP_EXTUNI: case OP_NOT: case OP_NOTI: - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; case OP_SET_SOM: @@ -11916,9 +12480,9 @@ while (cc < ccend) case OP_CHAR: case OP_CHARI: if (common->mode == PCRE2_JIT_COMPLETE) - cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); + cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; case OP_STAR: @@ -11994,7 +12558,7 @@ while (cc < ccend) if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE) cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32 @@ -12002,7 +12566,7 @@ while (cc < ccend) if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE) cc = compile_iterator_matchingpath(common, cc, parent); else - cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE); + cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE); break; #endif @@ -12012,7 +12576,7 @@ while (cc < ccend) cc = compile_ref_iterator_matchingpath(common, cc, parent); else { - compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); cc += 1 + IMM2_SIZE; } break; @@ -12023,8 +12587,8 @@ while (cc < ccend) cc = compile_ref_iterator_matchingpath(common, cc, parent); else { - compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks); - compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE); + compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks); + compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE); cc += 1 + 2 * IMM2_SIZE; } break; @@ -12191,7 +12755,7 @@ PCRE2_SPTR end; int private_data_ptr = PRIVATE_DATA(cc); int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; -int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); +int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw); cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); @@ -12302,7 +12866,7 @@ switch(opcode) break; } -set_jumps(current->topbacktracks, LABEL()); +set_jumps(current->own_backtracks, LABEL()); } static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -12317,7 +12881,7 @@ type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE]; if ((type & 0x1) == 0) { /* Maximize case. */ - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); free_stack(common, 1); CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); @@ -12326,7 +12890,7 @@ if ((type & 0x1) == 0) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath); -set_jumps(current->topbacktracks, LABEL()); +set_jumps(current->own_backtracks, LABEL()); free_stack(common, ref ? 2 : 3); } @@ -12347,7 +12911,7 @@ if (!CURRENT_AS(recurse_backtrack)->inlined_pattern) else compile_backtrackingpath(common, current->top); -set_jumps(current->topbacktracks, LABEL()); +set_jumps(current->own_backtracks, LABEL()); } static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -12366,13 +12930,13 @@ if (*cc == OP_BRAZERO) if (bra == OP_BRAZERO) { - SLJIT_ASSERT(current->topbacktracks == NULL); + SLJIT_ASSERT(current->own_backtracks == NULL); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); } if (CURRENT_AS(assert_backtrack)->framesize < 0) { - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); if (bra == OP_BRAZERO) { @@ -12404,10 +12968,10 @@ if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK) OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0); - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); } else - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); if (bra == OP_BRAZERO) { @@ -12434,6 +12998,7 @@ PCRE2_UCHAR ket; assert_backtrack *assert; BOOL has_alternatives; BOOL needs_control_head = FALSE; +BOOL has_vreverse; struct sljit_jump *brazero = NULL; struct sljit_jump *next_alt = NULL; struct sljit_jump *once = NULL; @@ -12610,8 +13175,8 @@ else if (has_alternatives) } COMPILE_BACKTRACKINGPATH(current->top); -if (current->topbacktracks) - set_jumps(current->topbacktracks, LABEL()); +if (current->own_backtracks) + set_jumps(current->own_backtracks, LABEL()); if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) { @@ -12647,14 +13212,25 @@ if (has_alternatives) do { current->top = NULL; - current->topbacktracks = NULL; - current->nextbacktracks = NULL; + current->own_backtracks = NULL; + current->simple_backtracks = NULL; /* Conditional blocks always have an additional alternative, even if it is empty. */ if (*cc == OP_ALT) { ccprev = cc + 1 + LINK_SIZE; cc += GET(cc, 1); - if (opcode != OP_COND && opcode != OP_SCOND) + + has_vreverse = FALSE; + if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA) + { + SLJIT_ASSERT(private_data_ptr != 0); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + + has_vreverse = (*ccprev == OP_VREVERSE); + if (*ccprev == OP_REVERSE || has_vreverse) + ccprev = compile_reverse_matchingpath(common, ccprev, current); + } + else if (opcode != OP_COND && opcode != OP_SCOND) { if (opcode != OP_ONCE) { @@ -12666,15 +13242,30 @@ if (has_alternatives) else OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); } + compile_matchingpath(common, ccprev, cc, current); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; - if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA) - OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + switch (opcode) + { + case OP_ASSERTBACK_NA: + if (has_vreverse) + { + SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1)); + add_jump(compiler, ¤t->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); + } - if (opcode == OP_SCRIPT_RUN) - match_script_run_common(common, private_data_ptr, current); + if (PRIVATE_DATA(ccbegin + 1)) + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + break; + case OP_ASSERT_NA: + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); + break; + case OP_SCRIPT_RUN: + match_script_run_common(common, private_data_ptr, current); + break; + } } /* Instructions after the current alternative is successfully matched. */ @@ -12761,9 +13352,9 @@ if (has_alternatives) } COMPILE_BACKTRACKINGPATH(current->top); - if (current->topbacktracks) - set_jumps(current->topbacktracks, LABEL()); - SLJIT_ASSERT(!current->nextbacktracks); + if (current->own_backtracks) + set_jumps(current->own_backtracks, LABEL()); + SLJIT_ASSERT(!current->simple_backtracks); } while (*cc == OP_ALT); @@ -12805,6 +13396,15 @@ if (offset != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); } } +else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1)) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); + OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); + OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw)); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); + OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0); + free_stack(common, 4); + } else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND) { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); @@ -12891,12 +13491,19 @@ static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *co DEFINE_COMPILER; int offset; struct sljit_jump *jump; +PCRE2_SPTR cc; +/* No retry on backtrack, just drop everything. */ if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) { - if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS) + cc = current->cc; + + if (*cc == OP_BRAPOSZERO) + cc++; + + if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS) { - offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1; + offset = (GET2(cc, 1 + LINK_SIZE)) << 1; OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); @@ -12906,7 +13513,7 @@ if (CURRENT_AS(bracketpos_backtrack)->framesize < 0) if (common->capture_last_ptr != 0) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); } - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); return; } @@ -12915,10 +13522,10 @@ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtra add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw)); -if (current->topbacktracks) +if (current->own_backtracks) { jump = JUMP(SLJIT_JUMP); - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); /* Drop the stack frame. */ free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize); JUMPHERE(jump); @@ -12931,8 +13538,8 @@ static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *co assert_backtrack backtrack; current->top = NULL; -current->topbacktracks = NULL; -current->nextbacktracks = NULL; +current->own_backtracks = NULL; +current->simple_backtracks = NULL; if (current->cc[1] > OP_ASSERTBACK_NOT) { /* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */ @@ -12947,7 +13554,7 @@ else /* Manual call of compile_assert_matchingpath. */ compile_assert_matchingpath(common, current->cc, &backtrack, FALSE); } -SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks); +SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks); } static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current) @@ -12998,7 +13605,7 @@ if (opcode == OP_SKIP_ARG) SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr); OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2)); - sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark)); + sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark)); OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0); add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0)); @@ -13012,6 +13619,23 @@ else add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP)); } +static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current) +{ +DEFINE_COMPILER; +struct sljit_jump *jump; +struct sljit_label *label; + +OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); +jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3)); +skip_valid_char(common); +OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0); +JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath); + +label = LABEL(); +sljit_set_label(jump, label); +set_jumps(current->own_backtracks, label); +} + static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current) { DEFINE_COMPILER; @@ -13052,8 +13676,8 @@ then_trap_backtrack *save_then_trap = common->then_trap; while (current) { - if (current->nextbacktracks != NULL) - set_jumps(current->nextbacktracks, LABEL()); + if (current->simple_backtracks != NULL) + set_jumps(current->simple_backtracks, LABEL()); switch(*current->cc) { case OP_SET_SOM: @@ -13219,7 +13843,11 @@ while (current) case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT: - set_jumps(current->topbacktracks, LABEL()); + set_jumps(current->own_backtracks, LABEL()); + break; + + case OP_VREVERSE: + compile_vreverse_backtrackingpath(common, current); break; case OP_THEN_TRAP: @@ -13242,10 +13870,8 @@ DEFINE_COMPILER; PCRE2_SPTR cc = common->start + common->currententry->start; PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE); PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE); -BOOL needs_control_head; -BOOL has_quit; -BOOL has_accept; -int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept); +uint32_t recurse_flags = 0; +int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags); int alt_count, alt_max, local_size; backtrack_common altbacktrack; jump_list *match = NULL; @@ -13267,7 +13893,7 @@ SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head common->currententry->entry_label = LABEL(); set_jumps(common->currententry->entry_calls, common->currententry->entry_label); -sljit_emit_fast_enter(compiler, TMP2, 0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0); count_match(common); local_size = (alt_max > 1) ? 2 : 1; @@ -13279,12 +13905,12 @@ allocate_stack(common, private_data_size + local_size); /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0); -copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags); /* This variable is saved and restored all time when we enter or exit from a recursive context. */ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0); -if (needs_control_head) +if (recurse_flags & recurse_flag_control_head_found) OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); if (alt_max > 1) @@ -13300,7 +13926,7 @@ cc += GET(cc, 1); while (1) { altbacktrack.top = NULL; - altbacktrack.topbacktracks = NULL; + altbacktrack.own_backtracks = NULL; if (altbacktrack.cc != ccbegin) OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); @@ -13309,10 +13935,10 @@ while (1) if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; - allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1); + allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); - if (alt_max > 1 || has_accept) + if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) { if (alt_max > 3) put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1)); @@ -13329,16 +13955,16 @@ while (1) common->currententry->backtrack_label = LABEL(); set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label); - sljit_emit_fast_enter(compiler, TMP1, 0); + sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0); - if (has_accept) + if (recurse_flags & recurse_flag_accept_found) accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0); - copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags); if (alt_max > 1) { @@ -13355,7 +13981,7 @@ while (1) next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); } else - free_stack(common, has_accept ? 2 : 1); + free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1); } else if (alt_max > 3) { @@ -13377,7 +14003,7 @@ while (1) compile_backtrackingpath(common, altbacktrack.top); if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) return; - set_jumps(altbacktrack.topbacktracks, LABEL()); + set_jumps(altbacktrack.own_backtracks, LABEL()); if (*cc != OP_ALT) break; @@ -13390,7 +14016,7 @@ while (1) quit = LABEL(); -copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); free_stack(common, private_data_size + local_size); @@ -13399,15 +14025,15 @@ OP_SRC(SLJIT_FAST_RETURN, TMP2, 0); if (common->quit != NULL) { - SLJIT_ASSERT(has_quit); + SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found); set_jumps(common->quit, LABEL()); OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr); - copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags); JUMPTO(SLJIT_JUMP, quit); } -if (has_accept) +if (recurse_flags & recurse_flag_accept_found) { JUMPHERE(accept_exit); free_stack(common, 2); @@ -13415,7 +14041,7 @@ if (has_accept) /* Save return address. */ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0); - copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit); + copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1)); free_stack(common, private_data_size + local_size); @@ -13425,7 +14051,7 @@ if (has_accept) if (common->accept != NULL) { - SLJIT_ASSERT(has_accept); + SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found); set_jumps(common->accept, LABEL()); @@ -13440,7 +14066,7 @@ set_jumps(match, LABEL()); OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0); -copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit); +copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1)); OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1); @@ -13483,9 +14109,9 @@ jump_list *reqcu_not_found = NULL; SLJIT_ASSERT(tables); #if HAS_VIRTUAL_REGISTERS == 1 -SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0); +SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0); #elif HAS_VIRTUAL_REGISTERS == 0 -SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0); +SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0); #else #error "Invalid value for HAS_VIRTUAL_REGISTERS" #endif @@ -13646,7 +14272,7 @@ SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); total_length = ccend - common->start; -common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data); +common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data); if (!common->private_data_ptrs) { SLJIT_FREE(common->optimized_cbracket, allocator_data); @@ -13663,7 +14289,7 @@ set_private_data_ptrs(common, &private_data_size, ccend); SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); -if (private_data_size > SLJIT_MAX_LOCAL_SIZE) +if (private_data_size > 65536) { SLJIT_FREE(common->private_data_ptrs, allocator_data); SLJIT_FREE(common->optimized_cbracket, allocator_data); @@ -13686,8 +14312,9 @@ if (!compiler) } common->compiler = compiler; -/* Main pcre_jit_exec entry. */ -sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size); +/* Main pcre2_jit_exec entry. */ +SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0); +sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size); /* Register init. */ reset_ovector(common, (re->top_bracket + 1) * 2); @@ -13894,9 +14521,9 @@ if (common->might_be_empty) JUMPHERE(empty_match); OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options)); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY); JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label); - OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); + OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART); JUMPTO(SLJIT_ZERO, empty_match_found_label); OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); @@ -13909,20 +14536,40 @@ common->early_fail_end_ptr = 0; common->currententry = common->entries; common->local_quit_available = TRUE; quit_label = common->quit_label; -while (common->currententry != NULL) +if (common->currententry != NULL) { - /* Might add new entries. */ - compile_recurse(common); - if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + /* A free bit for each private data. */ + common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3; + SLJIT_ASSERT(common->recurse_bitset_size > 0); + common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);; + + if (common->recurse_bitset != NULL) { + do + { + /* Might add new entries. */ + compile_recurse(common); + if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) + break; + flush_stubs(common); + common->currententry = common->currententry->next; + } + while (common->currententry != NULL); + + SLJIT_FREE(common->recurse_bitset, allocator_data); + } + + if (common->currententry != NULL) + { + /* The common->recurse_bitset has been freed. */ + SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL); + sljit_free_compiler(compiler); SLJIT_FREE(common->optimized_cbracket, allocator_data); SLJIT_FREE(common->private_data_ptrs, allocator_data); PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data); return PCRE2_ERROR_NOMEMORY; } - flush_stubs(common); - common->currententry = common->currententry->next; } common->local_quit_available = FALSE; common->quit_label = quit_label; @@ -13931,7 +14578,7 @@ common->quit_label = quit_label; /* This is a (really) rare case. */ set_jumps(common->stackalloc, LABEL()); /* RETURN_ADDR is not a saved register. */ -sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); +sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0); SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1); @@ -13941,7 +14588,7 @@ OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE); OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack)); OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0); -sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); +sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize)); jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0); @@ -13969,7 +14616,12 @@ if (common->revertframes != NULL) if (common->wordboundary != NULL) { set_jumps(common->wordboundary, LABEL()); - check_wordboundary(common); + check_wordboundary(common, FALSE); + } +if (common->ucp_wordboundary != NULL) + { + set_jumps(common->ucp_wordboundary, LABEL()); + check_wordboundary(common, TRUE); } if (common->anynewline != NULL) { @@ -13996,10 +14648,17 @@ if (common->caselesscmp != NULL) set_jumps(common->caselesscmp, LABEL()); do_caselesscmp(common); } -if (common->reset_match != NULL) +if (common->reset_match != NULL || common->restart_match != NULL) { + if (common->restart_match != NULL) + { + set_jumps(common->restart_match, LABEL()); + OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr); + } + set_jumps(common->reset_match, LABEL()); do_reset_match(common, (re->top_bracket + 1) * 2); + /* The value of restart_match is in TMP1. */ CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); JUMPTO(SLJIT_JUMP, reset_match_label); @@ -14126,6 +14785,10 @@ PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION pcre2_jit_compile(pcre2_code *code, uint32_t options) { pcre2_real_code *re = (pcre2_real_code *)code; +#ifdef SUPPORT_JIT +executable_functions *functions; +static int executable_allocator_is_working = -1; +#endif if (code == NULL) return PCRE2_ERROR_NULL; @@ -14160,8 +14823,7 @@ actions are needed: */ #ifdef SUPPORT_JIT -executable_functions *functions = (executable_functions *)re->executable_jit; -static int executable_allocator_is_working = 0; +functions = (executable_functions *)re->executable_jit; #endif if ((options & PCRE2_JIT_INVALID_UTF) != 0) @@ -14188,23 +14850,21 @@ return PCRE2_ERROR_JIT_BADOPTION; if ((re->flags & PCRE2_NOJIT) != 0) return 0; -if (executable_allocator_is_working == 0) +if (executable_allocator_is_working == -1) { /* Checks whether the executable allocator is working. This check might run multiple times in multi-threaded environments, but the result should not be affected by it. */ void *ptr = SLJIT_MALLOC_EXEC(32, NULL); - - executable_allocator_is_working = -1; - if (ptr != NULL) { SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL); executable_allocator_is_working = 1; } + else executable_allocator_is_working = 0; } -if (executable_allocator_is_working < 0) +if (!executable_allocator_is_working) return PCRE2_ERROR_NOMEMORY; if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0) |