summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/pcre2/src/pcre2_jit_compile.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2/src/pcre2_jit_compile.c')
-rw-r--r--src/3rdparty/pcre2/src/pcre2_jit_compile.c2313
1 files changed, 1482 insertions, 831 deletions
diff --git a/src/3rdparty/pcre2/src/pcre2_jit_compile.c b/src/3rdparty/pcre2/src/pcre2_jit_compile.c
index db2ce65598..050063ec6d 100644
--- a/src/3rdparty/pcre2/src/pcre2_jit_compile.c
+++ b/src/3rdparty/pcre2/src/pcre2_jit_compile.c
@@ -8,7 +8,7 @@ and semantics are as close as possible to those of the Perl 5 language.
Written by Philip Hazel
This module by Zoltan Herczeg
Original API code Copyright (c) 1997-2012 University of Cambridge
- New API code Copyright (c) 2016-2019 University of Cambridge
+ New API code Copyright (c) 2016-2021 University of Cambridge
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
@@ -43,6 +43,12 @@ POSSIBILITY OF SUCH DAMAGE.
#include "config.h"
#endif
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#include <sanitizer/msan_interface.h>
+#endif /* __has_feature(memory_sanitizer) */
+#endif /* defined(__has_feature) */
+
#include "pcre2_internal.h"
#ifdef SUPPORT_JIT
@@ -236,12 +242,21 @@ code generator. It is allocated by compile_matchingpath, and contains
the arguments for compile_backtrackingpath. Must be the first member
of its descendants. */
typedef struct backtrack_common {
- /* Concatenation stack. */
+ /* Backtracking path of an opcode, which falls back
+ to our opcode, if it cannot resume matching. */
struct backtrack_common *prev;
- jump_list *nextbacktracks;
- /* Internal stack (for component operators). */
+ /* Backtracks for opcodes without backtracking path.
+ These opcodes are between 'prev' and the current
+ opcode, and they never resume the match. */
+ jump_list *simple_backtracks;
+ /* Internal backtracking list for block constructs
+ which contains other opcodes, such as brackets,
+ asserts, conditionals, etc. */
struct backtrack_common *top;
- jump_list *topbacktracks;
+ /* Backtracks used internally by the opcode. For component
+ opcodes, this list is also used by those opcodes without
+ backtracking path which follows the 'top' backtrack. */
+ jump_list *own_backtracks;
/* Opcode pointer. */
PCRE2_SPTR cc;
} backtrack_common;
@@ -338,6 +353,12 @@ typedef struct recurse_backtrack {
BOOL inlined_pattern;
} recurse_backtrack;
+typedef struct vreverse_backtrack {
+ backtrack_common common;
+ /* Return to the matching path. */
+ struct sljit_label *matchingpath;
+} vreverse_backtrack;
+
#define OP_THEN_TRAP OP_TABLE_LENGTH
typedef struct then_trap_backtrack {
@@ -404,7 +425,9 @@ typedef struct compiler_common {
sljit_s32 match_end_ptr;
/* Points to the marked string. */
sljit_s32 mark_ptr;
- /* Recursive control verb management chain. */
+ /* Head of the recursive control verb management chain.
+ Each item must have a previous offset and type
+ (see control_types) values. See do_search_mark. */
sljit_s32 control_head_ptr;
/* Points to the last matched capture block index. */
sljit_s32 capture_last_ptr;
@@ -413,6 +436,9 @@ typedef struct compiler_common {
/* Locals used by fast fail optimization. */
sljit_s32 early_fail_start_ptr;
sljit_s32 early_fail_end_ptr;
+ /* Variables used by recursive call generator. */
+ sljit_s32 recurse_bitset_size;
+ uint8_t *recurse_bitset;
/* Flipped and lower case tables. */
const sljit_u8 *fcc;
@@ -471,12 +497,15 @@ typedef struct compiler_common {
jump_list *stackalloc;
jump_list *revertframes;
jump_list *wordboundary;
+ jump_list *ucp_wordboundary;
jump_list *anynewline;
jump_list *hspace;
jump_list *vspace;
jump_list *casefulcmp;
jump_list *caselesscmp;
jump_list *reset_match;
+ /* Same as reset_match, but resets the STR_PTR as well. */
+ jump_list *restart_match;
BOOL unset_backref;
BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
@@ -539,7 +568,7 @@ typedef struct compare_context {
#undef CMP
/* Used for accessing the elements of the stack. */
-#define STACK(i) ((i) * (int)sizeof(sljit_sw))
+#define STACK(i) ((i) * SSIZE_OF(sw))
#ifdef SLJIT_PREF_SHIFT_REG
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2
@@ -587,8 +616,8 @@ to characters. The vector data is divided into two groups: the first
group contains the start / end character pointers, and the second is
the start pointers when the end of the capturing group has not yet reached. */
#define OVECTOR_START (common->ovector_start)
-#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw))
-#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw))
+#define OVECTOR(i) (OVECTOR_START + (i) * SSIZE_OF(sw))
+#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * SSIZE_OF(sw))
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start])
#if PCRE2_CODE_UNIT_WIDTH == 8
@@ -613,6 +642,8 @@ the start pointers when the end of the capturing group has not yet reached. */
sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw))
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \
sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w))
+#define OP2U(op, src1, src1w, src2, src2w) \
+ sljit_emit_op2u(compiler, (op), (src1), (src1w), (src2), (src2w))
#define OP_SRC(op, src, srcw) \
sljit_emit_op_src(compiler, (op), (src), (srcw))
#define LABEL() \
@@ -631,8 +662,8 @@ the start pointers when the end of the capturing group has not yet reached. */
sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label))
#define OP_FLAGS(op, dst, dstw, type) \
sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type))
-#define CMOV(type, dst_reg, src, srcw) \
- sljit_emit_cmov(compiler, (type), (dst_reg), (src), (srcw))
+#define SELECT(type, dst_reg, src1, src1w, src2_reg) \
+ sljit_emit_select(compiler, (type), (dst_reg), (src1), (src1w), (src2_reg))
#define GET_LOCAL_BASE(dst, dstw, offset) \
sljit_get_local_base(compiler, (dst), (dstw), (offset))
@@ -852,6 +883,21 @@ SLJIT_ASSERT(*cc >= OP_KET && *cc <= OP_KETRPOS);
return count;
}
+static BOOL find_vreverse(PCRE2_SPTR cc)
+{
+ SLJIT_ASSERT(*cc == OP_ASSERTBACK || *cc == OP_ASSERTBACK_NOT || *cc == OP_ASSERTBACK_NA);
+
+ do
+ {
+ if (cc[1 + LINK_SIZE] == OP_VREVERSE)
+ return TRUE;
+ cc += GET(cc, 1);
+ }
+ while (*cc == OP_ALT);
+
+ return FALSE;
+}
+
/* Functions whose might need modification for all new supported opcodes:
next_opcode
check_opcode_types
@@ -922,6 +968,7 @@ switch(*cc)
case OP_KETRMIN:
case OP_KETRPOS:
case OP_REVERSE:
+ case OP_VREVERSE:
case OP_ASSERT:
case OP_ASSERT_NOT:
case OP_ASSERTBACK:
@@ -958,6 +1005,8 @@ switch(*cc)
case OP_ASSERT_ACCEPT:
case OP_CLOSE:
case OP_SKIPZERO:
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
return cc + PRIV(OP_lengths)[*cc];
case OP_CHAR:
@@ -1226,34 +1275,37 @@ while (cc < ccend)
return TRUE;
}
-#define EARLY_FAIL_ENHANCE_MAX (1 + 3)
+#define EARLY_FAIL_ENHANCE_MAX (3 + 3)
/*
-start:
- 0 - skip / early fail allowed
- 1 - only early fail with range allowed
- >1 - (start - 1) early fail is processed
+ Start represent the number of allowed early fail enhancements
+
+ The 0-2 values has a special meaning:
+ 0 - skip is allowed for all iterators
+ 1 - fail is allowed for all iterators
+ 2 - fail is allowed for greedy iterators
+ 3 - only ranged early fail is allowed
+ >3 - (start - 3) number of remaining ranged early fails allowed
-return: current number of iterators enhanced with fast fail
+return: the updated value of start
*/
-static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start,
- sljit_s32 depth, int start, BOOL fast_forward_allowed)
+static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc,
+ int *private_data_start, sljit_s32 depth, int start)
{
PCRE2_SPTR begin = cc;
PCRE2_SPTR next_alt;
PCRE2_SPTR end;
PCRE2_SPTR accelerated_start;
-BOOL prev_fast_forward_allowed;
int result = 0;
-int count;
+int count, prev_count;
SLJIT_ASSERT(*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA);
SLJIT_ASSERT(*cc != OP_CBRA || common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] != 0);
SLJIT_ASSERT(start < EARLY_FAIL_ENHANCE_MAX);
next_alt = cc + GET(cc, 1);
-if (*next_alt == OP_ALT)
- fast_forward_allowed = FALSE;
+if (*next_alt == OP_ALT && start < 1)
+ start = 1;
do
{
@@ -1277,6 +1329,8 @@ do
case OP_CIRCM:
case OP_DOLL:
case OP_DOLLM:
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
/* Zero width assertions. */
cc++;
continue;
@@ -1294,21 +1348,22 @@ do
case OP_HSPACE:
case OP_NOT_VSPACE:
case OP_VSPACE:
- fast_forward_allowed = FALSE;
+ if (count < 1)
+ count = 1;
cc++;
continue;
case OP_ANYNL:
case OP_EXTUNI:
- fast_forward_allowed = FALSE;
- if (count == 0)
- count = 1;
+ if (count < 3)
+ count = 3;
cc++;
continue;
case OP_NOTPROP:
case OP_PROP:
- fast_forward_allowed = FALSE;
+ if (count < 1)
+ count = 1;
cc += 1 + 2;
continue;
@@ -1316,17 +1371,22 @@ do
case OP_CHARI:
case OP_NOT:
case OP_NOTI:
- fast_forward_allowed = FALSE;
+ if (count < 1)
+ count = 1;
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
continue;
- case OP_TYPESTAR:
case OP_TYPEMINSTAR:
- case OP_TYPEPLUS:
case OP_TYPEMINPLUS:
+ if (count == 2)
+ count = 3;
+ /* Fall through */
+
+ case OP_TYPESTAR:
+ case OP_TYPEPLUS:
case OP_TYPEPOSSTAR:
case OP_TYPEPOSPLUS:
/* The type or prop opcode is skipped in the next iteration. */
@@ -1338,14 +1398,18 @@ do
break;
}
- if (count == 0)
+ if (count < 3)
+ count = 3;
+ continue;
+
+ case OP_TYPEEXACT:
+ if (count < 1)
count = 1;
- fast_forward_allowed = FALSE;
+ cc += 1 + IMM2_SIZE;
continue;
case OP_TYPEUPTO:
case OP_TYPEMINUPTO:
- case OP_TYPEEXACT:
case OP_TYPEPOSUPTO:
cc += IMM2_SIZE;
/* Fall through */
@@ -1354,37 +1418,40 @@ do
case OP_TYPEMINQUERY:
case OP_TYPEPOSQUERY:
/* The type or prop opcode is skipped in the next iteration. */
- fast_forward_allowed = FALSE;
- if (count == 0)
- count = 1;
+ if (count < 3)
+ count = 3;
cc += 1;
continue;
- case OP_STAR:
case OP_MINSTAR:
- case OP_PLUS:
case OP_MINPLUS:
+ case OP_MINSTARI:
+ case OP_MINPLUSI:
+ case OP_NOTMINSTAR:
+ case OP_NOTMINPLUS:
+ case OP_NOTMINSTARI:
+ case OP_NOTMINPLUSI:
+ if (count == 2)
+ count = 3;
+ /* Fall through */
+
+ case OP_STAR:
+ case OP_PLUS:
case OP_POSSTAR:
case OP_POSPLUS:
case OP_STARI:
- case OP_MINSTARI:
case OP_PLUSI:
- case OP_MINPLUSI:
case OP_POSSTARI:
case OP_POSPLUSI:
case OP_NOTSTAR:
- case OP_NOTMINSTAR:
case OP_NOTPLUS:
- case OP_NOTMINPLUS:
case OP_NOTPOSSTAR:
case OP_NOTPOSPLUS:
case OP_NOTSTARI:
- case OP_NOTMINSTARI:
case OP_NOTPLUSI:
- case OP_NOTMINPLUSI:
case OP_NOTPOSSTARI:
case OP_NOTPOSPLUSI:
accelerated_start = cc;
@@ -1394,9 +1461,17 @@ do
#endif
break;
+ case OP_EXACT:
+ if (count < 1)
+ count = 1;
+ cc += 2 + IMM2_SIZE;
+#ifdef SUPPORT_UNICODE
+ if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
+#endif
+ continue;
+
case OP_UPTO:
case OP_MINUPTO:
- case OP_EXACT:
case OP_POSUPTO:
case OP_UPTOI:
case OP_MINUPTOI:
@@ -1425,9 +1500,8 @@ do
case OP_NOTQUERYI:
case OP_NOTMINQUERYI:
case OP_NOTPOSQUERYI:
- fast_forward_allowed = FALSE;
- if (count == 0)
- count = 1;
+ if (count < 3)
+ count = 3;
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@@ -1447,10 +1521,14 @@ do
switch (*cc)
{
- case OP_CRSTAR:
case OP_CRMINSTAR:
- case OP_CRPLUS:
case OP_CRMINPLUS:
+ if (count == 2)
+ count = 3;
+ /* Fall through */
+
+ case OP_CRSTAR:
+ case OP_CRPLUS:
case OP_CRPOSSTAR:
case OP_CRPOSPLUS:
cc++;
@@ -1459,44 +1537,60 @@ do
case OP_CRRANGE:
case OP_CRMINRANGE:
case OP_CRPOSRANGE:
+ if (GET2(cc, 1) == GET2(cc, 1 + IMM2_SIZE))
+ {
+ /* Exact repeat. */
+ cc += 1 + 2 * IMM2_SIZE;
+ if (count < 1)
+ count = 1;
+ continue;
+ }
+
cc += 2 * IMM2_SIZE;
/* Fall through */
case OP_CRQUERY:
case OP_CRMINQUERY:
case OP_CRPOSQUERY:
cc++;
- if (count == 0)
- count = 1;
- /* Fall through */
+ if (count < 3)
+ count = 3;
+ continue;
+
default:
- accelerated_start = NULL;
- fast_forward_allowed = FALSE;
+ /* No repeat. */
+ if (count < 1)
+ count = 1;
continue;
}
break;
- case OP_ONCE:
case OP_BRA:
case OP_CBRA:
- end = cc + GET(cc, 1);
+ prev_count = count;
+ if (count < 1)
+ count = 1;
- prev_fast_forward_allowed = fast_forward_allowed;
- fast_forward_allowed = FALSE;
if (depth >= 4)
break;
- end = bracketend(cc) - (1 + LINK_SIZE);
- if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
+ if (count < 3 && cc[GET(cc, 1)] == OP_ALT)
+ count = 3;
+
+ end = bracketend(cc);
+ if (end[-1 - LINK_SIZE] != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0))
break;
- count = detect_early_fail(common, cc, private_data_start, depth + 1, count, prev_fast_forward_allowed);
+ prev_count = detect_early_fail(common, cc, private_data_start, depth + 1, prev_count);
+
+ if (prev_count > count)
+ count = prev_count;
if (PRIVATE_DATA(cc) != 0)
common->private_data_ptrs[begin - common->start] = 1;
if (count < EARLY_FAIL_ENHANCE_MAX)
{
- cc = end + (1 + LINK_SIZE);
+ cc = end;
continue;
}
break;
@@ -1509,55 +1603,52 @@ do
continue;
}
- if (accelerated_start != NULL)
+ if (accelerated_start == NULL)
+ break;
+
+ if (count == 0)
{
- if (count == 0)
- {
- count++;
+ common->fast_forward_bc_ptr = accelerated_start;
+ common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
+ *private_data_start += sizeof(sljit_sw);
+ count = 4;
+ }
+ else if (count < 3)
+ {
+ common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
- if (fast_forward_allowed)
- {
- common->fast_forward_bc_ptr = accelerated_start;
- common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_skip;
- *private_data_start += sizeof(sljit_sw);
- }
- else
- {
- common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail;
+ if (common->early_fail_start_ptr == 0)
+ common->early_fail_start_ptr = *private_data_start;
- if (common->early_fail_start_ptr == 0)
- common->early_fail_start_ptr = *private_data_start;
+ *private_data_start += sizeof(sljit_sw);
+ common->early_fail_end_ptr = *private_data_start;
- *private_data_start += sizeof(sljit_sw);
- common->early_fail_end_ptr = *private_data_start;
+ if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
+ return EARLY_FAIL_ENHANCE_MAX;
- if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
- return EARLY_FAIL_ENHANCE_MAX;
- }
- }
- else
- {
- common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
+ count = 4;
+ }
+ else
+ {
+ common->private_data_ptrs[(accelerated_start + 1) - common->start] = ((*private_data_start) << 3) | type_fail_range;
- if (common->early_fail_start_ptr == 0)
- common->early_fail_start_ptr = *private_data_start;
+ if (common->early_fail_start_ptr == 0)
+ common->early_fail_start_ptr = *private_data_start;
- *private_data_start += 2 * sizeof(sljit_sw);
- common->early_fail_end_ptr = *private_data_start;
+ *private_data_start += 2 * sizeof(sljit_sw);
+ common->early_fail_end_ptr = *private_data_start;
- if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
- return EARLY_FAIL_ENHANCE_MAX;
- }
+ if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
+ return EARLY_FAIL_ENHANCE_MAX;
- /* Cannot be part of a repeat. */
- common->private_data_ptrs[begin - common->start] = 1;
count++;
-
- if (count < EARLY_FAIL_ENHANCE_MAX)
- continue;
}
- break;
+ /* Cannot be part of a repeat. */
+ common->private_data_ptrs[begin - common->start] = 1;
+
+ if (count >= EARLY_FAIL_ENHANCE_MAX)
+ break;
}
if (*cc != OP_ALT && *cc != OP_KET)
@@ -1621,7 +1712,7 @@ if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0)
/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/
* Skip the check of the second part. */
-if (PRIVATE_DATA(end - LINK_SIZE) == 0)
+if (PRIVATE_DATA(end - LINK_SIZE) != 0)
return TRUE;
next = end;
@@ -1790,7 +1881,6 @@ while (cc < ccend)
case OP_ASSERTBACK:
case OP_ASSERTBACK_NOT:
case OP_ASSERT_NA:
- case OP_ASSERTBACK_NA:
case OP_ONCE:
case OP_SCRIPT_RUN:
case OP_BRAPOS:
@@ -1802,6 +1892,19 @@ while (cc < ccend)
bracketlen = 1 + LINK_SIZE;
break;
+ case OP_ASSERTBACK_NA:
+ common->private_data_ptrs[cc - common->start] = private_data_ptr;
+ private_data_ptr += sizeof(sljit_sw);
+
+ if (find_vreverse(cc))
+ {
+ common->private_data_ptrs[cc + 1 - common->start] = 1;
+ private_data_ptr += sizeof(sljit_sw);
+ }
+
+ bracketlen = 1 + LINK_SIZE;
+ break;
+
case OP_CBRAPOS:
case OP_SCBRAPOS:
common->private_data_ptrs[cc - common->start] = private_data_ptr;
@@ -2101,6 +2204,9 @@ while (cc < ccend)
case OP_CALLOUT:
case OP_CALLOUT_STR:
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
+
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
break;
@@ -2146,9 +2252,9 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
setsom_found = TRUE;
}
cc += 1;
@@ -2163,9 +2269,9 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
setmark_found = TRUE;
}
cc += 1 + 2 + cc[1];
@@ -2176,27 +2282,27 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -OVECTOR(0));
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
setsom_found = TRUE;
}
if (common->mark_ptr != 0 && !setmark_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->mark_ptr);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
setmark_found = TRUE;
}
if (common->capture_last_ptr != 0 && !capture_last_found)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
capture_last_found = TRUE;
}
cc += 1 + LINK_SIZE;
@@ -2210,20 +2316,20 @@ while (cc < ccend)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -common->capture_last_ptr);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
capture_last_found = TRUE;
}
offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(offset));
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
- stackpos -= (int)sizeof(sljit_sw);
+ stackpos -= SSIZE_OF(sw);
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
@@ -2256,7 +2362,7 @@ int i;
for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
{
SLJIT_ASSERT(status->tmp_regs[i] >= 0);
- SLJIT_ASSERT(sljit_get_register_index(status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
+ SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[i]) < 0 || status->tmp_regs[i] == status->saved_tmp_regs[i]);
status->store_bases[i] = -1;
}
@@ -2276,7 +2382,7 @@ SLJIT_ASSERT(load_base > 0 && store_base > 0);
if (status->store_bases[next_tmp_reg] == -1)
{
/* Preserve virtual registers. */
- if (sljit_get_register_index(status->saved_tmp_regs[next_tmp_reg]) < 0)
+ if (sljit_get_register_index(SLJIT_GP_REGISTER, status->saved_tmp_regs[next_tmp_reg]) < 0)
OP1(SLJIT_MOV, status->saved_tmp_regs[next_tmp_reg], 0, tmp_reg, 0);
}
else
@@ -2305,7 +2411,7 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
OP1(SLJIT_MOV, SLJIT_MEM1(status->store_bases[next_tmp_reg]), status->store_offsets[next_tmp_reg], tmp_reg, 0);
/* Restore virtual registers. */
- if (sljit_get_register_index(saved_tmp_reg) < 0)
+ if (sljit_get_register_index(SLJIT_GP_REGISTER, saved_tmp_reg) < 0)
OP1(SLJIT_MOV, tmp_reg, 0, saved_tmp_reg, 0);
}
@@ -2315,22 +2421,47 @@ for (i = 0; i < RECURSE_TMP_REG_COUNT; i++)
#undef RECURSE_TMP_REG_COUNT
-static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
- BOOL *needs_control_head, BOOL *has_quit, BOOL *has_accept)
+static BOOL recurse_check_bit(compiler_common *common, sljit_sw bit_index)
+{
+uint8_t *byte;
+uint8_t mask;
+
+SLJIT_ASSERT((bit_index & (sizeof(sljit_sw) - 1)) == 0);
+
+bit_index >>= SLJIT_WORD_SHIFT;
+
+SLJIT_ASSERT((bit_index >> 3) < common->recurse_bitset_size);
+
+mask = 1 << (bit_index & 0x7);
+byte = common->recurse_bitset + (bit_index >> 3);
+
+if (*byte & mask)
+ return FALSE;
+
+*byte |= mask;
+return TRUE;
+}
+
+enum get_recurse_flags {
+ recurse_flag_quit_found = (1 << 0),
+ recurse_flag_accept_found = (1 << 1),
+ recurse_flag_setsom_found = (1 << 2),
+ recurse_flag_setmark_found = (1 << 3),
+ recurse_flag_control_head_found = (1 << 4),
+};
+
+static int get_recurse_data_length(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend, uint32_t *result_flags)
{
int length = 1;
-int size;
+int size, offset;
PCRE2_SPTR alternative;
-BOOL quit_found = FALSE;
-BOOL accept_found = FALSE;
-BOOL setsom_found = FALSE;
-BOOL setmark_found = FALSE;
-BOOL capture_last_found = FALSE;
-BOOL control_head_found = FALSE;
+uint32_t recurse_flags = 0;
+
+memset(common->recurse_bitset, 0, common->recurse_bitset_size);
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
-control_head_found = TRUE;
+recurse_flags |= recurse_flag_control_head_found;
#endif
/* Calculate the sum of the private machine words. */
@@ -2341,24 +2472,26 @@ while (cc < ccend)
{
case OP_SET_SOM:
SLJIT_ASSERT(common->has_set_som);
- setsom_found = TRUE;
+ recurse_flags |= recurse_flag_setsom_found;
cc += 1;
break;
case OP_RECURSE:
if (common->has_set_som)
- setsom_found = TRUE;
+ recurse_flags |= recurse_flag_setsom_found;
if (common->mark_ptr != 0)
- setmark_found = TRUE;
- if (common->capture_last_ptr != 0)
- capture_last_found = TRUE;
+ recurse_flags |= recurse_flag_setmark_found;
+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
+ length++;
cc += 1 + LINK_SIZE;
break;
case OP_KET:
- if (PRIVATE_DATA(cc) != 0)
+ offset = PRIVATE_DATA(cc);
+ if (offset != 0)
{
- length++;
+ if (recurse_check_bit(common, offset))
+ length++;
SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
cc += PRIVATE_DATA(cc + 1);
}
@@ -2377,39 +2510,55 @@ while (cc < ccend)
case OP_SBRA:
case OP_SBRAPOS:
case OP_SCOND:
- length++;
SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
+ if (recurse_check_bit(common, PRIVATE_DATA(cc)))
+ length++;
cc += 1 + LINK_SIZE;
break;
case OP_CBRA:
case OP_SCBRA:
- length += 2;
- if (common->capture_last_ptr != 0)
- capture_last_found = TRUE;
- if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ offset = GET2(cc, 1 + LINK_SIZE);
+ if (recurse_check_bit(common, OVECTOR(offset << 1)))
+ {
+ SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
+ length += 2;
+ }
+ if (common->optimized_cbracket[offset] == 0 && recurse_check_bit(common, OVECTOR_PRIV(offset)))
+ length++;
+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
length++;
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_CBRAPOS:
case OP_SCBRAPOS:
- length += 2 + 2;
- if (common->capture_last_ptr != 0)
- capture_last_found = TRUE;
+ offset = GET2(cc, 1 + LINK_SIZE);
+ if (recurse_check_bit(common, OVECTOR(offset << 1)))
+ {
+ SLJIT_ASSERT(recurse_check_bit(common, OVECTOR((offset << 1) + 1)));
+ length += 2;
+ }
+ if (recurse_check_bit(common, OVECTOR_PRIV(offset)))
+ length++;
+ if (recurse_check_bit(common, PRIVATE_DATA(cc)))
+ length++;
+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
+ length++;
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_COND:
/* Might be a hidden SCOND. */
alternative = cc + GET(cc, 1);
- if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
+ if ((*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) && recurse_check_bit(common, PRIVATE_DATA(cc)))
length++;
cc += 1 + LINK_SIZE;
break;
CASE_ITERATOR_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc) != 0)
+ offset = PRIVATE_DATA(cc);
+ if (offset != 0 && recurse_check_bit(common, offset))
length++;
cc += 2;
#ifdef SUPPORT_UNICODE
@@ -2418,8 +2567,12 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc) != 0)
+ offset = PRIVATE_DATA(cc);
+ if (offset != 0 && recurse_check_bit(common, offset))
+ {
+ SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
length += 2;
+ }
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@@ -2427,8 +2580,12 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc) != 0)
+ offset = PRIVATE_DATA(cc);
+ if (offset != 0 && recurse_check_bit(common, offset))
+ {
+ SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
length += 2;
+ }
cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@@ -2436,20 +2593,29 @@ while (cc < ccend)
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc) != 0)
+ offset = PRIVATE_DATA(cc);
+ if (offset != 0 && recurse_check_bit(common, offset))
length++;
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc) != 0)
+ offset = PRIVATE_DATA(cc);
+ if (offset != 0 && recurse_check_bit(common, offset))
+ {
+ SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
length += 2;
+ }
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc) != 0)
+ offset = PRIVATE_DATA(cc);
+ if (offset != 0 && recurse_check_bit(common, offset))
+ {
+ SLJIT_ASSERT(recurse_check_bit(common, offset + sizeof(sljit_sw)));
length += 2;
+ }
cc += 1 + IMM2_SIZE;
break;
@@ -2461,7 +2627,9 @@ while (cc < ccend)
#else
size = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
- if (PRIVATE_DATA(cc) != 0)
+
+ offset = PRIVATE_DATA(cc);
+ if (offset != 0 && recurse_check_bit(common, offset))
length += get_class_iterator_size(cc + size);
cc += size;
break;
@@ -2471,12 +2639,11 @@ while (cc < ccend)
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
- if (!setmark_found)
- setmark_found = TRUE;
+ recurse_flags |= recurse_flag_setmark_found;
if (common->control_head_ptr != 0)
- control_head_found = TRUE;
+ recurse_flags |= recurse_flag_control_head_found;
if (*cc != OP_MARK)
- quit_found = TRUE;
+ recurse_flags |= recurse_flag_quit_found;
cc += 1 + 2 + cc[1];
break;
@@ -2484,26 +2651,24 @@ while (cc < ccend)
case OP_PRUNE:
case OP_SKIP:
case OP_COMMIT:
- quit_found = TRUE;
+ recurse_flags |= recurse_flag_quit_found;
cc++;
break;
case OP_SKIP_ARG:
- quit_found = TRUE;
+ recurse_flags |= recurse_flag_quit_found;
cc += 1 + 2 + cc[1];
break;
case OP_THEN:
SLJIT_ASSERT(common->control_head_ptr != 0);
- quit_found = TRUE;
- if (!control_head_found)
- control_head_found = TRUE;
+ recurse_flags |= recurse_flag_quit_found | recurse_flag_control_head_found;
cc++;
break;
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
- accept_found = TRUE;
+ recurse_flags |= recurse_flag_accept_found;
cc++;
break;
@@ -2515,21 +2680,17 @@ while (cc < ccend)
}
SLJIT_ASSERT(cc == ccend);
-if (control_head_found)
+if (recurse_flags & recurse_flag_control_head_found)
length++;
-if (capture_last_found)
- length++;
-if (quit_found)
+if (recurse_flags & recurse_flag_quit_found)
{
- if (setsom_found)
+ if (recurse_flags & recurse_flag_setsom_found)
length++;
- if (setmark_found)
+ if (recurse_flags & recurse_flag_setmark_found)
length++;
}
-*needs_control_head = control_head_found;
-*has_quit = quit_found;
-*has_accept = accept_found;
+*result_flags = recurse_flags;
return length;
}
@@ -2542,7 +2703,7 @@ enum copy_recurse_data_types {
};
static void copy_recurse_data(compiler_common *common, PCRE2_SPTR cc, PCRE2_SPTR ccend,
- int type, int stackptr, int stacktop, BOOL has_quit)
+ int type, int stackptr, int stacktop, uint32_t recurse_flags)
{
delayed_mem_copy_status status;
PCRE2_SPTR alternative;
@@ -2551,14 +2712,12 @@ sljit_sw shared_srcw[3];
sljit_sw kept_shared_srcw[2];
int private_count, shared_count, kept_shared_count;
int from_sp, base_reg, offset, i;
-BOOL setsom_found = FALSE;
-BOOL setmark_found = FALSE;
-BOOL capture_last_found = FALSE;
-BOOL control_head_found = FALSE;
+
+memset(common->recurse_bitset, 0, common->recurse_bitset_size);
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD
SLJIT_ASSERT(common->control_head_ptr != 0);
-control_head_found = TRUE;
+recurse_check_bit(common, common->control_head_ptr);
#endif
switch (type)
@@ -2646,45 +2805,42 @@ while (cc < ccend)
{
case OP_SET_SOM:
SLJIT_ASSERT(common->has_set_som);
- if (has_quit && !setsom_found)
+ if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, OVECTOR(0)))
{
kept_shared_srcw[0] = OVECTOR(0);
kept_shared_count = 1;
- setsom_found = TRUE;
}
cc += 1;
break;
case OP_RECURSE:
- if (has_quit)
+ if (recurse_flags & recurse_flag_quit_found)
{
- if (common->has_set_som && !setsom_found)
+ if (common->has_set_som && recurse_check_bit(common, OVECTOR(0)))
{
kept_shared_srcw[0] = OVECTOR(0);
kept_shared_count = 1;
- setsom_found = TRUE;
}
- if (common->mark_ptr != 0 && !setmark_found)
+ if (common->mark_ptr != 0 && recurse_check_bit(common, common->mark_ptr))
{
kept_shared_srcw[kept_shared_count] = common->mark_ptr;
kept_shared_count++;
- setmark_found = TRUE;
}
}
- if (common->capture_last_ptr != 0 && !capture_last_found)
+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
{
shared_srcw[0] = common->capture_last_ptr;
shared_count = 1;
- capture_last_found = TRUE;
}
cc += 1 + LINK_SIZE;
break;
case OP_KET:
- if (PRIVATE_DATA(cc) != 0)
+ private_srcw[0] = PRIVATE_DATA(cc);
+ if (private_srcw[0] != 0)
{
- private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
+ if (recurse_check_bit(common, private_srcw[0]))
+ private_count = 1;
SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
cc += PRIVATE_DATA(cc + 1);
}
@@ -2703,50 +2859,66 @@ while (cc < ccend)
case OP_SBRA:
case OP_SBRAPOS:
case OP_SCOND:
- private_count = 1;
private_srcw[0] = PRIVATE_DATA(cc);
+ if (recurse_check_bit(common, private_srcw[0]))
+ private_count = 1;
cc += 1 + LINK_SIZE;
break;
case OP_CBRA:
case OP_SCBRA:
- offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
- shared_srcw[0] = OVECTOR(offset);
- shared_srcw[1] = OVECTOR(offset + 1);
- shared_count = 2;
+ offset = GET2(cc, 1 + LINK_SIZE);
+ shared_srcw[0] = OVECTOR(offset << 1);
+ if (recurse_check_bit(common, shared_srcw[0]))
+ {
+ shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
+ SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
+ shared_count = 2;
+ }
- if (common->capture_last_ptr != 0 && !capture_last_found)
+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
{
- shared_srcw[2] = common->capture_last_ptr;
- shared_count = 3;
- capture_last_found = TRUE;
+ shared_srcw[shared_count] = common->capture_last_ptr;
+ shared_count++;
}
- if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
+ if (common->optimized_cbracket[offset] == 0)
{
- private_count = 1;
- private_srcw[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+ private_srcw[0] = OVECTOR_PRIV(offset);
+ if (recurse_check_bit(common, private_srcw[0]))
+ private_count = 1;
}
+
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
case OP_CBRAPOS:
case OP_SCBRAPOS:
- offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
- shared_srcw[0] = OVECTOR(offset);
- shared_srcw[1] = OVECTOR(offset + 1);
- shared_count = 2;
+ offset = GET2(cc, 1 + LINK_SIZE);
+ shared_srcw[0] = OVECTOR(offset << 1);
+ if (recurse_check_bit(common, shared_srcw[0]))
+ {
+ shared_srcw[1] = shared_srcw[0] + sizeof(sljit_sw);
+ SLJIT_ASSERT(recurse_check_bit(common, shared_srcw[1]));
+ shared_count = 2;
+ }
- if (common->capture_last_ptr != 0 && !capture_last_found)
+ if (common->capture_last_ptr != 0 && recurse_check_bit(common, common->capture_last_ptr))
{
- shared_srcw[2] = common->capture_last_ptr;
- shared_count = 3;
- capture_last_found = TRUE;
+ shared_srcw[shared_count] = common->capture_last_ptr;
+ shared_count++;
}
- private_count = 2;
private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
+ if (recurse_check_bit(common, private_srcw[0]))
+ private_count = 1;
+
+ offset = OVECTOR_PRIV(offset);
+ if (recurse_check_bit(common, offset))
+ {
+ private_srcw[private_count] = offset;
+ private_count++;
+ }
cc += 1 + LINK_SIZE + IMM2_SIZE;
break;
@@ -2755,18 +2927,17 @@ while (cc < ccend)
alternative = cc + GET(cc, 1);
if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
{
- private_count = 1;
private_srcw[0] = PRIVATE_DATA(cc);
+ if (recurse_check_bit(common, private_srcw[0]))
+ private_count = 1;
}
cc += 1 + LINK_SIZE;
break;
CASE_ITERATOR_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc))
- {
+ private_srcw[0] = PRIVATE_DATA(cc);
+ if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
- }
cc += 2;
#ifdef SUPPORT_UNICODE
if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
@@ -2774,11 +2945,12 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc))
+ private_srcw[0] = PRIVATE_DATA(cc);
+ if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
+ private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
cc += 2;
#ifdef SUPPORT_UNICODE
@@ -2787,11 +2959,12 @@ while (cc < ccend)
break;
CASE_ITERATOR_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc))
+ private_srcw[0] = PRIVATE_DATA(cc);
+ if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
+ private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UNICODE
@@ -2800,30 +2973,30 @@ while (cc < ccend)
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_1
- if (PRIVATE_DATA(cc))
- {
+ private_srcw[0] = PRIVATE_DATA(cc);
+ if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
- }
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
- if (PRIVATE_DATA(cc))
+ private_srcw[0] = PRIVATE_DATA(cc);
+ if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
cc += 1;
break;
CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
- if (PRIVATE_DATA(cc))
+ private_srcw[0] = PRIVATE_DATA(cc);
+ if (private_srcw[0] != 0 && recurse_check_bit(common, private_srcw[0]))
{
private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
}
cc += 1 + IMM2_SIZE;
break;
@@ -2837,23 +3010,28 @@ while (cc < ccend)
i = 1 + 32 / (int)sizeof(PCRE2_UCHAR);
#endif
if (PRIVATE_DATA(cc) != 0)
+ {
+ private_count = 1;
+ private_srcw[0] = PRIVATE_DATA(cc);
switch(get_class_iterator_size(cc + i))
{
case 1:
- private_count = 1;
- private_srcw[0] = PRIVATE_DATA(cc);
break;
case 2:
- private_count = 2;
- private_srcw[0] = PRIVATE_DATA(cc);
- private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ if (recurse_check_bit(common, private_srcw[0]))
+ {
+ private_count = 2;
+ private_srcw[1] = private_srcw[0] + sizeof(sljit_sw);
+ SLJIT_ASSERT(recurse_check_bit(common, private_srcw[1]));
+ }
break;
default:
SLJIT_UNREACHABLE();
break;
}
+ }
cc += i;
break;
@@ -2862,28 +3040,25 @@ while (cc < ccend)
case OP_PRUNE_ARG:
case OP_THEN_ARG:
SLJIT_ASSERT(common->mark_ptr != 0);
- if (has_quit && !setmark_found)
+ if ((recurse_flags & recurse_flag_quit_found) && recurse_check_bit(common, common->mark_ptr))
{
kept_shared_srcw[0] = common->mark_ptr;
kept_shared_count = 1;
- setmark_found = TRUE;
}
- if (common->control_head_ptr != 0 && !control_head_found)
+ if (common->control_head_ptr != 0 && recurse_check_bit(common, common->control_head_ptr))
{
private_srcw[0] = common->control_head_ptr;
private_count = 1;
- control_head_found = TRUE;
}
cc += 1 + 2 + cc[1];
break;
case OP_THEN:
SLJIT_ASSERT(common->control_head_ptr != 0);
- if (!control_head_found)
+ if (recurse_check_bit(common, common->control_head_ptr))
{
private_srcw[0] = common->control_head_ptr;
private_count = 1;
- control_head_found = TRUE;
}
cc++;
break;
@@ -2891,7 +3066,7 @@ while (cc < ccend)
default:
cc = next_opcode(common, cc);
SLJIT_ASSERT(cc != NULL);
- break;
+ continue;
}
if (type != recurse_copy_shared_to_global && type != recurse_copy_kept_shared_to_global)
@@ -2973,8 +3148,16 @@ if (*cc == OP_COND || *cc == OP_SCOND)
has_alternatives = FALSE;
cc = next_opcode(common, cc);
+
if (has_alternatives)
+ {
+ if (*cc == OP_REVERSE)
+ cc += 1 + IMM2_SIZE;
+ else if (*cc == OP_VREVERSE)
+ cc += 1 + 2 * IMM2_SIZE;
+
current_offset = common->then_offsets + (cc - common->start);
+ }
while (cc < end)
{
@@ -2983,7 +3166,18 @@ while (cc < end)
else
{
if (*cc == OP_ALT && has_alternatives)
- current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);
+ {
+ cc += 1 + LINK_SIZE;
+
+ if (*cc == OP_REVERSE)
+ cc += 1 + IMM2_SIZE;
+ else if (*cc == OP_VREVERSE)
+ cc += 1 + 2 * IMM2_SIZE;
+
+ current_offset = common->then_offsets + (cc - common->start);
+ continue;
+ }
+
if (*cc >= OP_THEN && *cc <= OP_THEN_ARG && current_offset != NULL)
*current_offset = 1;
cc = next_opcode(common, cc);
@@ -3007,7 +3201,7 @@ return (value & (value - 1)) == 0;
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
-while (list)
+while (list != NULL)
{
/* sljit_set_label is clever enough to do nothing
if either the jump or the label is NULL. */
@@ -3070,7 +3264,7 @@ static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
DEFINE_COMPILER;
SLJIT_ASSERT(size > 0);
-OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
+OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
#ifdef DESTROY_REGISTERS
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
@@ -3086,7 +3280,7 @@ static SLJIT_INLINE void free_stack(compiler_common *common, int size)
DEFINE_COMPILER;
SLJIT_ASSERT(size > 0);
-OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
+OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * SSIZE_OF(sw));
}
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
@@ -3126,12 +3320,12 @@ if (length < 8)
}
else
{
- if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
+ if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS)
{
GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);
loop = LABEL();
- sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
+ sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
@@ -3165,7 +3359,7 @@ if (size == sizeof(sljit_sw))
return;
}
-if (sljit_get_register_index(TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
+if (sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && !sljit_has_cpu_feature(SLJIT_HAS_ZERO_REGISTER))
{
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
src = TMP3;
@@ -3187,8 +3381,8 @@ OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, size - uncleared_size);
loop = LABEL();
OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), 0, src, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
-OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * (sljit_sw)sizeof(sljit_sw), src, 0);
-OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * (sljit_sw)sizeof(sljit_sw), src, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -2 * SSIZE_OF(sw), src, 0);
+OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), -1 * SSIZE_OF(sw), src, 0);
CMPTO(SLJIT_LESS, TMP1, 0, TMP2, 0, loop);
if (uncleared_size >= sizeof(sljit_sw))
@@ -3215,12 +3409,12 @@ if (length < 8)
}
else
{
- if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
+ if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS)
{
GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);
loop = LABEL();
- sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
+ sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, loop);
}
@@ -3312,7 +3506,7 @@ else
OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, SLJIT_OFFSETOF(pcre2_match_data, ovector) - sizeof(PCRE2_SIZE));
}
-has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
+has_pre = sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
@@ -3320,7 +3514,7 @@ OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(HAS_VIRTUAL_REGISTERS ? SLJIT_R0 : ARGUME
loop = LABEL();
if (has_pre)
- sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
+ sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
else
{
OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
@@ -3343,14 +3537,14 @@ JUMPTO(SLJIT_NOT_ZERO, loop);
/* Calculate the return value, which is the maximum ovector value. */
if (topbracket > 1)
{
- if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
+ if (sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw))) == SLJIT_SUCCESS)
{
GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);
/* OVECTOR(0) is never equal to SLJIT_S2. */
loop = LABEL();
- sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
+ sljit_emit_mem_update(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * SSIZE_OF(sw)));
OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
@@ -3363,7 +3557,7 @@ if (topbracket > 1)
/* OVECTOR(0) is never equal to SLJIT_S2. */
loop = LABEL();
OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
- OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
+ OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
@@ -3743,10 +3937,10 @@ if (common->invalid_utf)
else
{
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
+ SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
+ SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
}
}
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
@@ -3982,11 +4176,11 @@ if (common->utf)
{
if (options & READ_CHAR_UPDATE_STR_PTR)
OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
if (options & READ_CHAR_UPDATE_STR_PTR)
- CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
+ SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
if (max >= 0xd800)
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000);
+ SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, 0x10000, TMP1);
}
else
{
@@ -4010,16 +4204,47 @@ if (common->invalid_utf)
else
{
OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x110000);
+ SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0xe000 - 0xd800);
+ SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
}
}
#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16|32] */
#endif /* SUPPORT_UNICODE */
}
+static void skip_valid_char(compiler_common *common)
+{
+DEFINE_COMPILER;
+#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+struct sljit_jump *jump;
+#endif
+
+#if (defined SUPPORT_UNICODE) && (PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16)
+ if (common->utf)
+ {
+ OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+#if PCRE2_CODE_UNIT_WIDTH == 8
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+#elif PCRE2_CODE_UNIT_WIDTH == 16
+ jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xd800);
+ OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
+#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
+ JUMPHERE(jump);
+ return;
+ }
+#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
+ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+}
+
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
@@ -4061,6 +4286,7 @@ if (negated)
if (common->invalid_utf)
{
+ OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
@@ -4167,8 +4393,8 @@ if (common->utf && negated)
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV) && !HAS_VIRTUAL_REGISTERS)
{
OP2(SLJIT_ADD, RETURN_ADDR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
- CMOV(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
+ SELECT(SLJIT_LESS, STR_PTR, RETURN_ADDR, 0, STR_PTR);
}
else
{
@@ -4250,7 +4476,7 @@ if (common->utf)
/* Skip low surrogate if necessary. */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xdc00);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
@@ -4267,7 +4493,7 @@ if (common->invalid_utf && !must_be_valid)
return;
}
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x110000);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x110000);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
@@ -4325,14 +4551,14 @@ of the character (>= 0xc0). Return char value in TMP1. */
DEFINE_COMPILER;
struct sljit_jump *jump;
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Searching for the first zero. */
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3000);
@@ -4345,7 +4571,7 @@ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
+OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xe0000);
@@ -4371,9 +4597,9 @@ DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
+OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
@@ -4413,7 +4639,7 @@ struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[11];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc2);
@@ -4432,7 +4658,7 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
@@ -4447,14 +4673,14 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
+ SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0x20000, TMP1);
exit_invalid[2] = NULL;
}
else
exit_invalid[2] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
+OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
three_byte_entry = LABEL();
@@ -4462,8 +4688,8 @@ three_byte_entry = LABEL();
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2d800);
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
+ SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0xd800, TMP1);
exit_invalid[3] = NULL;
}
else
@@ -4473,8 +4699,8 @@ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
+ SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
exit_invalid[4] = NULL;
}
else
@@ -4490,8 +4716,8 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
+ SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, 0, TMP1);
exit_invalid[5] = NULL;
}
else
@@ -4500,8 +4726,8 @@ else
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc10000);
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
+ SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
exit_invalid[6] = NULL;
}
else
@@ -4522,7 +4748,7 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
exit_invalid[8] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
+OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
@@ -4537,8 +4763,8 @@ OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x80);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x40);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 0x40);
+ SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
exit_invalid[10] = NULL;
}
else
@@ -4569,7 +4795,7 @@ struct sljit_label *skip_start;
struct sljit_label *three_byte_exit;
struct sljit_jump *jump[5];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
if (common->nltype != NLTYPE_ANY)
{
@@ -4578,8 +4804,8 @@ if (common->nltype != NLTYPE_ANY)
/* All newlines are ascii, just skip intermediate octets. */
jump[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
loop = LABEL();
- if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1)) == SLJIT_SUCCESS)
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, TMP2, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
else
{
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
@@ -4660,7 +4886,7 @@ struct sljit_label *exit_ok_label;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[7];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
@@ -4751,7 +4977,7 @@ static void do_utfpeakcharback(compiler_common *common)
DEFINE_COMPILER;
struct sljit_jump *jump[2];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
@@ -4794,7 +5020,7 @@ struct sljit_label *three_byte_entry;
struct sljit_label *exit_invalid_label;
struct sljit_jump *exit_invalid[8];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xc0);
@@ -4830,8 +5056,8 @@ OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
+ SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, -0xd800, TMP1);
exit_invalid[2] = NULL;
}
else
@@ -4840,8 +5066,8 @@ else
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xd800);
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
- CMOV(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x800);
+ SELECT(SLJIT_LESS, TMP1, SLJIT_IMM, INVALID_UTF_CHAR, TMP1);
exit_invalid[3] = NULL;
}
else
@@ -4865,8 +5091,8 @@ OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
if (has_cmov)
{
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x100000);
- CMOV(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0x100000);
+ SELECT(SLJIT_GREATER_EQUAL, TMP1, SLJIT_IMM, INVALID_UTF_CHAR - 0x10000, TMP1);
exit_invalid[5] = NULL;
}
else
@@ -4926,7 +5152,7 @@ undefined for invalid characters. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
/* TMP2 contains the high surrogate. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
@@ -4959,7 +5185,7 @@ char value in TMP1. */
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[2];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
/* TMP2 contains the high surrogate. */
exit_invalid[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
@@ -4968,7 +5194,7 @@ OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00);
OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xdc00);
-OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x400);
+OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
@@ -4988,7 +5214,7 @@ static void do_utfmoveback_invalid(compiler_common *common)
DEFINE_COMPILER;
struct sljit_jump *exit_invalid[3];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
exit_invalid[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x400);
exit_invalid[1] = CMP(SLJIT_GREATER_EQUAL, TMP2, 0, STR_PTR, 0);
@@ -5017,7 +5243,7 @@ DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *exit_invalid[3];
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
jump = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
@@ -5066,7 +5292,7 @@ SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
@@ -5106,7 +5332,7 @@ SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 12);
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
#if PCRE2_CODE_UNIT_WIDTH == 32
if (!common->utf)
@@ -5239,7 +5465,7 @@ if (newlinecheck)
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
@@ -5304,12 +5530,12 @@ else if (common->utf)
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
{
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
- CMOV(SLJIT_LESS, STR_PTR, TMP2, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
+ SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
}
else
{
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_LESS);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
@@ -5412,6 +5638,8 @@ while (TRUE)
case OP_CIRCM:
case OP_DOLL:
case OP_DOLLM:
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
/* Zero width assertions. */
cc++;
continue;
@@ -5795,6 +6023,7 @@ static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forw
{
sljit_s32 i, j, max_i = 0, max_j = 0;
sljit_u32 max_pri = 0;
+ sljit_s32 max_offset = max_fast_forward_char_pair_offset();
PCRE2_UCHAR a1, a2, a_pri, b1, b2, b_pri;
for (i = max - 1; i >= 1; i--)
@@ -5805,14 +6034,14 @@ static BOOL check_fast_forward_char_pair_simd(compiler_common *common, fast_forw
a2 = chars[i].chars[1];
a_pri = chars[i].last_count;
- j = i - max_fast_forward_char_pair_offset();
+ j = i - max_offset;
if (j < 0)
j = 0;
while (j < i)
{
b_pri = chars[j].last_count;
- if (b_pri > 2 && a_pri + b_pri >= max_pri)
+ if (b_pri > 2 && (sljit_u32)a_pri + (sljit_u32)b_pri >= max_pri)
{
b1 = chars[j].chars[0];
b2 = chars[j].chars[1];
@@ -5860,8 +6089,8 @@ if (has_match_end)
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offset + 1));
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
- CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
+ SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
}
#ifdef JIT_HAS_FAST_FORWARD_CHAR_SIMD
@@ -6063,8 +6292,8 @@ if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
OP2(SLJIT_SUB | SLJIT_SET_LESS, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
add_jump(compiler, &common->failed_match, JUMP(SLJIT_LESS));
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
- CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
+ SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
}
else
{
@@ -6200,7 +6429,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, STR_PTR, 0, TMP1, 0);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_NOT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
@@ -6228,7 +6457,7 @@ if (common->nltype == NLTYPE_FIXED && common->newline > 255)
firstchar = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, STR_PTR, 0, TMP1, 0);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
@@ -6293,8 +6522,8 @@ if (JIT_HAS_FAST_FORWARD_CHAR_SIMD && (common->nltype == NLTYPE_FIXED || common-
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
if (common->mode != PCRE2_JIT_COMPLETE)
{
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
- CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
+ SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
}
}
}
@@ -6319,7 +6548,7 @@ if (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF)
notfoundnl = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, CHAR_NL);
OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
@@ -6355,8 +6584,8 @@ if (common->match_end_ptr != 0)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP1, 0);
- CMOV(SLJIT_GREATER, STR_END, TMP1, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_END, 0, TMP1, 0);
+ SELECT(SLJIT_GREATER, STR_END, TMP1, 0, STR_END);
}
start = LABEL();
@@ -6385,12 +6614,12 @@ if (!optimize_class(common, start_bits, (start_bits[31] & 0x80) != 0, FALSE, &ma
if (!HAS_VIRTUAL_REGISTERS)
{
OP2(SLJIT_SHL, TMP3, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP3, 0);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP3, 0);
}
else
{
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
}
JUMPTO(SLJIT_ZERO, start);
}
@@ -6493,26 +6722,27 @@ DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_label *mainloop;
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
GET_LOCAL_BASE(TMP1, 0, 0);
/* Drop frames until we reach STACK_TOP. */
mainloop = LABEL();
-OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
-jump = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);
+OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -SSIZE_OF(sw));
+OP2U(SLJIT_SUB | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
+jump = JUMP(SLJIT_SIG_LESS_EQUAL);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
}
else
{
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * sizeof(sljit_sw)));
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(3 * SSIZE_OF(sw)));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * SSIZE_OF(sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
GET_LOCAL_BASE(TMP1, 0, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP3, 0);
@@ -6520,28 +6750,39 @@ else
JUMPTO(SLJIT_JUMP, mainloop);
JUMPHERE(jump);
-jump = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);
+sljit_set_current_flags(compiler, SLJIT_CURRENT_FLAGS_SUB | SLJIT_CURRENT_FLAGS_COMPARE | SLJIT_SET_SIG_LESS_EQUAL | SLJIT_SET_Z);
+jump = JUMP(SLJIT_NOT_ZERO /* SIG_LESS */);
/* End of reverting values. */
OP_SRC(SLJIT_FAST_RETURN, RETURN_ADDR, 0);
JUMPHERE(jump);
-OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
+OP2(SLJIT_SUB, TMP2, 0, SLJIT_IMM, 0, TMP2, 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
if (HAS_VIRTUAL_REGISTERS)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
}
else
{
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * sizeof(sljit_sw)));
- OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(STACK_TOP), -(2 * SSIZE_OF(sw)));
+ OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * SSIZE_OF(sw));
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP3, 0);
}
JUMPTO(SLJIT_JUMP, mainloop);
}
-static void check_wordboundary(compiler_common *common)
+#ifdef SUPPORT_UNICODE
+#define UCPCAT(bit) (1 << (bit))
+#define UCPCAT2(bit1, bit2) (UCPCAT(bit1) | UCPCAT(bit2))
+#define UCPCAT3(bit1, bit2, bit3) (UCPCAT(bit1) | UCPCAT(bit2) | UCPCAT(bit3))
+#define UCPCAT_RANGE(start, end) (((1 << ((end) + 1)) - 1) - ((1 << (start)) - 1))
+#define UCPCAT_L UCPCAT_RANGE(ucp_Ll, ucp_Lu)
+#define UCPCAT_N UCPCAT_RANGE(ucp_Nd, ucp_No)
+#define UCPCAT_ALL ((1 << (ucp_Zs + 1)) - 1)
+#endif
+
+static void check_wordboundary(compiler_common *common, BOOL ucp)
{
DEFINE_COMPILER;
struct sljit_jump *skipread;
@@ -6555,9 +6796,10 @@ jump_list *invalid_utf2 = NULL;
struct sljit_jump *jump;
#endif /* PCRE2_CODE_UNIT_WIDTH != 8 || SUPPORT_UNICODE */
+SLJIT_UNUSED_ARG(ucp);
SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);
-sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
/* Get type of the previous char, and put it to TMP3. */
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
@@ -6594,19 +6836,12 @@ else
/* Testing char type. */
#ifdef SUPPORT_UNICODE
-if (common->ucp)
+if (ucp)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
- jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- JUMPHERE(jump);
- OP1(SLJIT_MOV, TMP3, 0, TMP2, 0);
+ OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
+ OP_FLAGS(SLJIT_MOV, TMP3, 0, SLJIT_NOT_ZERO);
}
else
#endif /* SUPPORT_UNICODE */
@@ -6640,18 +6875,12 @@ peek_char(common, READ_CHAR_MAX, SLJIT_MEM1(SLJIT_SP), LOCALS1, &invalid_utf2);
valid_utf = LABEL();
-if (common->ucp)
+if (ucp)
{
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
- jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
add_jump(compiler, &common->getucdtype, JUMP(SLJIT_FAST_CALL));
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- JUMPHERE(jump);
+ OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP1, 0);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
}
else
#endif /* SUPPORT_UNICODE */
@@ -6916,7 +7145,7 @@ j = 0;
if (char_list[0] == 0)
{
i++;
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_ZERO);
}
else
@@ -6928,8 +7157,8 @@ while (i < len)
j++;
else
{
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i]);
- CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i]);
+ SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
}
i++;
}
@@ -6942,8 +7171,8 @@ if (j != 0)
if ((char_list[i] & 0x100) != 0)
{
j--;
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
- CMOV(SLJIT_ZERO, TMP2, TMP1, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, char_list[i] & 0xff);
+ SELECT(SLJIT_ZERO, TMP2, TMP1, 0, TMP2);
}
}
@@ -6968,12 +7197,12 @@ static void check_anynewline(compiler_common *common)
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
-OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
+OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
@@ -6981,7 +7210,7 @@ if (common->utf)
#endif
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
}
#endif
@@ -6995,31 +7224,31 @@ static void check_hspace(compiler_common *common)
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
-OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
+OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x09);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
+OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x20);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
+OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
{
#endif
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x1680);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#if PCRE2_CODE_UNIT_WIDTH == 8
}
#endif
@@ -7034,12 +7263,12 @@ static void check_vspace(compiler_common *common)
/* Check whether TMP1 contains a newline character. TMP2 destroyed. */
DEFINE_COMPILER;
-sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, RETURN_ADDR, 0);
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
-OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
+OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
-OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
+OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
#if PCRE2_CODE_UNIT_WIDTH == 8
if (common->utf)
@@ -7047,7 +7276,7 @@ if (common->utf)
#endif
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#if PCRE2_CODE_UNIT_WIDTH == 8
}
#endif
@@ -7076,7 +7305,7 @@ else
char2_reg = RETURN_ADDR;
}
-sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
if (char1_reg == STR_END)
@@ -7085,11 +7314,11 @@ if (char1_reg == STR_END)
OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
}
-if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
{
label = LABEL();
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);
@@ -7097,14 +7326,14 @@ if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_
JUMPHERE(jump);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
-else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
{
OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
label = LABEL();
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
jump = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
JUMPTO(SLJIT_NOT_ZERO, label);
@@ -7158,12 +7387,12 @@ else
lcc_table = TMP3;
}
-if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
opt_type = 1;
-else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
+else if (sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
opt_type = 2;
-sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);
@@ -7179,8 +7408,8 @@ OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);
if (opt_type == 1)
{
label = LABEL();
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
}
else if (opt_type == 2)
{
@@ -7188,8 +7417,8 @@ else if (opt_type == 2)
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
label = LABEL();
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
- sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
+ sljit_emit_mem_update(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
}
else
{
@@ -7390,16 +7619,6 @@ return cc;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
-#define SET_TYPE_OFFSET(value) \
- if ((value) != typeoffset) \
- { \
- if ((value) < typeoffset) \
- OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
- else \
- OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
- } \
- typeoffset = (value);
-
#define SET_CHAR_OFFSET(value) \
if ((value) != charoffset) \
{ \
@@ -7412,6 +7631,20 @@ return cc;
static PCRE2_SPTR compile_char1_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks, BOOL check_str_ptr);
+#ifdef SUPPORT_UNICODE
+#define XCLASS_SAVE_CHAR 0x001
+#define XCLASS_CHAR_SAVED 0x002
+#define XCLASS_HAS_TYPE 0x004
+#define XCLASS_HAS_SCRIPT 0x008
+#define XCLASS_HAS_SCRIPT_EXTENSION 0x010
+#define XCLASS_HAS_BOOL 0x020
+#define XCLASS_HAS_BIDICL 0x040
+#define XCLASS_NEEDS_UCD (XCLASS_HAS_TYPE | XCLASS_HAS_SCRIPT | XCLASS_HAS_SCRIPT_EXTENSION | XCLASS_HAS_BOOL | XCLASS_HAS_BIDICL)
+#define XCLASS_SCRIPT_EXTENSION_NOTPROP 0x080
+#define XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR 0x100
+#define XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0 0x200
+#endif /* SUPPORT_UNICODE */
+
static void compile_xclass_matchingpath(compiler_common *common, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
@@ -7426,11 +7659,11 @@ BOOL utf = common->utf;
#endif /* SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == [8|16] */
#ifdef SUPPORT_UNICODE
-BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
-BOOL charsaved = FALSE;
+sljit_u32 unicode_status = 0;
+sljit_u32 category_list = 0;
+sljit_u32 items;
int typereg = TMP1;
const sljit_u32 *other_cases;
-sljit_uw typeoffset;
#endif /* SUPPORT_UNICODE */
/* Scanning the necessary info. */
@@ -7447,6 +7680,7 @@ if (cc[-1] & XCL_MAP)
while (*cc != XCL_END)
{
compares++;
+
if (*cc == XCL_SINGLE)
{
cc ++;
@@ -7454,7 +7688,7 @@ while (*cc != XCL_END)
if (c > max) max = c;
if (c < min) min = c;
#ifdef SUPPORT_UNICODE
- needschar = TRUE;
+ unicode_status |= XCLASS_SAVE_CHAR;
#endif /* SUPPORT_UNICODE */
}
else if (*cc == XCL_RANGE)
@@ -7465,7 +7699,7 @@ while (*cc != XCL_END)
GETCHARINCTEST(c, cc);
if (c > max) max = c;
#ifdef SUPPORT_UNICODE
- needschar = TRUE;
+ unicode_status |= XCLASS_SAVE_CHAR;
#endif /* SUPPORT_UNICODE */
}
#ifdef SUPPORT_UNICODE
@@ -7473,7 +7707,8 @@ while (*cc != XCL_END)
{
SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
cc++;
- if (*cc == PT_CLIST)
+
+ if (*cc == PT_CLIST && cc[-1] == XCL_PROP)
{
other_cases = PRIV(ucd_caseless_sets) + cc[1];
while (*other_cases != NOTACHAR)
@@ -7489,54 +7724,114 @@ while (*cc != XCL_END)
min = 0;
}
+ items = 0;
+
switch(*cc)
{
case PT_ANY:
/* Any either accepts everything or ignored. */
if (cc[-1] == XCL_PROP)
- {
- compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
- if (list == backtracks)
- add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
- return;
- }
+ items = UCPCAT_ALL;
+ else
+ compares--;
break;
case PT_LAMP:
+ items = UCPCAT3(ucp_Lu, ucp_Ll, ucp_Lt);
+ break;
+
case PT_GC:
+ items = UCPCAT_RANGE(PRIV(ucp_typerange)[(int)cc[1] * 2], PRIV(ucp_typerange)[(int)cc[1] * 2 + 1]);
+ break;
+
case PT_PC:
+ items = UCPCAT(cc[1]);
+ break;
+
+ case PT_WORD:
+ items = UCPCAT2(ucp_Mn, ucp_Pc) | UCPCAT_L | UCPCAT_N;
+ break;
+
case PT_ALNUM:
- needstype = TRUE;
+ items = UCPCAT_L | UCPCAT_N;
break;
+ case PT_SCX:
+ unicode_status |= XCLASS_HAS_SCRIPT_EXTENSION;
+ if (cc[-1] == XCL_NOTPROP)
+ {
+ unicode_status |= XCLASS_SCRIPT_EXTENSION_NOTPROP;
+ break;
+ }
+ compares++;
+ /* Fall through */
+
case PT_SC:
- needsscript = TRUE;
+ unicode_status |= XCLASS_HAS_SCRIPT;
break;
case PT_SPACE:
case PT_PXSPACE:
- case PT_WORD:
case PT_PXGRAPH:
case PT_PXPRINT:
case PT_PXPUNCT:
- needstype = TRUE;
- needschar = TRUE;
+ unicode_status |= XCLASS_SAVE_CHAR | XCLASS_HAS_TYPE;
break;
case PT_CLIST:
case PT_UCNC:
- needschar = TRUE;
+ case PT_PXXDIGIT:
+ unicode_status |= XCLASS_SAVE_CHAR;
+ break;
+
+ case PT_BOOL:
+ unicode_status |= XCLASS_HAS_BOOL;
+ break;
+
+ case PT_BIDICL:
+ unicode_status |= XCLASS_HAS_BIDICL;
break;
default:
SLJIT_UNREACHABLE();
break;
}
+
+ if (items > 0)
+ {
+ if (cc[-1] == XCL_NOTPROP)
+ items ^= UCPCAT_ALL;
+ category_list |= items;
+ unicode_status |= XCLASS_HAS_TYPE;
+ compares--;
+ }
+
cc += 2;
}
#endif /* SUPPORT_UNICODE */
}
+
+#ifdef SUPPORT_UNICODE
+if (category_list == UCPCAT_ALL)
+ {
+ /* All characters are accepted, same as dotall. */
+ compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
+ if (list == backtracks)
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ return;
+ }
+
+if (compares == 0 && category_list == 0)
+ {
+ /* No characters are accepted, same as (*F) or dotall. */
+ compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
+ if (list != backtracks)
+ add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
+ return;
+ }
+#else /* !SUPPORT_UNICODE */
SLJIT_ASSERT(compares > 0);
+#endif /* SUPPORT_UNICODE */
/* We are not necessary in utf mode even in 8 bit mode. */
cc = ccbegin;
@@ -7545,7 +7840,7 @@ if ((cc[-1] & XCL_NOT) != 0)
else
{
#ifdef SUPPORT_UNICODE
- read_char(common, min, max, (needstype || needsscript) ? backtracks : NULL, 0);
+ read_char(common, min, max, (unicode_status & XCLASS_NEEDS_UCD) ? backtracks : NULL, 0);
#else /* !SUPPORT_UNICODE */
read_char(common, min, max, NULL, 0);
#endif /* SUPPORT_UNICODE */
@@ -7562,7 +7857,7 @@ if ((cc[-1] & XCL_HASPROP) == 0)
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
}
@@ -7581,7 +7876,7 @@ else if ((cc[-1] & XCL_MAP) != 0)
{
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UNICODE
- charsaved = TRUE;
+ unicode_status |= XCLASS_CHAR_SAVED;
#endif /* SUPPORT_UNICODE */
if (!optimize_class(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
{
@@ -7595,7 +7890,7 @@ else if ((cc[-1] & XCL_MAP) != 0)
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));
#if PCRE2_CODE_UNIT_WIDTH == 8
@@ -7609,9 +7904,9 @@ else if ((cc[-1] & XCL_MAP) != 0)
}
#ifdef SUPPORT_UNICODE
-if (needstype || needsscript)
+if (unicode_status & XCLASS_NEEDS_UCD)
{
- if (needschar && !charsaved)
+ if ((unicode_status & (XCLASS_SAVE_CHAR | XCLASS_CHAR_SAVED)) == XCLASS_SAVE_CHAR)
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#if PCRE2_CODE_UNIT_WIDTH == 32
@@ -7631,17 +7926,19 @@ if (needstype || needsscript)
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
+ OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
+ OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+ OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
- /* Before anything else, we deal with scripts. */
- if (needsscript)
- {
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+ ccbegin = cc;
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+ if (category_list != 0)
+ compares++;
- ccbegin = cc;
+ if (unicode_status & XCLASS_HAS_BIDICL)
+ {
+ OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
+ OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BIDICLASS_SHIFT);
while (*cc != XCL_END)
{
@@ -7660,7 +7957,7 @@ if (needstype || needsscript)
{
SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
cc++;
- if (*cc == PT_SC)
+ if (*cc == PT_BIDICL)
{
compares--;
invertcmp = (compares == 0 && list != backtracks);
@@ -7674,61 +7971,191 @@ if (needstype || needsscript)
}
cc = ccbegin;
+ }
+
+ if (unicode_status & XCLASS_HAS_BOOL)
+ {
+ OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, bprops));
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BPROPS_MASK);
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
- if (needstype)
+ while (*cc != XCL_END)
{
- /* TMP2 has already been shifted by 2 */
- if (!needschar)
+ if (*cc == XCL_SINGLE)
{
- OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
-
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ }
+ else if (*cc == XCL_RANGE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ GETCHARINCTEST(c, cc);
}
else
{
- OP2(SLJIT_ADD, TMP1, 0, TMP2, 0, TMP2, 0);
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
+ SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
+ cc++;
+ if (*cc == PT_BOOL)
+ {
+ compares--;
+ invertcmp = (compares == 0 && list != backtracks);
+ if (cc[-1] == XCL_NOTPROP)
+ invertcmp ^= 0x1;
- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
- OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
- typereg = RETURN_ADDR;
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_boolprop_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
+ add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
+ }
+ cc += 2;
}
}
- else if (needschar)
- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+
+ cc = ccbegin;
}
- else if (needstype)
+
+ if (unicode_status & XCLASS_HAS_SCRIPT)
{
- OP2(SLJIT_SHL, TMP1, 0, TMP2, 0, SLJIT_IMM, 3);
- OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 2);
+ OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
- if (!needschar)
+ while (*cc != XCL_END)
{
- OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
+ if (*cc == XCL_SINGLE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ }
+ else if (*cc == XCL_RANGE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ GETCHARINCTEST(c, cc);
+ }
+ else
+ {
+ SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
+ cc++;
+ switch (*cc)
+ {
+ case PT_SCX:
+ if (cc[-1] == XCL_NOTPROP)
+ break;
+ /* Fall through */
+
+ case PT_SC:
+ compares--;
+ invertcmp = (compares == 0 && list != backtracks);
+ if (cc[-1] == XCL_NOTPROP)
+ invertcmp ^= 0x1;
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ add_jump(compiler, compares > 0 ? list : backtracks, CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]));
+ }
+ cc += 2;
+ }
}
- else
+
+ cc = ccbegin;
+ }
+
+ if (unicode_status & XCLASS_HAS_SCRIPT_EXTENSION)
+ {
+ OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, scriptx_bidiclass));
+ OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_SCRIPTX_MASK);
+ OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 2);
+
+ if (unicode_status & XCLASS_SCRIPT_EXTENSION_NOTPROP)
{
- OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
+ if (unicode_status & XCLASS_HAS_TYPE)
+ {
+ if (unicode_status & XCLASS_SAVE_CHAR)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP2, 0);
+ unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0;
+ }
+ else
+ {
+ OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0);
+ unicode_status |= XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR;
+ }
+ }
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
+ }
- OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
- OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
- typereg = RETURN_ADDR;
+ while (*cc != XCL_END)
+ {
+ if (*cc == XCL_SINGLE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ }
+ else if (*cc == XCL_RANGE)
+ {
+ cc ++;
+ GETCHARINCTEST(c, cc);
+ GETCHARINCTEST(c, cc);
+ }
+ else
+ {
+ SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
+ cc++;
+ if (*cc == PT_SCX)
+ {
+ compares--;
+ invertcmp = (compares == 0 && list != backtracks);
+
+ jump = NULL;
+ if (cc[-1] == XCL_NOTPROP)
+ {
+ jump = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, (int)cc[1]);
+ if (invertcmp)
+ {
+ add_jump(compiler, backtracks, jump);
+ jump = NULL;
+ }
+ invertcmp ^= 0x1;
+ }
+
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), (sljit_sw)(PRIV(ucd_script_sets) + (cc[1] >> 5)), SLJIT_IMM, (sljit_sw)1 << (cc[1] & 0x1f));
+ add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
+
+ if (jump != NULL)
+ JUMPHERE(jump);
+ }
+ cc += 2;
+ }
}
+
+ if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_LOCALS0)
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+ else if (unicode_status & XCLASS_SCRIPT_EXTENSION_RESTORE_RETURN_ADDR)
+ OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0);
+ cc = ccbegin;
}
- else if (needschar)
+
+ if (unicode_status & XCLASS_SAVE_CHAR)
OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
+
+ if (unicode_status & XCLASS_HAS_TYPE)
+ {
+ if (unicode_status & XCLASS_SAVE_CHAR)
+ typereg = RETURN_ADDR;
+
+ OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
+ OP2(SLJIT_SHL, typereg, 0, SLJIT_IMM, 1, TMP2, 0);
+
+ if (category_list > 0)
+ {
+ compares--;
+ invertcmp = (compares == 0 && list != backtracks);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, category_list);
+ add_jump(compiler, compares > 0 ? list : backtracks, JUMP(SLJIT_NOT_ZERO ^ invertcmp));
+ }
+ }
}
#endif /* SUPPORT_UNICODE */
/* Generating code. */
charoffset = 0;
numberofcmps = 0;
-#ifdef SUPPORT_UNICODE
-typeoffset = 0;
-#endif /* SUPPORT_UNICODE */
while (*cc != XCL_END)
{
@@ -7743,13 +8170,13 @@ while (*cc != XCL_END)
if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
{
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
numberofcmps++;
}
else if (numberofcmps > 0)
{
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
numberofcmps = 0;
@@ -7769,13 +8196,13 @@ while (*cc != XCL_END)
if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
{
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
numberofcmps++;
}
else if (numberofcmps > 0)
{
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
numberofcmps = 0;
@@ -7796,65 +8223,33 @@ while (*cc != XCL_END)
switch(*cc)
{
case PT_ANY:
- if (!invertcmp)
- jump = JUMP(SLJIT_JUMP);
- break;
-
case PT_LAMP:
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
- OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- break;
-
case PT_GC:
- c = PRIV(ucp_typerange)[(int)cc[1] * 2];
- SET_TYPE_OFFSET(c);
- jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
- break;
-
case PT_PC:
- jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
- break;
-
case PT_SC:
+ case PT_SCX:
+ case PT_BOOL:
+ case PT_BIDICL:
+ case PT_WORD:
+ case PT_ALNUM:
compares++;
- /* Do nothing. */
+ /* Already handled. */
break;
case PT_SPACE:
case PT_PXSPACE:
SET_CHAR_OFFSET(9);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- SET_TYPE_OFFSET(ucp_Zl);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
- jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
- break;
-
- case PT_WORD:
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- /* Fall through. */
-
- case PT_ALNUM:
- SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
- OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
- SET_TYPE_OFFSET(ucp_Nd);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Zl, ucp_Zs));
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
@@ -7876,7 +8271,7 @@ while (*cc != XCL_END)
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
}
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[1]);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
other_cases += 2;
}
@@ -7889,103 +8284,135 @@ while (*cc != XCL_END)
OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
}
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, other_cases[2]);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
other_cases += 3;
}
else
{
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
}
while (*other_cases != NOTACHAR)
{
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
}
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
case PT_UCNC:
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
SET_CHAR_OFFSET(0xa0);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
SET_CHAR_OFFSET(0);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
case PT_PXGRAPH:
- /* C and Z groups are the farthest two groups. */
- SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT_RANGE(ucp_Zl, ucp_Zs));
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
- jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
+ jump = JUMP(SLJIT_ZERO);
+ c = charoffset;
/* In case of ucp_Cf, we overwrite the result. */
SET_CHAR_OFFSET(0x2066);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ /* Restore charoffset. */
+ SET_CHAR_OFFSET(c);
+
JUMPHERE(jump);
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
break;
case PT_PXPRINT:
- /* C and Z groups are the farthest two groups. */
- SET_TYPE_OFFSET(ucp_Ll);
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Cc, ucp_Cs) | UCPCAT2(ucp_Zl, ucp_Zp));
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
- OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);
-
- jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT(ucp_Cf));
+ jump = JUMP(SLJIT_ZERO);
+ c = charoffset;
/* In case of ucp_Cf, we overwrite the result. */
SET_CHAR_OFFSET(0x2066);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
+ /* Restore charoffset. */
+ SET_CHAR_OFFSET(c);
+
JUMPHERE(jump);
jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
break;
case PT_PXPUNCT:
- SET_TYPE_OFFSET(ucp_Sc);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
- OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Sc, ucp_So));
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
SET_CHAR_OFFSET(0);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0x7f);
OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);
- SET_TYPE_OFFSET(ucp_Pc);
- OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
- OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, typereg, 0, SLJIT_IMM, UCPCAT_RANGE(ucp_Pc, ucp_Ps));
+ OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_ZERO);
+ jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
+ break;
+
+ case PT_PXXDIGIT:
+ SET_CHAR_OFFSET(CHAR_A);
+ OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, ~0x20);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP2, 0, SLJIT_IMM, CHAR_F - CHAR_A);
+ OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(CHAR_0);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_9 - CHAR_0);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(0xff10);
+ jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff10);
+
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff19 - 0xff10);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(0xff21);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff26 - 0xff21);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(0xff41);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, TMP1, 0, SLJIT_IMM, 0xff46 - 0xff41);
+ OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
+
+ SET_CHAR_OFFSET(0xff10);
+
+ JUMPHERE(jump);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, 0);
jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
break;
@@ -8001,6 +8428,7 @@ while (*cc != XCL_END)
add_jump(compiler, compares > 0 ? list : backtracks, jump);
}
+SLJIT_ASSERT(compares == 0);
if (found != NULL)
set_jumps(found, LABEL());
}
@@ -8013,11 +8441,7 @@ if (found != NULL)
static PCRE2_SPTR compile_simple_assertion_matchingpath(compiler_common *common, PCRE2_UCHAR type, PCRE2_SPTR cc, jump_list **backtracks)
{
DEFINE_COMPILER;
-int length;
struct sljit_jump *jump[4];
-#ifdef SUPPORT_UNICODE
-struct sljit_label *label;
-#endif /* SUPPORT_UNICODE */
switch(type)
{
@@ -8045,16 +8469,18 @@ switch(type)
case OP_NOT_WORD_BOUNDARY:
case OP_WORD_BOUNDARY:
- add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
+ add_jump(compiler, (type == OP_NOT_WORD_BOUNDARY || type == OP_WORD_BOUNDARY) ? &common->wordboundary : &common->ucp_wordboundary, JUMP(SLJIT_FAST_CALL));
#ifdef SUPPORT_UNICODE
if (common->invalid_utf)
{
- add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
+ add_jump(compiler, backtracks, CMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_EQUAL : SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0));
return cc;
}
#endif /* SUPPORT_UNICODE */
sljit_set_current_flags(compiler, SLJIT_SET_Z);
- add_jump(compiler, backtracks, JUMP(type == OP_NOT_WORD_BOUNDARY ? SLJIT_NOT_ZERO : SLJIT_ZERO));
+ add_jump(compiler, backtracks, JUMP((type == OP_NOT_WORD_BOUNDARY || type == OP_NOT_UCP_WORD_BOUNDARY) ? SLJIT_NOT_ZERO : SLJIT_ZERO));
return cc;
case OP_EODN:
@@ -8069,9 +8495,9 @@ switch(type)
else
{
jump[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
- OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, TMP2, 0, STR_END, 0);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
check_partial(common, TRUE);
@@ -8094,7 +8520,7 @@ switch(type)
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
jump[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
- OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, TMP2, 0, STR_END, 0);
jump[2] = JUMP(SLJIT_GREATER);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
/* Equal. */
@@ -8137,10 +8563,10 @@ switch(type)
if (HAS_VIRTUAL_REGISTERS)
{
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
}
else
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
if (!common->endonly)
@@ -8157,10 +8583,10 @@ switch(type)
if (HAS_VIRTUAL_REGISTERS)
{
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
}
else
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTEOL);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
check_partial(common, FALSE);
jump[0] = JUMP(SLJIT_JUMP);
@@ -8200,14 +8626,14 @@ switch(type)
OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
}
else
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
}
return cc;
@@ -8219,13 +8645,13 @@ switch(type)
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
}
else
{
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
jump[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP2, 0);
- OP2(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_UNUSED, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
+ OP2U(SLJIT_AND32 | SLJIT_SET_Z, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options), SLJIT_IMM, PCRE2_NOTBOL);
}
add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
jump[0] = JUMP(SLJIT_JUMP);
@@ -8250,36 +8676,6 @@ switch(type)
}
JUMPHERE(jump[0]);
return cc;
-
- case OP_REVERSE:
- length = GET(cc, 0);
- if (length == 0)
- return cc + LINK_SIZE;
- if (HAS_VIRTUAL_REGISTERS)
- {
- OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
- }
- else
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
-#ifdef SUPPORT_UNICODE
- if (common->utf)
- {
- OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, length);
- label = LABEL();
- add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
- move_back(common, backtracks, FALSE);
- OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
- JUMPTO(SLJIT_NOT_ZERO, label);
- }
- else
-#endif
- {
- OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
- add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
- }
- check_start_used_ptr(common);
- return cc + LINK_SIZE;
}
SLJIT_UNREACHABLE();
return cc;
@@ -8319,7 +8715,7 @@ do
/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */
- if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+ if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
{
ricount = 0;
bptr = prevcc;
@@ -8331,7 +8727,7 @@ do
BACKCHAR(bptr);
GETCHAR(c, bptr);
- if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
+ if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
break;
ricount++;
@@ -8387,7 +8783,7 @@ do
/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */
- if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+ if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
{
ricount = 0;
bptr = prevcc;
@@ -8397,7 +8793,7 @@ do
{
GETCHARBACK_INVALID(c, bptr, start_subject, break);
- if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator)
+ if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator)
break;
ricount++;
@@ -8436,7 +8832,7 @@ c = *cc++;
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x110000)
- return NULL;
+ return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
lgb = UCD_GRAPHBREAK(c);
@@ -8455,7 +8851,7 @@ while (cc < end_subject)
/* Not breaking between Regional Indicators is allowed only if there
are an even number of preceding RIs. */
- if (lgb == ucp_gbRegionalIndicator && rgb == ucp_gbRegionalIndicator)
+ if (lgb == ucp_gbRegional_Indicator && rgb == ucp_gbRegional_Indicator)
{
ricount = 0;
bptr = cc - 1;
@@ -8470,7 +8866,7 @@ while (cc < end_subject)
break;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
- if (UCD_GRAPHBREAK(c) != ucp_gbRegionalIndicator) break;
+ if (UCD_GRAPHBREAK(c) != ucp_gbRegional_Indicator) break;
ricount++;
}
@@ -8520,7 +8916,7 @@ switch(type)
#endif
read_char8_type(common, backtracks, type == OP_NOT_DIGIT);
/* Flip the starting bit in the negative case. */
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_digit);
add_jump(compiler, backtracks, JUMP(type == OP_DIGIT ? SLJIT_ZERO : SLJIT_NOT_ZERO));
return cc;
@@ -8534,7 +8930,7 @@ switch(type)
else
#endif
read_char8_type(common, backtracks, type == OP_NOT_WHITESPACE);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_space);
add_jump(compiler, backtracks, JUMP(type == OP_WHITESPACE ? SLJIT_ZERO : SLJIT_NOT_ZERO));
return cc;
@@ -8548,7 +8944,7 @@ switch(type)
else
#endif
read_char8_type(common, backtracks, type == OP_NOT_WORDCHAR);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, ctype_word);
add_jump(compiler, backtracks, JUMP(type == OP_WORDCHAR ? SLJIT_ZERO : SLJIT_NOT_ZERO));
return cc;
@@ -8578,35 +8974,14 @@ switch(type)
if (check_str_ptr)
detect_partial_match(common, backtracks);
#ifdef SUPPORT_UNICODE
- if (common->utf)
+ if (common->utf && common->invalid_utf)
{
- if (common->invalid_utf)
- {
- read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
- return cc;
- }
-
-#if PCRE2_CODE_UNIT_WIDTH == 8 || PCRE2_CODE_UNIT_WIDTH == 16
- OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
-#if PCRE2_CODE_UNIT_WIDTH == 8
- jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
- OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-#elif PCRE2_CODE_UNIT_WIDTH == 16
- jump[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
- OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
- OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
- OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
-#endif /* PCRE2_CODE_UNIT_WIDTH == 8 */
- JUMPHERE(jump[0]);
+ read_char(common, 0, READ_CHAR_MAX, backtracks, READ_CHAR_UPDATE_STR_PTR);
return cc;
-#endif /* PCRE2_CODE_UNIT_WIDTH == [8|16] */
}
#endif /* SUPPORT_UNICODE */
- OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
+
+ skip_valid_char(common);
return cc;
case OP_ANYBYTE:
@@ -8690,14 +9065,14 @@ switch(type)
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
#if PCRE2_CODE_UNIT_WIDTH != 32
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
- common->utf ? (common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_utf)) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
+ common->utf ? (common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_utf)) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
if (common->invalid_utf)
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#else
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
- common->invalid_utf ? SLJIT_FUNC_OFFSET(do_extuni_utf_invalid) : SLJIT_FUNC_OFFSET(do_extuni_no_utf));
- if (!common->utf || common->invalid_utf)
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
+ common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
+ if (common->invalid_utf)
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif
@@ -8758,8 +9133,8 @@ switch(type)
if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
{
- OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, oc);
- CMOV(SLJIT_EQUAL, TMP1, SLJIT_IMM, c);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, SLJIT_IMM, oc);
+ SELECT(SLJIT_EQUAL, TMP1, SLJIT_IMM, c, TMP1);
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
}
else
@@ -8878,7 +9253,7 @@ switch(type)
OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP1, 0, TMP2, 0);
add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
@@ -9116,7 +9491,7 @@ if (common->utf && *cc == OP_REFI)
caseless_loop = LABEL();
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP2), 0);
OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(uint32_t));
- OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, char1_reg, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, TMP1, 0, char1_reg, 0);
JUMPTO(SLJIT_EQUAL, loop);
JUMPTO(SLJIT_LESS, caseless_loop);
@@ -9278,14 +9653,16 @@ if (!minimize)
if (ref)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+
if (ref)
{
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+ if (!common->unset_backref)
+ add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
}
else
{
- compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
+ compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
@@ -9298,7 +9675,7 @@ if (!minimize)
label = LABEL();
if (!ref)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
- compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, FALSE, FALSE);
+ compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, FALSE, FALSE);
if (min > 1 || max > 1)
{
@@ -9360,12 +9737,13 @@ else
{
if (ref)
{
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+ if (!common->unset_backref)
+ add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
}
else
{
- compile_dnref_search(common, ccbegin, &backtrack->topbacktracks);
+ compile_dnref_search(common, ccbegin, &backtrack->own_backtracks);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
@@ -9374,11 +9752,11 @@ else
BACKTRACK_AS(ref_iterator_backtrack)->matchingpath = LABEL();
if (max > 0)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
+ add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));
if (!ref)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
-compile_ref_matchingpath(common, ccbegin, &backtrack->topbacktracks, TRUE, TRUE);
+compile_ref_matchingpath(common, ccbegin, &backtrack->own_backtracks, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
if (min > 1)
@@ -9453,12 +9831,12 @@ if (entry->entry_label == NULL)
else
JUMPTO(SLJIT_FAST_CALL, entry->entry_label);
/* Leave if the match is failed. */
-add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
+add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
BACKTRACK_AS(recurse_backtrack)->matchingpath = LABEL();
return cc + 1 + LINK_SIZE;
}
-static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
+static sljit_s32 SLJIT_FUNC SLJIT_FUNC_ATTRIBUTE do_callout_jit(struct jit_arguments *arguments, pcre2_callout_block *callout_block, PCRE2_SPTR *jit_ovector)
{
PCRE2_SPTR begin;
PCRE2_SIZE *ovector;
@@ -9525,7 +9903,7 @@ unsigned int callout_length = (*cc == OP_CALLOUT)
sljit_sw value1;
sljit_sw value2;
sljit_sw value3;
-sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * sizeof(sljit_sw);
+sljit_uw callout_arg_size = (common->re->top_bracket + 1) * 2 * SSIZE_OF(sw);
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
@@ -9575,13 +9953,13 @@ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STR_PTR, 0);
/* SLJIT_R0 = arguments */
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0);
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START);
-sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout));
+sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS3(32, W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_callout_jit));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
free_stack(common, callout_arg_size);
/* Check return value. */
-OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
-add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER));
+OP2U(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
+add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_SIG_GREATER));
if (common->abort_label == NULL)
add_jump(compiler, &common->abort, JUMP(SLJIT_NOT_EQUAL) /* SIG_LESS */);
else
@@ -9592,6 +9970,106 @@ return cc + callout_length;
#undef CALLOUT_ARG_SIZE
#undef CALLOUT_ARG_OFFSET
+static PCRE2_SPTR compile_reverse_matchingpath(compiler_common *common, PCRE2_SPTR cc, backtrack_common *parent)
+{
+DEFINE_COMPILER;
+backtrack_common *backtrack = NULL;
+jump_list **reverse_failed;
+unsigned int lmin, lmax;
+#ifdef SUPPORT_UNICODE
+struct sljit_jump *jump;
+struct sljit_label *label;
+#endif
+
+SLJIT_ASSERT(parent->top == NULL);
+
+if (*cc == OP_REVERSE)
+ {
+ reverse_failed = &parent->own_backtracks;
+ lmin = GET2(cc, 1);
+ lmax = lmin;
+ cc += 1 + IMM2_SIZE;
+
+ SLJIT_ASSERT(lmin > 0);
+ }
+else
+ {
+ SLJIT_ASSERT(*cc == OP_VREVERSE);
+ PUSH_BACKTRACK(sizeof(vreverse_backtrack), cc, NULL);
+
+ reverse_failed = &backtrack->own_backtracks;
+ lmin = GET2(cc, 1);
+ lmax = GET2(cc, 1 + IMM2_SIZE);
+ cc += 1 + 2 * IMM2_SIZE;
+
+ SLJIT_ASSERT(lmin < lmax);
+ }
+
+if (HAS_VIRTUAL_REGISTERS)
+ {
+ OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
+ }
+else
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, begin));
+
+#ifdef SUPPORT_UNICODE
+if (common->utf)
+ {
+ if (lmin > 0)
+ {
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmin);
+ label = LABEL();
+ add_jump(compiler, reverse_failed, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0));
+ move_back(common, reverse_failed, FALSE);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+ }
+
+ if (lmin < lmax)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
+
+ OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, lmax - lmin);
+ label = LABEL();
+ jump = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
+ move_back(common, reverse_failed, FALSE);
+ OP2(SLJIT_SUB | SLJIT_SET_Z, TMP3, 0, TMP3, 0, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_NOT_ZERO, label);
+
+ JUMPHERE(jump);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
+ }
+ }
+else
+#endif
+ {
+ if (lmin > 0)
+ {
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmin));
+ add_jump(compiler, reverse_failed, CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0));
+ }
+
+ if (lmin < lmax)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
+
+ OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(lmax - lmin));
+ OP2U(SLJIT_SUB | SLJIT_SET_LESS, STR_PTR, 0, TMP2, 0);
+ SELECT(SLJIT_LESS, STR_PTR, TMP2, 0, STR_PTR);
+
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
+ }
+ }
+
+check_start_used_ptr(common);
+
+if (lmin < lmax)
+ BACKTRACK_AS(vreverse_backtrack)->matchingpath = LABEL();
+
+return cc;
+}
+
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(PCRE2_SPTR cc)
{
while (TRUE)
@@ -9610,6 +10088,8 @@ while (TRUE)
case OP_DOLLM:
case OP_CALLOUT:
case OP_ALT:
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
cc += PRIV(OP_lengths)[*cc];
break;
@@ -9629,13 +10109,15 @@ int framesize;
int extrasize;
BOOL local_quit_available = FALSE;
BOOL needs_control_head;
+BOOL end_block_size = 0;
+BOOL has_vreverse;
int private_data_ptr;
backtrack_common altbacktrack;
PCRE2_SPTR ccbegin;
PCRE2_UCHAR opcode;
PCRE2_UCHAR bra = OP_BRA;
jump_list *tmp = NULL;
-jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
+jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.own_backtracks;
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_quit_available = common->local_quit_available;
@@ -9658,6 +10140,7 @@ if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
bra = *cc;
cc++;
}
+
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
@@ -9677,12 +10160,17 @@ if (bra == OP_BRAMINZERO)
brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
}
+if ((opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NOT) && find_vreverse(ccbegin))
+ end_block_size = 3;
+
if (framesize < 0)
{
extrasize = 1;
if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
extrasize = 0;
+ extrasize += end_block_size;
+
if (needs_control_head)
extrasize++;
@@ -9700,18 +10188,19 @@ if (framesize < 0)
if (needs_control_head)
{
- SLJIT_ASSERT(extrasize == 2);
+ SLJIT_ASSERT(extrasize == end_block_size + 2);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
}
}
else
{
- extrasize = needs_control_head ? 3 : 2;
+ extrasize = (needs_control_head ? 3 : 2) + end_block_size;
+
+ OP1(SLJIT_MOV, TMP2, 0, STACK_TOP, 0);
allocate_stack(common, framesize + extrasize);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
- OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
if (needs_control_head)
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
@@ -9719,16 +10208,22 @@ else
if (needs_control_head)
{
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 2), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
}
else
- OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1), TMP1, 0);
init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize);
}
+if (end_block_size > 0)
+ {
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_END, 0);
+ OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
+ }
+
memset(&altbacktrack, 0, sizeof(backtrack_common));
if (conditional || (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT))
{
@@ -9747,13 +10242,19 @@ while (1)
common->accept_label = NULL;
common->accept = NULL;
altbacktrack.top = NULL;
- altbacktrack.topbacktracks = NULL;
+ altbacktrack.own_backtracks = NULL;
if (*ccbegin == OP_ALT && extrasize > 0)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
altbacktrack.cc = ccbegin;
- compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
+ ccbegin += 1 + LINK_SIZE;
+
+ has_vreverse = (*ccbegin == OP_VREVERSE);
+ if (*ccbegin == OP_REVERSE || has_vreverse)
+ ccbegin = compile_reverse_matchingpath(common, ccbegin, &altbacktrack);
+
+ compile_matchingpath(common, ccbegin, cc, &altbacktrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
{
if (local_quit_available)
@@ -9769,6 +10270,13 @@ while (1)
common->accept = save_accept;
return NULL;
}
+
+ if (has_vreverse)
+ {
+ SLJIT_ASSERT(altbacktrack.top != NULL);
+ add_jump(compiler, &altbacktrack.top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ }
+
common->accept_label = LABEL();
if (common->accept != NULL)
set_jumps(common->accept, common->accept_label);
@@ -9781,6 +10289,9 @@ while (1)
else if (extrasize > 0)
free_stack(common, extrasize);
+ if (end_block_size > 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
+
if (needs_control_head)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
}
@@ -9790,12 +10301,20 @@ while (1)
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
+
+ if (end_block_size > 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 2));
+
if (needs_control_head)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
}
else
{
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+
+ if (end_block_size > 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize + 1));
+
if (needs_control_head)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
@@ -9809,7 +10328,7 @@ while (1)
if (conditional)
{
if (extrasize > 0)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-end_block_size - (needs_control_head ? 2 : 1)));
}
else if (bra == OP_BRAZERO)
{
@@ -9848,7 +10367,7 @@ while (1)
common->accept = save_accept;
return NULL;
}
- set_jumps(altbacktrack.topbacktracks, LABEL());
+ set_jumps(altbacktrack.own_backtracks, LABEL());
if (*cc != OP_ALT)
break;
@@ -9881,8 +10400,11 @@ if (common->positive_assertion_quit != NULL)
JUMPHERE(jump);
}
+if (end_block_size > 0)
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+
if (needs_control_head)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(end_block_size + 1));
if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
{
@@ -9895,8 +10417,8 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
/* The topmost item should be 0. */
if (bra == OP_BRAZERO)
{
- if (extrasize == 2)
- free_stack(common, 1);
+ if (extrasize >= 2)
+ free_stack(common, extrasize - 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
else if (extrasize > 0)
@@ -9930,8 +10452,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
/* Keep the STR_PTR on the top of the stack. */
if (bra == OP_BRAZERO)
{
+ /* This allocation is always successful. */
OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
- if (extrasize == 2)
+ if (extrasize >= 2)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
}
else if (bra == OP_BRAMINZERO)
@@ -9951,8 +10474,9 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
else
{
/* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
- OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
- if (extrasize == 2)
+ OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + end_block_size + 2) * sizeof(sljit_sw));
+
+ if (extrasize == 2 + end_block_size)
{
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
if (bra == OP_BRAMINZERO)
@@ -9960,7 +10484,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
}
else
{
- SLJIT_ASSERT(extrasize == 3);
+ SLJIT_ASSERT(extrasize == 3 + end_block_size);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
}
@@ -9984,7 +10508,7 @@ if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize - 1) * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
}
- set_jumps(backtrack->common.topbacktracks, LABEL());
+ set_jumps(backtrack->common.own_backtracks, LABEL());
}
}
else
@@ -9997,8 +10521,8 @@ else
if (bra != OP_BRA)
{
- if (extrasize == 2)
- free_stack(common, 1);
+ if (extrasize >= 2)
+ free_stack(common, extrasize - 1);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
}
else if (extrasize > 0)
@@ -10029,9 +10553,9 @@ else
if (bra != OP_BRA)
{
- SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
- set_jumps(backtrack->common.topbacktracks, LABEL());
- backtrack->common.topbacktracks = NULL;
+ SLJIT_ASSERT(found == &backtrack->common.own_backtracks);
+ set_jumps(backtrack->common.own_backtracks, LABEL());
+ backtrack->common.own_backtracks = NULL;
}
}
@@ -10140,7 +10664,7 @@ static PCRE2_SPTR SLJIT_FUNC do_script_run_utf(PCRE2_SPTR ptr, PCRE2_SPTR endptr
#endif /* SUPPORT_UNICODE */
-static SLJIT_INLINE void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
+static void match_script_run_common(compiler_common *common, int private_data_ptr, backtrack_common *parent)
{
DEFINE_COMPILER;
@@ -10148,14 +10672,14 @@ SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
#ifdef SUPPORT_UNICODE
-sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM,
- common->utf ? SLJIT_FUNC_OFFSET(do_script_run_utf) : SLJIT_FUNC_OFFSET(do_script_run));
+sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
+ common->utf ? SLJIT_FUNC_ADDR(do_script_run_utf) : SLJIT_FUNC_ADDR(do_script_run));
#else
-sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_script_run));
+sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_script_run));
#endif
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);
-add_jump(compiler, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
+add_jump(compiler, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
}
/*
@@ -10229,6 +10753,7 @@ PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
+BOOL has_vreverse = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
@@ -10478,6 +11003,21 @@ else if (opcode == OP_CBRA || opcode == OP_SCBRA)
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
}
}
+else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
+ {
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ allocate_stack(common, 4);
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), STR_END, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
+ OP1(SLJIT_MOV, STR_END, 0, STR_PTR, 0);
+
+ has_vreverse = (*matchingpath == OP_VREVERSE);
+ if (*matchingpath == OP_REVERSE || has_vreverse)
+ matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
+ }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
{
/* Saving the previous value. */
@@ -10485,6 +11025,9 @@ else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SC
allocate_stack(common, 1);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
+
+ if (*matchingpath == OP_REVERSE)
+ matchingpath = compile_reverse_matchingpath(common, matchingpath, backtrack);
}
else if (has_alternatives)
{
@@ -10604,14 +11147,28 @@ compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return NULL;
-if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
-
-if (opcode == OP_ONCE)
- match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
+switch (opcode)
+ {
+ case OP_ASSERTBACK_NA:
+ if (has_vreverse)
+ {
+ SLJIT_ASSERT(backtrack->top != NULL && PRIVATE_DATA(ccbegin + 1));
+ add_jump(compiler, &backtrack->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ }
-if (opcode == OP_SCRIPT_RUN)
- match_script_run_common(common, private_data_ptr, backtrack);
+ if (PRIVATE_DATA(ccbegin + 1))
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+ break;
+ case OP_ASSERT_NA:
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ break;
+ case OP_ONCE:
+ match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);
+ break;
+ case OP_SCRIPT_RUN:
+ match_script_run_common(common, private_data_ptr, backtrack);
+ break;
+ }
stacksize = 0;
if (repeat_type == OP_MINUPTO)
@@ -10810,7 +11367,7 @@ switch(opcode)
case OP_CBRAPOS:
case OP_SCBRAPOS:
offset = GET2(cc, 1 + LINK_SIZE);
- /* This case cannot be optimized in the same was as
+ /* This case cannot be optimized in the same way as
normal capturing brackets. */
SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
cbraprivptr = OVECTOR_PRIV(offset);
@@ -10927,7 +11484,7 @@ loop = LABEL();
while (*cc != OP_KETRPOS)
{
backtrack->top = NULL;
- backtrack->topbacktracks = NULL;
+ backtrack->own_backtracks = NULL;
cc += GET(cc, 1);
compile_matchingpath(common, ccbegin, cc, backtrack);
@@ -11008,7 +11565,7 @@ while (*cc != OP_KETRPOS)
compile_backtrackingpath(common, backtrack->top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return NULL;
- set_jumps(backtrack->topbacktracks, LABEL());
+ set_jumps(backtrack->own_backtracks, LABEL());
if (framesize < 0)
{
@@ -11040,13 +11597,13 @@ while (*cc != OP_KETRPOS)
/* We don't have to restore the control head in case of a failed match. */
-backtrack->topbacktracks = NULL;
+backtrack->own_backtracks = NULL;
if (!zero)
{
if (framesize < 0)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
+ add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
else /* TMP2 is set to [private_data_ptr] above. */
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
+ add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
}
/* None of them matched. */
@@ -11220,7 +11777,7 @@ struct sljit_label *label;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
-int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
+int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
int tmp_base, tmp_offset;
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
BOOL use_tmp;
@@ -11242,7 +11799,7 @@ SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || early_fail_ptr == 0
|| (early_fail_ptr >= common->early_fail_start_ptr && early_fail_ptr <= common->early_fail_end_ptr));
if (early_fail_type == type_fail)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
+ add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr));
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
@@ -11269,10 +11826,10 @@ if (exact > 1)
&& type != OP_ANYNL && type != OP_EXTUNI)
{
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact));
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
+ add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0));
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
+ compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
@@ -11280,25 +11837,25 @@ if (exact > 1)
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact);
label = LABEL();
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
+ compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
JUMPTO(SLJIT_NOT_ZERO, label);
}
}
else if (exact == 1)
- {
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE);
+ compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, TRUE);
- if (early_fail_type == type_fail_range)
- {
- OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
- OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw));
- OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
- OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
+if (early_fail_type == type_fail_range)
+ {
+ /* Range end first, followed by range start. */
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr);
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw));
+ OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, TMP2, 0);
+ OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, TMP2, 0);
+ add_jump(compiler, &backtrack->own_backtracks, CMP(SLJIT_LESS_EQUAL, TMP2, 0, TMP1, 0));
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + (int)sizeof(sljit_sw), STR_PTR, 0);
- }
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr + SSIZE_OF(sw), STR_PTR, 0);
}
switch(opcode)
@@ -11374,8 +11931,8 @@ switch(opcode)
if (common->mode == PCRE2_JIT_COMPLETE)
{
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
- CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
+ SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
}
else
{
@@ -11443,14 +12000,14 @@ switch(opcode)
if (opcode == OP_UPTO)
{
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
- add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO));
+ add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_ZERO));
}
- compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE);
+ compile_char1_matchingpath(common, type, cc, &backtrack->own_backtracks, FALSE);
if (early_fail_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), early_fail_ptr, STR_PTR, 0);
JUMPHERE(jump);
- detect_partial_match(common, &backtrack->topbacktracks);
+ detect_partial_match(common, &backtrack->own_backtracks);
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
if (charpos_othercasebit != 0)
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit);
@@ -11604,7 +12161,7 @@ switch(opcode)
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf)
+ if (type == OP_EXTUNI || common->utf)
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
detect_partial_match(common, &no_match);
@@ -11667,8 +12224,8 @@ switch(opcode)
if (common->mode == PCRE2_JIT_COMPLETE)
{
- OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
- CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0);
+ OP2U(SLJIT_SUB | SLJIT_SET_GREATER, STR_PTR, 0, STR_END, 0);
+ SELECT(SLJIT_GREATER, STR_PTR, STR_END, 0, STR_PTR);
}
else
{
@@ -11721,12 +12278,12 @@ PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);
if (*cc == OP_FAIL)
{
- add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
+ add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
return cc + 1;
}
if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
- add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+ add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
{
@@ -11751,9 +12308,9 @@ if (HAS_VIRTUAL_REGISTERS)
else
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(ARGUMENTS), SLJIT_OFFSETOF(jit_arguments, options));
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
-add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_NOT_ZERO));
-OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
+OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
+add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_NOT_ZERO));
+OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
if (common->accept_label == NULL)
add_jump(compiler, &common->accept, JUMP(SLJIT_ZERO));
else
@@ -11764,7 +12321,7 @@ if (common->accept_label == NULL)
add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
else
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
-add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
+add_jump(compiler, &backtrack->own_backtracks, JUMP(SLJIT_JUMP));
return cc + 1;
}
@@ -11884,8 +12441,9 @@ while (cc < ccend)
case OP_DOLLM:
case OP_CIRC:
case OP_CIRCM:
- case OP_REVERSE:
- cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ case OP_NOT_UCP_WORD_BOUNDARY:
+ case OP_UCP_WORD_BOUNDARY:
+ cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
break;
case OP_NOT_DIGIT:
@@ -11907,7 +12465,7 @@ while (cc < ccend)
case OP_EXTUNI:
case OP_NOT:
case OP_NOTI:
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
break;
case OP_SET_SOM:
@@ -11922,9 +12480,9 @@ while (cc < ccend)
case OP_CHAR:
case OP_CHARI:
if (common->mode == PCRE2_JIT_COMPLETE)
- cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
+ cc = compile_charn_matchingpath(common, cc, ccend, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
break;
case OP_STAR:
@@ -12000,7 +12558,7 @@ while (cc < ccend)
if (cc[1 + (32 / sizeof(PCRE2_UCHAR))] >= OP_CRSTAR && cc[1 + (32 / sizeof(PCRE2_UCHAR))] <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
break;
#if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH == 16 || PCRE2_CODE_UNIT_WIDTH == 32
@@ -12008,7 +12566,7 @@ while (cc < ccend)
if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
- cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE);
+ cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE);
break;
#endif
@@ -12018,7 +12576,7 @@ while (cc < ccend)
cc = compile_ref_iterator_matchingpath(common, cc, parent);
else
{
- compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+ compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
cc += 1 + IMM2_SIZE;
}
break;
@@ -12029,8 +12587,8 @@ while (cc < ccend)
cc = compile_ref_iterator_matchingpath(common, cc, parent);
else
{
- compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
- compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks, TRUE, FALSE);
+ compile_dnref_search(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks);
+ compile_ref_matchingpath(common, cc, parent->top != NULL ? &parent->top->simple_backtracks : &parent->own_backtracks, TRUE, FALSE);
cc += 1 + 2 * IMM2_SIZE;
}
break;
@@ -12197,7 +12755,7 @@ PCRE2_SPTR end;
int private_data_ptr = PRIVATE_DATA(cc);
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP);
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
-int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
+int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + SSIZE_OF(sw);
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);
@@ -12308,7 +12866,7 @@ switch(opcode)
break;
}
-set_jumps(current->topbacktracks, LABEL());
+set_jumps(current->own_backtracks, LABEL());
}
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
@@ -12323,7 +12881,7 @@ type = cc[ref ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];
if ((type & 0x1) == 0)
{
/* Maximize case. */
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(current->own_backtracks, LABEL());
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 1);
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
@@ -12332,7 +12890,7 @@ if ((type & 0x1) == 0)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(ref_iterator_backtrack)->matchingpath);
-set_jumps(current->topbacktracks, LABEL());
+set_jumps(current->own_backtracks, LABEL());
free_stack(common, ref ? 2 : 3);
}
@@ -12353,7 +12911,7 @@ if (!CURRENT_AS(recurse_backtrack)->inlined_pattern)
else
compile_backtrackingpath(common, current->top);
-set_jumps(current->topbacktracks, LABEL());
+set_jumps(current->own_backtracks, LABEL());
}
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
@@ -12372,13 +12930,13 @@ if (*cc == OP_BRAZERO)
if (bra == OP_BRAZERO)
{
- SLJIT_ASSERT(current->topbacktracks == NULL);
+ SLJIT_ASSERT(current->own_backtracks == NULL);
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
}
if (CURRENT_AS(assert_backtrack)->framesize < 0)
{
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(current->own_backtracks, LABEL());
if (bra == OP_BRAZERO)
{
@@ -12410,10 +12968,10 @@ if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(assert_backtrack)->framesize - 1) * sizeof(sljit_sw));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(assert_backtrack)->private_data_ptr, TMP1, 0);
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(current->own_backtracks, LABEL());
}
else
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(current->own_backtracks, LABEL());
if (bra == OP_BRAZERO)
{
@@ -12440,6 +12998,7 @@ PCRE2_UCHAR ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
+BOOL has_vreverse;
struct sljit_jump *brazero = NULL;
struct sljit_jump *next_alt = NULL;
struct sljit_jump *once = NULL;
@@ -12616,8 +13175,8 @@ else if (has_alternatives)
}
COMPILE_BACKTRACKINGPATH(current->top);
-if (current->topbacktracks)
- set_jumps(current->topbacktracks, LABEL());
+if (current->own_backtracks)
+ set_jumps(current->own_backtracks, LABEL());
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))
{
@@ -12653,14 +13212,25 @@ if (has_alternatives)
do
{
current->top = NULL;
- current->topbacktracks = NULL;
- current->nextbacktracks = NULL;
+ current->own_backtracks = NULL;
+ current->simple_backtracks = NULL;
/* Conditional blocks always have an additional alternative, even if it is empty. */
if (*cc == OP_ALT)
{
ccprev = cc + 1 + LINK_SIZE;
cc += GET(cc, 1);
- if (opcode != OP_COND && opcode != OP_SCOND)
+
+ has_vreverse = FALSE;
+ if (opcode == OP_ASSERTBACK || opcode == OP_ASSERTBACK_NA)
+ {
+ SLJIT_ASSERT(private_data_ptr != 0);
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+
+ has_vreverse = (*ccprev == OP_VREVERSE);
+ if (*ccprev == OP_REVERSE || has_vreverse)
+ ccprev = compile_reverse_matchingpath(common, ccprev, current);
+ }
+ else if (opcode != OP_COND && opcode != OP_SCOND)
{
if (opcode != OP_ONCE)
{
@@ -12672,15 +13242,30 @@ if (has_alternatives)
else
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0));
}
+
compile_matchingpath(common, ccprev, cc, current);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
- if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA)
- OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ switch (opcode)
+ {
+ case OP_ASSERTBACK_NA:
+ if (has_vreverse)
+ {
+ SLJIT_ASSERT(current->top != NULL && PRIVATE_DATA(ccbegin + 1));
+ add_jump(compiler, &current->top->simple_backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
+ }
- if (opcode == OP_SCRIPT_RUN)
- match_script_run_common(common, private_data_ptr, current);
+ if (PRIVATE_DATA(ccbegin + 1))
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+ break;
+ case OP_ASSERT_NA:
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
+ break;
+ case OP_SCRIPT_RUN:
+ match_script_run_common(common, private_data_ptr, current);
+ break;
+ }
}
/* Instructions after the current alternative is successfully matched. */
@@ -12767,9 +13352,9 @@ if (has_alternatives)
}
COMPILE_BACKTRACKINGPATH(current->top);
- if (current->topbacktracks)
- set_jumps(current->topbacktracks, LABEL());
- SLJIT_ASSERT(!current->nextbacktracks);
+ if (current->own_backtracks)
+ set_jumps(current->own_backtracks, LABEL());
+ SLJIT_ASSERT(!current->simple_backtracks);
}
while (*cc == OP_ALT);
@@ -12811,6 +13396,15 @@ if (offset != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
}
}
+else if (opcode == OP_ASSERTBACK_NA && PRIVATE_DATA(ccbegin + 1))
+ {
+ OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
+ OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
+ OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw), TMP2, 0);
+ free_stack(common, 4);
+ }
else if (opcode == OP_ASSERT_NA || opcode == OP_ASSERTBACK_NA || opcode == OP_SCRIPT_RUN || opcode == OP_SBRA || opcode == OP_SCOND)
{
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0));
@@ -12897,12 +13491,19 @@ static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *co
DEFINE_COMPILER;
int offset;
struct sljit_jump *jump;
+PCRE2_SPTR cc;
+/* No retry on backtrack, just drop everything. */
if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
{
- if (*current->cc == OP_CBRAPOS || *current->cc == OP_SCBRAPOS)
+ cc = current->cc;
+
+ if (*cc == OP_BRAPOSZERO)
+ cc++;
+
+ if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
{
- offset = (GET2(current->cc, 1 + LINK_SIZE)) << 1;
+ offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
@@ -12912,7 +13513,7 @@ if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
if (common->capture_last_ptr != 0)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
}
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(current->own_backtracks, LABEL());
free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
return;
}
@@ -12921,10 +13522,10 @@ OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtra
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (CURRENT_AS(bracketpos_backtrack)->framesize - 1) * sizeof(sljit_sw));
-if (current->topbacktracks)
+if (current->own_backtracks)
{
jump = JUMP(SLJIT_JUMP);
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(current->own_backtracks, LABEL());
/* Drop the stack frame. */
free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
JUMPHERE(jump);
@@ -12937,8 +13538,8 @@ static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *co
assert_backtrack backtrack;
current->top = NULL;
-current->topbacktracks = NULL;
-current->nextbacktracks = NULL;
+current->own_backtracks = NULL;
+current->simple_backtracks = NULL;
if (current->cc[1] > OP_ASSERTBACK_NOT)
{
/* Manual call of compile_bracket_matchingpath and compile_bracket_backtrackingpath. */
@@ -12953,7 +13554,7 @@ else
/* Manual call of compile_assert_matchingpath. */
compile_assert_matchingpath(common, current->cc, &backtrack, FALSE);
}
-SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
+SLJIT_ASSERT(!current->simple_backtracks && !current->own_backtracks);
}
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
@@ -13004,7 +13605,7 @@ if (opcode == OP_SKIP_ARG)
SLJIT_ASSERT(common->control_head_ptr != 0 && TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
- sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
+ sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(do_search_mark));
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_R0, 0);
add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, SLJIT_R0, 0, SLJIT_IMM, 0));
@@ -13018,6 +13619,23 @@ else
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}
+static SLJIT_INLINE void compile_vreverse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
+{
+DEFINE_COMPILER;
+struct sljit_jump *jump;
+struct sljit_label *label;
+
+OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
+jump = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(3));
+skip_valid_char(common);
+OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), STR_PTR, 0);
+JUMPTO(SLJIT_JUMP, CURRENT_AS(vreverse_backtrack)->matchingpath);
+
+label = LABEL();
+sljit_set_label(jump, label);
+set_jumps(current->own_backtracks, label);
+}
+
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
@@ -13058,8 +13676,8 @@ then_trap_backtrack *save_then_trap = common->then_trap;
while (current)
{
- if (current->nextbacktracks != NULL)
- set_jumps(current->nextbacktracks, LABEL());
+ if (current->simple_backtracks != NULL)
+ set_jumps(current->simple_backtracks, LABEL());
switch(*current->cc)
{
case OP_SET_SOM:
@@ -13225,7 +13843,11 @@ while (current)
case OP_FAIL:
case OP_ACCEPT:
case OP_ASSERT_ACCEPT:
- set_jumps(current->topbacktracks, LABEL());
+ set_jumps(current->own_backtracks, LABEL());
+ break;
+
+ case OP_VREVERSE:
+ compile_vreverse_backtrackingpath(common, current);
break;
case OP_THEN_TRAP:
@@ -13248,10 +13870,8 @@ DEFINE_COMPILER;
PCRE2_SPTR cc = common->start + common->currententry->start;
PCRE2_SPTR ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
PCRE2_SPTR ccend = bracketend(cc) - (1 + LINK_SIZE);
-BOOL needs_control_head;
-BOOL has_quit;
-BOOL has_accept;
-int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &needs_control_head, &has_quit, &has_accept);
+uint32_t recurse_flags = 0;
+int private_data_size = get_recurse_data_length(common, ccbegin, ccend, &recurse_flags);
int alt_count, alt_max, local_size;
backtrack_common altbacktrack;
jump_list *match = NULL;
@@ -13273,7 +13893,7 @@ SLJIT_ASSERT(common->currententry->entry_label == NULL && common->recursive_head
common->currententry->entry_label = LABEL();
set_jumps(common->currententry->entry_calls, common->currententry->entry_label);
-sljit_emit_fast_enter(compiler, TMP2, 0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP2, 0);
count_match(common);
local_size = (alt_max > 1) ? 2 : 1;
@@ -13285,12 +13905,12 @@ allocate_stack(common, private_data_size + local_size);
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP2, 0);
-copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, has_quit);
+copy_recurse_data(common, ccbegin, ccend, recurse_copy_from_global, local_size, private_data_size + local_size, recurse_flags);
/* This variable is saved and restored all time when we enter or exit from a recursive context. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
-if (needs_control_head)
+if (recurse_flags & recurse_flag_control_head_found)
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
if (alt_max > 1)
@@ -13306,7 +13926,7 @@ cc += GET(cc, 1);
while (1)
{
altbacktrack.top = NULL;
- altbacktrack.topbacktracks = NULL;
+ altbacktrack.own_backtracks = NULL;
if (altbacktrack.cc != ccbegin)
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
@@ -13315,10 +13935,10 @@ while (1)
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
- allocate_stack(common, (alt_max > 1 || has_accept) ? 2 : 1);
+ allocate_stack(common, (alt_max > 1 || (recurse_flags & recurse_flag_accept_found)) ? 2 : 1);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
- if (alt_max > 1 || has_accept)
+ if (alt_max > 1 || (recurse_flags & recurse_flag_accept_found))
{
if (alt_max > 3)
put_label = sljit_emit_put_label(compiler, SLJIT_MEM1(STACK_TOP), STACK(1));
@@ -13335,16 +13955,16 @@ while (1)
common->currententry->backtrack_label = LABEL();
set_jumps(common->currententry->backtrack_calls, common->currententry->backtrack_label);
- sljit_emit_fast_enter(compiler, TMP1, 0);
+ sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, TMP1, 0);
- if (has_accept)
+ if (recurse_flags & recurse_flag_accept_found)
accept_exit = CMP(SLJIT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, -1);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(local_size - 1), TMP1, 0);
- copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
+ copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
if (alt_max > 1)
{
@@ -13361,7 +13981,7 @@ while (1)
next_alt = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0);
}
else
- free_stack(common, has_accept ? 2 : 1);
+ free_stack(common, (recurse_flags & recurse_flag_accept_found) ? 2 : 1);
}
else if (alt_max > 3)
{
@@ -13383,7 +14003,7 @@ while (1)
compile_backtrackingpath(common, altbacktrack.top);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
return;
- set_jumps(altbacktrack.topbacktracks, LABEL());
+ set_jumps(altbacktrack.own_backtracks, LABEL());
if (*cc != OP_ALT)
break;
@@ -13396,7 +14016,7 @@ while (1)
quit = LABEL();
-copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, has_quit);
+copy_recurse_data(common, ccbegin, ccend, recurse_copy_private_to_global, local_size, private_data_size + local_size, recurse_flags);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
@@ -13405,15 +14025,15 @@ OP_SRC(SLJIT_FAST_RETURN, TMP2, 0);
if (common->quit != NULL)
{
- SLJIT_ASSERT(has_quit);
+ SLJIT_ASSERT(recurse_flags & recurse_flag_quit_found);
set_jumps(common->quit, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
- copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, has_quit);
+ copy_recurse_data(common, ccbegin, ccend, recurse_copy_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
JUMPTO(SLJIT_JUMP, quit);
}
-if (has_accept)
+if (recurse_flags & recurse_flag_accept_found)
{
JUMPHERE(accept_exit);
free_stack(common, 2);
@@ -13421,7 +14041,7 @@ if (has_accept)
/* Save return address. */
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1), TMP1, 0);
- copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, has_quit);
+ copy_recurse_data(common, ccbegin, ccend, recurse_copy_kept_shared_to_global, local_size, private_data_size + local_size, recurse_flags);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(local_size - 1));
free_stack(common, private_data_size + local_size);
@@ -13431,7 +14051,7 @@ if (has_accept)
if (common->accept != NULL)
{
- SLJIT_ASSERT(has_accept);
+ SLJIT_ASSERT(recurse_flags & recurse_flag_accept_found);
set_jumps(common->accept, LABEL());
@@ -13446,7 +14066,7 @@ set_jumps(match, LABEL());
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
-copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, has_quit);
+copy_recurse_data(common, ccbegin, ccend, recurse_swap_global, local_size, private_data_size + local_size, recurse_flags);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP2), STACK(local_size - 1));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 1);
@@ -13489,9 +14109,9 @@ jump_list *reqcu_not_found = NULL;
SLJIT_ASSERT(tables);
#if HAS_VIRTUAL_REGISTERS == 1
-SLJIT_ASSERT(sljit_get_register_index(TMP3) < 0 && sljit_get_register_index(ARGUMENTS) < 0 && sljit_get_register_index(RETURN_ADDR) < 0);
+SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) < 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) < 0);
#elif HAS_VIRTUAL_REGISTERS == 0
-SLJIT_ASSERT(sljit_get_register_index(TMP3) >= 0 && sljit_get_register_index(ARGUMENTS) >= 0 && sljit_get_register_index(RETURN_ADDR) >= 0);
+SLJIT_ASSERT(sljit_get_register_index(SLJIT_GP_REGISTER, TMP3) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, ARGUMENTS) >= 0 && sljit_get_register_index(SLJIT_GP_REGISTER, RETURN_ADDR) >= 0);
#else
#error "Invalid value for HAS_VIRTUAL_REGISTERS"
#endif
@@ -13652,7 +14272,7 @@ SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0));
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw);
total_length = ccend - common->start;
-common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
+common->private_data_ptrs = (sljit_s32*)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), allocator_data);
if (!common->private_data_ptrs)
{
SLJIT_FREE(common->optimized_cbracket, allocator_data);
@@ -13663,13 +14283,13 @@ memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32));
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw);
if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back)
- detect_early_fail(common, common->start, &private_data_size, 0, 0, TRUE);
+ detect_early_fail(common, common->start, &private_data_size, 0, 0);
set_private_data_ptrs(common, &private_data_size, ccend);
SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr);
-if (private_data_size > SLJIT_MAX_LOCAL_SIZE)
+if (private_data_size > 65536)
{
SLJIT_FREE(common->private_data_ptrs, allocator_data);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
@@ -13692,8 +14312,9 @@ if (!compiler)
}
common->compiler = compiler;
-/* Main pcre_jit_exec entry. */
-sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size);
+/* Main pcre2_jit_exec entry. */
+SLJIT_ASSERT((private_data_size & (sizeof(sljit_sw) - 1)) == 0);
+sljit_emit_enter(compiler, 0, SLJIT_ARGS1(W, W), 5, 5, SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS, 0, private_data_size);
/* Register init. */
reset_ovector(common, (re->top_bracket + 1) * 2);
@@ -13900,9 +14521,9 @@ if (common->might_be_empty)
JUMPHERE(empty_match);
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
OP1(SLJIT_MOV_U32, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, options));
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY);
JUMPTO(SLJIT_NOT_ZERO, empty_match_backtrack_label);
- OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
+ OP2U(SLJIT_AND | SLJIT_SET_Z, TMP2, 0, SLJIT_IMM, PCRE2_NOTEMPTY_ATSTART);
JUMPTO(SLJIT_ZERO, empty_match_found_label);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label);
@@ -13915,20 +14536,40 @@ common->early_fail_end_ptr = 0;
common->currententry = common->entries;
common->local_quit_available = TRUE;
quit_label = common->quit_label;
-while (common->currententry != NULL)
+if (common->currententry != NULL)
{
- /* Might add new entries. */
- compile_recurse(common);
- if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+ /* A free bit for each private data. */
+ common->recurse_bitset_size = ((private_data_size / SSIZE_OF(sw)) + 7) >> 3;
+ SLJIT_ASSERT(common->recurse_bitset_size > 0);
+ common->recurse_bitset = (sljit_u8*)SLJIT_MALLOC(common->recurse_bitset_size, allocator_data);;
+
+ if (common->recurse_bitset != NULL)
+ {
+ do
+ {
+ /* Might add new entries. */
+ compile_recurse(common);
+ if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
+ break;
+ flush_stubs(common);
+ common->currententry = common->currententry->next;
+ }
+ while (common->currententry != NULL);
+
+ SLJIT_FREE(common->recurse_bitset, allocator_data);
+ }
+
+ if (common->currententry != NULL)
{
+ /* The common->recurse_bitset has been freed. */
+ SLJIT_ASSERT(sljit_get_compiler_error(compiler) || common->recurse_bitset == NULL);
+
sljit_free_compiler(compiler);
SLJIT_FREE(common->optimized_cbracket, allocator_data);
SLJIT_FREE(common->private_data_ptrs, allocator_data);
PRIV(jit_free_rodata)(common->read_only_data_head, allocator_data);
return PCRE2_ERROR_NOMEMORY;
}
- flush_stubs(common);
- common->currententry = common->currententry->next;
}
common->local_quit_available = FALSE;
common->quit_label = quit_label;
@@ -13937,7 +14578,7 @@ common->quit_label = quit_label;
/* This is a (really) rare case. */
set_jumps(common->stackalloc, LABEL());
/* RETURN_ADDR is not a saved register. */
-sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
+sljit_emit_op_dst(compiler, SLJIT_FAST_ENTER, SLJIT_MEM1(SLJIT_SP), LOCALS0);
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STR_PTR == SLJIT_R1);
@@ -13947,7 +14588,7 @@ OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE);
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack));
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0);
-sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize));
+sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM, SLJIT_FUNC_ADDR(sljit_stack_resize));
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0);
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0);
@@ -13975,7 +14616,12 @@ if (common->revertframes != NULL)
if (common->wordboundary != NULL)
{
set_jumps(common->wordboundary, LABEL());
- check_wordboundary(common);
+ check_wordboundary(common, FALSE);
+ }
+if (common->ucp_wordboundary != NULL)
+ {
+ set_jumps(common->ucp_wordboundary, LABEL());
+ check_wordboundary(common, TRUE);
}
if (common->anynewline != NULL)
{
@@ -14002,10 +14648,17 @@ if (common->caselesscmp != NULL)
set_jumps(common->caselesscmp, LABEL());
do_caselesscmp(common);
}
-if (common->reset_match != NULL)
+if (common->reset_match != NULL || common->restart_match != NULL)
{
+ if (common->restart_match != NULL)
+ {
+ set_jumps(common->restart_match, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+ }
+
set_jumps(common->reset_match, LABEL());
do_reset_match(common, (re->top_bracket + 1) * 2);
+ /* The value of restart_match is in TMP1. */
CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
JUMPTO(SLJIT_JUMP, reset_match_label);
@@ -14134,7 +14787,7 @@ pcre2_jit_compile(pcre2_code *code, uint32_t options)
pcre2_real_code *re = (pcre2_real_code *)code;
#ifdef SUPPORT_JIT
executable_functions *functions;
-static int executable_allocator_is_working = 0;
+static int executable_allocator_is_working = -1;
#endif
if (code == NULL)
@@ -14197,23 +14850,21 @@ return PCRE2_ERROR_JIT_BADOPTION;
if ((re->flags & PCRE2_NOJIT) != 0) return 0;
-if (executable_allocator_is_working == 0)
+if (executable_allocator_is_working == -1)
{
/* Checks whether the executable allocator is working. This check
might run multiple times in multi-threaded environments, but the
result should not be affected by it. */
void *ptr = SLJIT_MALLOC_EXEC(32, NULL);
-
- executable_allocator_is_working = -1;
-
if (ptr != NULL)
{
SLJIT_FREE_EXEC(((sljit_u8*)(ptr)) + SLJIT_EXEC_OFFSET(ptr), NULL);
executable_allocator_is_working = 1;
}
+ else executable_allocator_is_working = 0;
}
-if (executable_allocator_is_working < 0)
+if (!executable_allocator_is_working)
return PCRE2_ERROR_NOMEMORY;
if ((re->overall_options & PCRE2_MATCH_INVALID_UTF) != 0)