From ddf45ac6344a79b23af2df33eb4abff27f748c94 Mon Sep 17 00:00:00 2001 From: Giuseppe D'Angelo Date: Wed, 26 May 2021 17:05:41 +0200 Subject: PCRE2: upgrade to 10.37 New upstream release. Change-Id: I3a7e6c3d6706c940c0a279e4b63e1bfd96cc242c Reviewed-by: Kai Koehne Reviewed-by: Volker Hilsheimer (cherry picked from commit 4ee4b7a8716dd2ffb7b67032aaa0766bc9b33bdb) Reviewed-by: Qt Cherry-pick Bot --- src/3rdparty/pcre2/src/pcre2.h | 4 +- src/3rdparty/pcre2/src/pcre2_auto_possess.c | 13 +- src/3rdparty/pcre2/src/pcre2_compile.c | 52 +- src/3rdparty/pcre2/src/pcre2_jit_compile.c | 40 +- src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h | 828 +++++++++++++++++++-- src/3rdparty/pcre2/src/pcre2_match.c | 4 +- src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h | 2 + src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c | 124 +-- src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c | 43 +- src/3rdparty/pcre2/src/sljit/sljitUtils.c | 10 +- .../pcre2/src/sljit/sljitWXExecAllocator.c | 8 +- 11 files changed, 961 insertions(+), 167 deletions(-) (limited to 'src/3rdparty/pcre2/src') diff --git a/src/3rdparty/pcre2/src/pcre2.h b/src/3rdparty/pcre2/src/pcre2.h index f204ec8180..7ab6b39aeb 100644 --- a/src/3rdparty/pcre2/src/pcre2.h +++ b/src/3rdparty/pcre2/src/pcre2.h @@ -42,9 +42,9 @@ POSSIBILITY OF SUCH DAMAGE. /* The current PCRE version information. */ #define PCRE2_MAJOR 10 -#define PCRE2_MINOR 36 +#define PCRE2_MINOR 37 #define PCRE2_PRERELEASE -#define PCRE2_DATE 2020-12-04 +#define PCRE2_DATE 2021-05-26 /* When an application links to a PCRE DLL in Windows, the symbols that are imported have to be identified as such. When building PCRE2, the appropriate diff --git a/src/3rdparty/pcre2/src/pcre2_auto_possess.c b/src/3rdparty/pcre2/src/pcre2_auto_possess.c index c64cf856d1..e5e0895682 100644 --- a/src/3rdparty/pcre2/src/pcre2_auto_possess.c +++ b/src/3rdparty/pcre2/src/pcre2_auto_possess.c @@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel Original API code Copyright (c) 1997-2012 University of Cambridge - New API code Copyright (c) 2016-2020 University of Cambridge + New API code Copyright (c) 2016-2021 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -490,6 +490,7 @@ switch(c) list[2] = (uint32_t)(end - code); return end; } + return NULL; /* Opcode not accepted */ } @@ -1186,12 +1187,16 @@ for (;;) c = *repeat_opcode; if (c >= OP_CRSTAR && c <= OP_CRMINRANGE) { - /* end must not be NULL. */ - end = get_chr_property_list(code, utf, ucp, cb->fcc, list); + /* The return from get_chr_property_list() will never be NULL when + *code (aka c) is one of the three class opcodes. However, gcc with + -fanalyzer notes that a NULL return is possible, and grumbles. Hence we + put in a check. */ + end = get_chr_property_list(code, utf, ucp, cb->fcc, list); list[1] = (c & 1) == 0; - if (compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit)) + if (end != NULL && + compare_opcodes(end, utf, ucp, cb, list, end, &rec_limit)) { switch (c) { diff --git a/src/3rdparty/pcre2/src/pcre2_compile.c b/src/3rdparty/pcre2/src/pcre2_compile.c index e811f12f02..da449ae9ed 100644 --- a/src/3rdparty/pcre2/src/pcre2_compile.c +++ b/src/3rdparty/pcre2/src/pcre2_compile.c @@ -1398,32 +1398,47 @@ static BOOL read_repeat_counts(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *minp, uint32_t *maxp, int *errorcodeptr) { -PCRE2_SPTR p = *ptrptr; +PCRE2_SPTR p; BOOL yield = FALSE; +BOOL had_comma = FALSE; int32_t min = 0; int32_t max = REPEAT_UNLIMITED; /* This value is larger than MAX_REPEAT_COUNT */ -/* NB read_number() initializes the error code to zero. The only error is for a -number that is too big. */ +/* Check the syntax */ +*errorcodeptr = 0; +for (p = *ptrptr;; p++) + { + uint32_t c; + if (p >= ptrend) return FALSE; + c = *p; + if (IS_DIGIT(c)) continue; + if (c == CHAR_RIGHT_CURLY_BRACKET) break; + if (c == CHAR_COMMA) + { + if (had_comma) return FALSE; + had_comma = TRUE; + } + else return FALSE; + } + +/* The only error from read_number() is for a number that is too big. */ + +p = *ptrptr; if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &min, errorcodeptr)) goto EXIT; -if (p >= ptrend) goto EXIT; - if (*p == CHAR_RIGHT_CURLY_BRACKET) { p++; max = min; } - else { - if (*p++ != CHAR_COMMA || p >= ptrend) goto EXIT; - if (*p != CHAR_RIGHT_CURLY_BRACKET) + if (*(++p) != CHAR_RIGHT_CURLY_BRACKET) { if (!read_number(&p, ptrend, -1, MAX_REPEAT_COUNT, ERR5, &max, - errorcodeptr) || p >= ptrend || *p != CHAR_RIGHT_CURLY_BRACKET) + errorcodeptr)) goto EXIT; if (max < min) { @@ -1438,11 +1453,10 @@ yield = TRUE; if (minp != NULL) *minp = (uint32_t)min; if (maxp != NULL) *maxp = (uint32_t)max; -/* Update the pattern pointer on success, or after an error, but not when -the result is "not a repeat quantifier". */ +/* Update the pattern pointer */ EXIT: -if (yield || *errorcodeptr != 0) *ptrptr = p; +*ptrptr = p; return yield; } @@ -1776,19 +1790,23 @@ else { oldptr = ptr; ptr--; /* Back to the digit */ - if (!read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, ERR61, &s, - errorcodeptr)) - break; - /* \1 to \9 are always back references. \8x and \9x are too; \1x to \7x + /* As we know we are at a digit, the only possible error from + read_number() is a number that is too large to be a group number. In this + case we fall through handle this as not a group reference. If we have + read a small enough number, check for a back reference. + + \1 to \9 are always back references. \8x and \9x are too; \1x to \7x are octal escapes if there are not that many previous captures. */ - if (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount) + if (read_number(&ptr, ptrend, -1, INT_MAX/10 - 1, 0, &s, errorcodeptr) && + (s < 10 || oldptr[-1] >= CHAR_8 || s <= (int)cb->bracount)) { if (s > (int)MAX_GROUP_NUMBER) *errorcodeptr = ERR61; else escape = -s; /* Indicates a back reference */ break; } + ptr = oldptr; /* Put the pointer back and fall through */ } diff --git a/src/3rdparty/pcre2/src/pcre2_jit_compile.c b/src/3rdparty/pcre2/src/pcre2_jit_compile.c index 1977d28aa5..f3a26aeee0 100644 --- a/src/3rdparty/pcre2/src/pcre2_jit_compile.c +++ b/src/3rdparty/pcre2/src/pcre2_jit_compile.c @@ -1226,7 +1226,7 @@ while (cc < ccend) return TRUE; } -#define EARLY_FAIL_ENHANCE_MAX (1 + 1) +#define EARLY_FAIL_ENHANCE_MAX (1 + 3) /* start: @@ -1238,6 +1238,7 @@ return: current number of iterators enhanced with fast fail */ static int detect_early_fail(compiler_common *common, PCRE2_SPTR cc, int *private_data_start, sljit_s32 depth, int start) { +PCRE2_SPTR begin = cc; PCRE2_SPTR next_alt; PCRE2_SPTR end; PCRE2_SPTR accelerated_start; @@ -1475,31 +1476,19 @@ do case OP_CBRA: end = cc + GET(cc, 1); - if (*end == OP_KET && PRIVATE_DATA(end) == 0) - { - if (*cc == OP_CBRA) - { - if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) - break; - cc += IMM2_SIZE; - } - - cc += 1 + LINK_SIZE; - continue; - } - fast_forward_allowed = FALSE; if (depth >= 4) break; end = bracketend(cc) - (1 + LINK_SIZE); - if (*end != OP_KET || PRIVATE_DATA(end) != 0) - break; - - if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0) + if (*end != OP_KET || (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)) break; count = detect_early_fail(common, cc, private_data_start, depth + 1, count); + + if (PRIVATE_DATA(cc) != 0) + common->private_data_ptrs[begin - common->start] = 1; + if (count < EARLY_FAIL_ENHANCE_MAX) { cc = end + (1 + LINK_SIZE); @@ -1555,6 +1544,8 @@ do return EARLY_FAIL_ENHANCE_MAX; } + /* Cannot be part of a repeat. */ + common->private_data_ptrs[begin - common->start] = 1; count++; if (count < EARLY_FAIL_ENHANCE_MAX) @@ -1620,11 +1611,12 @@ sljit_sw length = end - begin; sljit_s32 min, max, i; /* Detect fixed iterations first. */ -if (end[-(1 + LINK_SIZE)] != OP_KET) +if (end[-(1 + LINK_SIZE)] != OP_KET || PRIVATE_DATA(begin) != 0) return FALSE; -/* Already detected repeat. */ -if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0) +/* /(?:AB){4,6}/ is currently converted to /(?:AB){3}(?AB){1,3}/ + * Skip the check of the second part. */ +if (PRIVATE_DATA(end - LINK_SIZE) == 0) return TRUE; next = end; @@ -1763,6 +1755,7 @@ while (cc < ccend) if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE) break; + /* When the bracket is prefixed by a zero iteration, skip the repeat check (at this point). */ if (repeat_check && (*cc == OP_ONCE || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND)) { if (detect_repeat(common, cc)) @@ -1813,6 +1806,7 @@ while (cc < ccend) case OP_COND: /* Might be a hidden SCOND. */ + common->private_data_ptrs[cc - common->start] = 0; alternative = cc + GET(cc, 1); if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN) { @@ -13661,10 +13655,12 @@ if (!common->private_data_ptrs) memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); -set_private_data_ptrs(common, &private_data_size, ccend); + if ((re->overall_options & PCRE2_ANCHORED) == 0 && (re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 && !common->has_skip_in_assert_back) detect_early_fail(common, common->start, &private_data_size, 0, 0); +set_private_data_ptrs(common, &private_data_size, ccend); + SLJIT_ASSERT(common->early_fail_start_ptr <= common->early_fail_end_ptr); if (private_data_size > SLJIT_MAX_LOCAL_SIZE) diff --git a/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h b/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h index 5673d338c0..5fd97b15bd 100644 --- a/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h +++ b/src/3rdparty/pcre2/src/pcre2_jit_simd_inc.h @@ -39,7 +39,29 @@ POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) +#if !(defined SUPPORT_VALGRIND) + +#if ((defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X)) + +typedef enum { + vector_compare_match1, + vector_compare_match1i, + vector_compare_match2, +} vector_compare_type; + +static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) +{ +#if PCRE2_CODE_UNIT_WIDTH == 8 +return 15; +#elif PCRE2_CODE_UNIT_WIDTH == 16 +return 7; +#elif PCRE2_CODE_UNIT_WIDTH == 32 +return 3; +#else +#error "Unsupported unit width" +#endif +} #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 static struct sljit_jump *jump_if_utf_char_start(struct sljit_compiler *compiler, sljit_s32 reg) @@ -56,6 +78,10 @@ return CMP(SLJIT_NOT_EQUAL, reg, 0, SLJIT_IMM, 0xdc00); } #endif +#endif /* SLJIT_CONFIG_X86 || SLJIT_CONFIG_S390X */ + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) + static sljit_s32 character_to_int32(PCRE2_UCHAR chr) { sljit_u32 value = chr; @@ -97,13 +123,7 @@ instruction[4] = (sljit_u8)offset; sljit_emit_op_custom(compiler, instruction, 5); } -typedef enum { - sse2_compare_match1, - sse2_compare_match1i, - sse2_compare_match2, -} sse2_compare_type; - -static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, sse2_compare_type compare_type, +static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) { sljit_u8 instruction[4]; @@ -112,11 +132,11 @@ instruction[1] = 0x0f; SLJIT_ASSERT(step >= 0 && step <= 3); -if (compare_type != sse2_compare_match2) +if (compare_type != vector_compare_match2) { if (step == 0) { - if (compare_type == sse2_compare_match1i) + if (compare_type == vector_compare_match1i) { /* POR xmm1, xmm2/m128 */ /* instruction[0] = 0x66; */ @@ -185,14 +205,14 @@ switch (step) static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) { DEFINE_COMPILER; +sljit_u8 instruction[8]; struct sljit_label *start; #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 struct sljit_label *restart; #endif struct sljit_jump *quit; struct sljit_jump *partial_quit[2]; -sse2_compare_type compare_type = sse2_compare_match1; -sljit_u8 instruction[8]; +vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data_ind = 0; @@ -207,12 +227,12 @@ SLJIT_UNUSED_ARG(offset); if (char1 != char2) { bit = char1 ^ char2; - compare_type = sse2_compare_match1i; + compare_type = vector_compare_match1i; if (!is_powerof2(bit)) { bit = 0; - compare_type = sse2_compare_match2; + compare_type = vector_compare_match2; } } @@ -349,11 +369,11 @@ if (common->utf && offset > 0) static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) { DEFINE_COMPILER; +sljit_u8 instruction[8]; struct sljit_label *start; struct sljit_jump *quit; jump_list *not_found = NULL; -sse2_compare_type compare_type = sse2_compare_match1; -sljit_u8 instruction[8]; +vector_compare_type compare_type = vector_compare_match1; sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); sljit_s32 data_ind = 0; @@ -366,12 +386,12 @@ int i; if (char1 != char2) { bit = char1 ^ char2; - compare_type = sse2_compare_match1i; + compare_type = vector_compare_match1i; if (!is_powerof2(bit)) { bit = 0; - compare_type = sse2_compare_match2; + compare_type = vector_compare_match2; } } @@ -476,27 +496,15 @@ return not_found; #ifndef _WIN64 -static SLJIT_INLINE sljit_u32 max_fast_forward_char_pair_offset(void) -{ -#if PCRE2_CODE_UNIT_WIDTH == 8 -return 15; -#elif PCRE2_CODE_UNIT_WIDTH == 16 -return 7; -#elif PCRE2_CODE_UNIT_WIDTH == 32 -return 3; -#else -#error "Unsupported unit width" -#endif -} - #define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD (sljit_has_cpu_feature(SLJIT_HAS_SSE2)) static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) { DEFINE_COMPILER; -sse2_compare_type compare1_type = sse2_compare_match1; -sse2_compare_type compare2_type = sse2_compare_match1; +sljit_u8 instruction[8]; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; sljit_u32 bit1 = 0; sljit_u32 bit2 = 0; sljit_u32 diff = IN_UCHARS(offs1 - offs2); @@ -516,7 +524,6 @@ struct sljit_label *start; struct sljit_label *restart; #endif struct sljit_jump *jump[2]; -sljit_u8 instruction[8]; int i; SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); @@ -549,13 +556,13 @@ else bit1 = char1a ^ char1b; if (is_powerof2(bit1)) { - compare1_type = sse2_compare_match1i; + compare1_type = vector_compare_match1i; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a | bit1)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit1)); } else { - compare1_type = sse2_compare_match2; + compare1_type = vector_compare_match2; bit1 = 0; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1a)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char1b)); @@ -578,13 +585,13 @@ else bit2 = char2a ^ char2b; if (is_powerof2(bit2)) { - compare2_type = sse2_compare_match1i; + compare2_type = vector_compare_match1i; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a | bit2)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(bit2)); } else { - compare2_type = sse2_compare_match2; + compare2_type = vector_compare_match2; bit2 = 0; OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char2a)); OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, character_to_int32(char2b)); @@ -731,9 +738,6 @@ OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); -if (common->match_end_ptr != 0) - OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); - #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 if (common->utf) { @@ -760,7 +764,7 @@ if (common->match_end_ptr != 0) #undef SSE2_COMPARE_TYPE_INDEX -#endif /* SLJIT_CONFIG_X86 && !SUPPORT_VALGRIND */ +#endif /* SLJIT_CONFIG_X86 */ #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 && (defined __ARM_NEON || defined __ARM_NEON__)) @@ -1121,3 +1125,743 @@ JUMPHERE(partial_quit); } #endif /* SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64 */ + +#if (defined SLJIT_CONFIG_S390X && SLJIT_CONFIG_S390X) + +#if PCRE2_CODE_UNIT_WIDTH == 8 +#define VECTOR_ELEMENT_SIZE 0 +#elif PCRE2_CODE_UNIT_WIDTH == 16 +#define VECTOR_ELEMENT_SIZE 1 +#elif PCRE2_CODE_UNIT_WIDTH == 32 +#define VECTOR_ELEMENT_SIZE 2 +#else +#error "Unsupported unit width" +#endif + +static void load_from_mem_vector(struct sljit_compiler *compiler, BOOL vlbb, sljit_s32 dst_vreg, + sljit_s32 base_reg, sljit_s32 index_reg) +{ +sljit_u16 instruction[3]; + +instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | index_reg); +instruction[1] = (sljit_u16)(base_reg << 12); +instruction[2] = (sljit_u16)((0x8 << 8) | (vlbb ? 0x07 : 0x06)); + +sljit_emit_op_custom(compiler, instruction, 6); +} + +#if PCRE2_CODE_UNIT_WIDTH == 32 + +static void replicate_imm_vector(struct sljit_compiler *compiler, int step, sljit_s32 dst_vreg, + PCRE2_UCHAR chr, sljit_s32 tmp_general_reg) +{ +sljit_u16 instruction[3]; + +SLJIT_ASSERT(step >= 0 && step <= 1); + +if (chr < 0x7fff) + { + if (step == 1) + return; + + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4)); + instruction[1] = (sljit_u16)chr; + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +if (step == 0) + { + OP1(SLJIT_MOV, tmp_general_reg, 0, SLJIT_IMM, chr); + + /* VLVG */ + instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | sljit_get_register_index(tmp_general_reg)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x22); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +/* VREP */ +instruction[0] = (sljit_u16)(0xe700 | (dst_vreg << 4) | dst_vreg); +instruction[1] = 0; +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xc << 8) | 0x4d); +sljit_emit_op_custom(compiler, instruction, 6); +} + +#endif + +static void fast_forward_char_pair_sse2_compare(struct sljit_compiler *compiler, vector_compare_type compare_type, + int step, sljit_s32 dst_ind, sljit_s32 cmp1_ind, sljit_s32 cmp2_ind, sljit_s32 tmp_ind) +{ +sljit_u16 instruction[3]; + +SLJIT_ASSERT(step >= 0 && step <= 2); + +if (step == 1) + { + /* VCEQ */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp1_ind << 12); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } + +if (compare_type != vector_compare_match2) + { + if (step == 0 && compare_type == vector_compare_match1i) + { + /* VO */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp2_ind << 12); + instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); + sljit_emit_op_custom(compiler, instruction, 6); + } + return; + } + +switch (step) + { + case 0: + /* VCEQ */ + instruction[0] = (sljit_u16)(0xe700 | (tmp_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(cmp2_ind << 12); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0xf8); + sljit_emit_op_custom(compiler, instruction, 6); + return; + + case 2: + /* VO */ + instruction[0] = (sljit_u16)(0xe700 | (dst_ind << 4) | dst_ind); + instruction[1] = (sljit_u16)(tmp_ind << 12); + instruction[2] = (sljit_u16)((0xe << 8) | 0x6a); + sljit_emit_op_custom(compiler, instruction, 6); + return; + } +} + +#define JIT_HAS_FAST_FORWARD_CHAR_SIMD 1 + +static void fast_forward_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2, sljit_s32 offset) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *partial_quit[2]; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_s32 zero_ind = 4; +sljit_u32 bit = 0; +int i; + +SLJIT_UNUSED_ARG(offset); + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +partial_quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[0]); + +/* First part (unaligned start) */ + +OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); +instruction[1] = (sljit_u16)(char1 | bit); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1 != char2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); + instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP1); + + if (char1 != char2) + replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP1); + } + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +if (compare_type == vector_compare_match2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + } + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); + +partial_quit[1] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); +if (common->mode == PCRE2_JIT_COMPLETE) + add_jump(compiler, &common->failed_match, partial_quit[1]); + +load_from_mem_vector(compiler, TRUE, data_ind, str_ptr_reg_ind, 0); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +CMPTO(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 16, start); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); + +JUMPHERE(quit); + +if (common->mode != PCRE2_JIT_COMPLETE) + { + JUMPHERE(partial_quit[0]); + JUMPHERE(partial_quit[1]); + OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0); + CMOV(SLJIT_GREATER, STR_PTR, STR_END, 0); + } +else + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf && offset > 0) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 16); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif +} + +#define JIT_HAS_FAST_REQUESTED_CHAR_SIMD 1 + +static jump_list *fast_requested_char_simd(compiler_common *common, PCRE2_UCHAR char1, PCRE2_UCHAR char2) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +struct sljit_jump *quit; +jump_list *not_found = NULL; +vector_compare_type compare_type = vector_compare_match1; +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp3_reg_ind = sljit_get_register_index(TMP3); +sljit_s32 data_ind = 0; +sljit_s32 tmp_ind = 1; +sljit_s32 cmp1_ind = 2; +sljit_s32 cmp2_ind = 3; +sljit_s32 zero_ind = 4; +sljit_u32 bit = 0; +int i; + +if (char1 != char2) + { + bit = char1 ^ char2; + compare_type = vector_compare_match1i; + + if (!is_powerof2(bit)) + { + bit = 0; + compare_type = vector_compare_match2; + } + } + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +/* First part (unaligned start) */ + +OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, 16); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1_ind << 4)); +instruction[1] = (sljit_u16)(char1 | bit); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1 != char2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2_ind << 4)); + instruction[1] = (sljit_u16)(bit != 0 ? bit : char2); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1_ind, char1 | bit, TMP3); + + if (char1 != char2) + replicate_imm_vector(compiler, i, cmp2_ind, bit != 0 ? bit : char2, TMP3); + } + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +if (compare_type == vector_compare_match2) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); + instruction[1] = 0; + instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); + sljit_emit_op_custom(compiler, instruction, 6); + } + +load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); +OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, ~15); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); +quit = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); + +OP2(SLJIT_SUB, TMP1, 0, TMP2, 0, SLJIT_IMM, 16); + +/* Second part (aligned) */ +start = LABEL(); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 16); + +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +load_from_mem_vector(compiler, TRUE, data_ind, tmp1_reg_ind, 0); + +if (compare_type != vector_compare_match2) + { + if (compare_type == vector_compare_match1i) + fast_forward_char_pair_sse2_compare(compiler, compare_type, 0, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFEE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((cmp1_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0xe << 8) | 0x80); + sljit_emit_op_custom(compiler, instruction, 6); + } +else + { + for (i = 0; i < 3; i++) + fast_forward_char_pair_sse2_compare(compiler, compare_type, i, data_ind, cmp1_ind, cmp2_ind, tmp_ind); + + /* VFENE */ + instruction[0] = (sljit_u16)(0xe700 | (data_ind << 4) | data_ind); + instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); + instruction[2] = (sljit_u16)((0xe << 8) | 0x81); + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp3_reg_ind << 4) | data_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +CMPTO(SLJIT_GREATER_EQUAL, TMP3, 0, SLJIT_IMM, 16, start); + +OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP3, 0); + +JUMPHERE(quit); +add_jump(compiler, ¬_found, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0)); + +return not_found; +} + +#define JIT_HAS_FAST_FORWARD_CHAR_PAIR_SIMD 1 + +static void fast_forward_char_pair_simd(compiler_common *common, sljit_s32 offs1, + PCRE2_UCHAR char1a, PCRE2_UCHAR char1b, sljit_s32 offs2, PCRE2_UCHAR char2a, PCRE2_UCHAR char2b) +{ +DEFINE_COMPILER; +sljit_u16 instruction[3]; +struct sljit_label *start; +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +struct sljit_label *restart; +#endif +struct sljit_jump *quit; +struct sljit_jump *jump[2]; +vector_compare_type compare1_type = vector_compare_match1; +vector_compare_type compare2_type = vector_compare_match1; +sljit_u32 bit1 = 0; +sljit_u32 bit2 = 0; +sljit_s32 diff = IN_UCHARS(offs2 - offs1); +sljit_s32 tmp1_reg_ind = sljit_get_register_index(TMP1); +sljit_s32 tmp2_reg_ind = sljit_get_register_index(TMP2); +sljit_s32 str_ptr_reg_ind = sljit_get_register_index(STR_PTR); +sljit_s32 data1_ind = 0; +sljit_s32 data2_ind = 1; +sljit_s32 tmp1_ind = 2; +sljit_s32 tmp2_ind = 3; +sljit_s32 cmp1a_ind = 4; +sljit_s32 cmp1b_ind = 5; +sljit_s32 cmp2a_ind = 6; +sljit_s32 cmp2b_ind = 7; +sljit_s32 zero_ind = 8; +int i; + +SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE && offs1 > offs2); +SLJIT_ASSERT(-diff <= (sljit_s32)IN_UCHARS(max_fast_forward_char_pair_offset())); +SLJIT_ASSERT(tmp1_reg_ind != 0 && tmp2_reg_ind != 0); + +if (char1a != char1b) + { + bit1 = char1a ^ char1b; + compare1_type = vector_compare_match1i; + + if (!is_powerof2(bit1)) + { + bit1 = 0; + compare1_type = vector_compare_match2; + } + } + +if (char2a != char2b) + { + bit2 = char2a ^ char2b; + compare2_type = vector_compare_match1i; + + if (!is_powerof2(bit2)) + { + bit2 = 0; + compare2_type = vector_compare_match2; + } + } + +/* Initialize. */ +if (common->match_end_ptr != 0) + { + OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); + OP1(SLJIT_MOV, TMP3, 0, STR_END, 0); + OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(offs1 + 1)); + + OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP1, 0, STR_END, 0); + CMOV(SLJIT_LESS, STR_END, TMP1, 0); + } + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); +OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); + +#if PCRE2_CODE_UNIT_WIDTH != 32 + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp1a_ind << 4)); +instruction[1] = (sljit_u16)(char1a | bit1); +instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +if (char1a != char1b) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp1b_ind << 4)); + instruction[1] = (sljit_u16)(bit1 != 0 ? bit1 : char1b); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (cmp2a_ind << 4)); +instruction[1] = (sljit_u16)(char2a | bit2); +/* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ +sljit_emit_op_custom(compiler, instruction, 6); + +if (char2a != char2b) + { + /* VREPI */ + instruction[0] = (sljit_u16)(0xe700 | (cmp2b_ind << 4)); + instruction[1] = (sljit_u16)(bit2 != 0 ? bit2 : char2b); + /* instruction[2] = (sljit_u16)((VECTOR_ELEMENT_SIZE << 12) | (0x8 << 8) | 0x45); */ + sljit_emit_op_custom(compiler, instruction, 6); + } + +#else /* PCRE2_CODE_UNIT_WIDTH == 32 */ + +for (int i = 0; i < 2; i++) + { + replicate_imm_vector(compiler, i, cmp1a_ind, char1a | bit1, TMP1); + + if (char1a != char1b) + replicate_imm_vector(compiler, i, cmp1b_ind, bit1 != 0 ? bit1 : char1b, TMP1); + + replicate_imm_vector(compiler, i, cmp2a_ind, char2a | bit2, TMP1); + + if (char2a != char2b) + replicate_imm_vector(compiler, i, cmp2b_ind, bit2 != 0 ? bit2 : char2b, TMP1); + } + +OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + +#endif /* PCRE2_CODE_UNIT_WIDTH != 32 */ + +/* VREPI */ +instruction[0] = (sljit_u16)(0xe700 | (zero_ind << 4)); +instruction[1] = 0; +instruction[2] = (sljit_u16)((0x8 << 8) | 0x45); +sljit_emit_op_custom(compiler, instruction, 6); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +restart = LABEL(); +#endif + +jump[0] = CMP(SLJIT_LESS, TMP1, 0, TMP2, 0); +load_from_mem_vector(compiler, TRUE, data2_ind, tmp1_reg_ind, 0); +jump[1] = JUMP(SLJIT_JUMP); +JUMPHERE(jump[0]); +load_from_mem_vector(compiler, FALSE, data2_ind, tmp1_reg_ind, 0); +JUMPHERE(jump[1]); + +load_from_mem_vector(compiler, TRUE, data1_ind, str_ptr_reg_ind, 0); +OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 16); + +for (i = 0; i < 3; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + } + +/* VN */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)(data2_ind << 12); +instruction[2] = (sljit_u16)((0xe << 8) | 0x68); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VFENE */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); +instruction[2] = (sljit_u16)((0xe << 8) | 0x81); +sljit_emit_op_custom(compiler, instruction, 6); + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp1_reg_ind << 4) | data1_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); +quit = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0); + +OP2(SLJIT_SUB, STR_PTR, 0, TMP2, 0, SLJIT_IMM, 16); +OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, diff); + +/* Main loop. */ +start = LABEL(); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +load_from_mem_vector(compiler, FALSE, data1_ind, str_ptr_reg_ind, 0); +load_from_mem_vector(compiler, FALSE, data2_ind, str_ptr_reg_ind, tmp1_reg_ind); + +for (i = 0; i < 3; i++) + { + fast_forward_char_pair_sse2_compare(compiler, compare1_type, i, data1_ind, cmp1a_ind, cmp1b_ind, tmp1_ind); + fast_forward_char_pair_sse2_compare(compiler, compare2_type, i, data2_ind, cmp2a_ind, cmp2b_ind, tmp2_ind); + } + +/* VN */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)(data2_ind << 12); +instruction[2] = (sljit_u16)((0xe << 8) | 0x68); +sljit_emit_op_custom(compiler, instruction, 6); + +/* VFENE */ +instruction[0] = (sljit_u16)(0xe700 | (data1_ind << 4) | data1_ind); +instruction[1] = (sljit_u16)((zero_ind << 12) | (1 << 4)); +instruction[2] = (sljit_u16)((0xe << 8) | 0x81); +sljit_emit_op_custom(compiler, instruction, 6); + +/* TODO: use sljit_set_current_flags */ + +/* VLGVB */ +instruction[0] = (sljit_u16)(0xe700 | (tmp2_reg_ind << 4) | data1_ind); +instruction[1] = 7; +instruction[2] = (sljit_u16)((0x4 << 8) | 0x21); +sljit_emit_op_custom(compiler, instruction, 6); + +CMPTO(SLJIT_GREATER_EQUAL, TMP2, 0, SLJIT_IMM, 16, start); + +OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); + +JUMPHERE(quit); + +add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + +#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32 +if (common->utf) + { + SLJIT_ASSERT(common->mode == PCRE2_JIT_COMPLETE); + + OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offs1)); + + quit = jump_if_utf_char_start(compiler, TMP1); + + OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); + add_jump(compiler, &common->failed_match, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0)); + + /* TMP1 contains diff. */ + OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, ~15); + OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, -diff); + JUMPTO(SLJIT_JUMP, restart); + + JUMPHERE(quit); + } +#endif + +OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offs1)); + +if (common->match_end_ptr != 0) + OP1(SLJIT_MOV, STR_END, 0, TMP3, 0); +} + +#endif /* SLJIT_CONFIG_S390X */ + +#endif /* !SUPPORT_VALGRIND */ diff --git a/src/3rdparty/pcre2/src/pcre2_match.c b/src/3rdparty/pcre2/src/pcre2_match.c index e3f78c2ca3..ed60517131 100644 --- a/src/3rdparty/pcre2/src/pcre2_match.c +++ b/src/3rdparty/pcre2/src/pcre2_match.c @@ -818,10 +818,12 @@ fprintf(stderr, "++ op=%d\n", *Fecode); /* N is now the frame of the recursion; the previous frame is at the OP_RECURSE position. Go back there, copying the current subject position - and mark, and move on past the OP_RECURSE. */ + and mark, and the start_match position (\K might have changed it), and + then move on past the OP_RECURSE. */ P->eptr = Feptr; P->mark = Fmark; + P->start_match = Fstart_match; F = P; Fecode += 1 + LINK_SIZE; continue; diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h index eb1132db30..ff36e5b7c6 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h +++ b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h @@ -158,6 +158,8 @@ extern "C" { #define SLJIT_CONFIG_MIPS_64 1 #elif defined(__sparc__) || defined(__sparc) #define SLJIT_CONFIG_SPARC_32 1 +#elif defined(__s390x__) +#define SLJIT_CONFIG_S390X 1 #else /* Unsupported architecture */ #define SLJIT_CONFIG_UNSUPPORTED 1 diff --git a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c index 61a32f23e9..6e5bf78e45 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c +++ b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c @@ -79,6 +79,7 @@ */ #ifdef _WIN32 +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) static SLJIT_INLINE void* alloc_chunk(sljit_uw size) { @@ -91,96 +92,108 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) VirtualFree(chunk, 0, MEM_RELEASE); } -#else - -#ifdef __APPLE__ -#ifdef MAP_ANON -/* Configures TARGET_OS_OSX when appropriate */ -#include - -#if TARGET_OS_OSX && defined(MAP_JIT) -#include -#endif /* TARGET_OS_OSX && MAP_JIT */ - -#ifdef MAP_JIT +#else /* POSIX */ +#if defined(__APPLE__) && defined(MAP_JIT) /* On macOS systems, returns MAP_JIT if it is defined _and_ we're running on a - version where it's OK to have more than one JIT block. + version where it's OK to have more than one JIT block or where MAP_JIT is + required. On non-macOS systems, returns MAP_JIT if it is defined. */ +#include +#if TARGET_OS_OSX +#if defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86 +#ifdef MAP_ANON +#include +#include + +#define SLJIT_MAP_JIT (get_map_jit_flag()) + static SLJIT_INLINE int get_map_jit_flag() { -#if TARGET_OS_OSX - sljit_sw page_size = get_page_alignment() + 1; + sljit_sw page_size; void *ptr; + struct utsname name; static int map_jit_flag = -1; - /* - The following code is thread safe because multiple initialization - sets map_jit_flag to the same value and the code has no side-effects. - Changing the kernel version witout system restart is (very) unlikely. - */ - if (map_jit_flag == -1) { - struct utsname name; - + if (map_jit_flag < 0) { map_jit_flag = 0; uname(&name); - /* Kernel version for 10.14.0 (Mojave) */ + /* Kernel version for 10.14.0 (Mojave) or later */ if (atoi(name.release) >= 18) { + page_size = get_page_alignment() + 1; /* Only use MAP_JIT if a hardened runtime is used */ + ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANON, -1, 0); - ptr = mmap(NULL, page_size, PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); - - if (ptr == MAP_FAILED) { - map_jit_flag = MAP_JIT; - } else { + if (ptr != MAP_FAILED) munmap(ptr, page_size); - } + else + map_jit_flag = MAP_JIT; } } - return map_jit_flag; -#else /* !TARGET_OS_OSX */ - return MAP_JIT; -#endif /* TARGET_OS_OSX */ } - -#endif /* MAP_JIT */ #endif /* MAP_ANON */ -#endif /* __APPLE__ */ +#else /* !SLJIT_CONFIG_X86 */ +#if !(defined SLJIT_CONFIG_ARM && SLJIT_CONFIG_ARM) +#error Unsupported architecture +#endif /* SLJIT_CONFIG_ARM */ +#include + +#define SLJIT_MAP_JIT (MAP_JIT) +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) \ + apple_update_wx_flags(enable_exec) + +static SLJIT_INLINE void apple_update_wx_flags(sljit_s32 enable_exec) +{ + pthread_jit_write_protect_np(enable_exec); +} +#endif /* SLJIT_CONFIG_X86 */ +#else /* !TARGET_OS_OSX */ +#define SLJIT_MAP_JIT (MAP_JIT) +#endif /* TARGET_OS_OSX */ +#endif /* __APPLE__ && MAP_JIT */ +#ifndef SLJIT_UPDATE_WX_FLAGS +#define SLJIT_UPDATE_WX_FLAGS(from, to, enable_exec) +#endif /* !SLJIT_UPDATE_WX_FLAGS */ +#ifndef SLJIT_MAP_JIT +#define SLJIT_MAP_JIT (0) +#endif /* !SLJIT_MAP_JIT */ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) { void *retval; - const int prot = PROT_READ | PROT_WRITE | PROT_EXEC; - -#ifdef MAP_ANON + int prot = PROT_READ | PROT_WRITE | PROT_EXEC; + int flags = MAP_PRIVATE; + int fd = -1; - int flags = MAP_PRIVATE | MAP_ANON; - -#ifdef MAP_JIT - flags |= get_map_jit_flag(); +#ifdef PROT_MAX + prot |= PROT_MAX(prot); #endif - retval = mmap(NULL, size, prot, flags, -1, 0); +#ifdef MAP_ANON + flags |= MAP_ANON | SLJIT_MAP_JIT; #else /* !MAP_ANON */ if (SLJIT_UNLIKELY((dev_zero < 0) && open_dev_zero())) return NULL; - retval = mmap(NULL, size, prot, MAP_PRIVATE, dev_zero, 0); + fd = dev_zero; #endif /* MAP_ANON */ + retval = mmap(NULL, size, prot, flags, fd, 0); if (retval == MAP_FAILED) - retval = NULL; - else { - if (mprotect(retval, size, prot) < 0) { - munmap(retval, size); - retval = NULL; - } + return NULL; + + if (mprotect(retval, size, PROT_READ | PROT_WRITE | PROT_EXEC) < 0) { + munmap(retval, size); + return NULL; } + SLJIT_UPDATE_WX_FLAGS(retval, (uint8_t *)retval + size, 0); + return retval; } @@ -189,7 +202,7 @@ static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) munmap(chunk, size); } -#endif +#endif /* windows */ /* --------------------------------------------------------------------- */ /* Common functions */ @@ -261,6 +274,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) while (free_block) { if (free_block->size >= size) { chunk_size = free_block->size; + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); if (chunk_size > size + 64) { /* We just cut a block from the end of the free block. */ chunk_size -= size; @@ -326,6 +340,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) allocated_size -= header->size; /* Connecting free blocks together if possible. */ + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); /* If header->prev_size == 0, free_block will equal to header. In this case, free_block->header.size will be > 0. */ @@ -358,6 +373,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr) } } + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); SLJIT_ALLOCATOR_UNLOCK(); } @@ -367,6 +383,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) struct free_block* next_free_block; SLJIT_ALLOCATOR_LOCK(); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 0); free_block = free_blocks; while (free_block) { @@ -381,5 +398,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void) } SLJIT_ASSERT((total_size && free_blocks) || (!total_size && !free_blocks)); + SLJIT_UPDATE_WX_FLAGS(NULL, NULL, 1); SLJIT_ALLOCATOR_UNLOCK(); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c index a8b65112d4..3d007fe8a1 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c @@ -42,7 +42,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) typedef sljit_uw sljit_ins; /* Instruction tags (most significant halfword). */ -const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; +static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48; static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { 14, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1 @@ -66,22 +66,22 @@ typedef sljit_uw sljit_gpr; * will be retired ASAP (TODO: carenas) */ -const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ -const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ -const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ -const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ -const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ -const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ -const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ -const sljit_gpr r7 = 7; /* reg_map[6] */ -const sljit_gpr r8 = 8; /* reg_map[7] */ -const sljit_gpr r9 = 9; /* reg_map[8] */ -const sljit_gpr r10 = 10; /* reg_map[9] */ -const sljit_gpr r11 = 11; /* reg_map[10] */ -const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ -const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ -const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ -const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ +static const sljit_gpr r0 = 0; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */ +static const sljit_gpr r1 = 1; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */ +static const sljit_gpr r2 = 2; /* reg_map[1]: 1st argument */ +static const sljit_gpr r3 = 3; /* reg_map[2]: 2nd argument */ +static const sljit_gpr r4 = 4; /* reg_map[3]: 3rd argument */ +static const sljit_gpr r5 = 5; /* reg_map[4]: 4th argument */ +static const sljit_gpr r6 = 6; /* reg_map[5]: 5th argument; 1st saved register */ +static const sljit_gpr r7 = 7; /* reg_map[6] */ +static const sljit_gpr r8 = 8; /* reg_map[7] */ +static const sljit_gpr r9 = 9; /* reg_map[8] */ +static const sljit_gpr r10 = 10; /* reg_map[9] */ +static const sljit_gpr r11 = 11; /* reg_map[10] */ +static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */ +static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */ +static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */ +static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */ /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */ /* TODO(carenas): r12 might conflict in PIC code, reserve? */ @@ -100,8 +100,8 @@ const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack point /* Link registers. The normal link register is r14, but since we use that for flags we need to use r0 instead to do fast calls so that flags are preserved. */ -const sljit_gpr link_r = 14; /* r14 */ -const sljit_gpr fast_link_r = 0; /* r0 */ +static const sljit_gpr link_r = 14; /* r14 */ +static const sljit_gpr fast_link_r = 0; /* r0 */ /* Flag register layout: @@ -110,7 +110,7 @@ const sljit_gpr fast_link_r = 0; /* r0 */ | ZERO | 0 | 0 | C C |///////| +---------------+---+---+-------+-------+ */ -const sljit_gpr flag_r = 14; /* r14 */ +static const sljit_gpr flag_r = 14; /* r14 */ struct sljit_s390x_const { struct sljit_const const_; /* must be first */ @@ -1465,7 +1465,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile op = GET_OPCODE(op) | (op & SLJIT_I32_OP); switch (op) { case SLJIT_BREAKPOINT: - /* TODO(mundaym): insert real breakpoint? */ + /* The following invalid instruction is emitted by gdb. */ + return push_inst(compiler, 0x0001 /* 2-byte trap */); case SLJIT_NOP: return push_inst(compiler, 0x0700 /* 2-byte nop */); case SLJIT_LMUL_UW: diff --git a/src/3rdparty/pcre2/src/sljit/sljitUtils.c b/src/3rdparty/pcre2/src/sljit/sljitUtils.c index 08ca35cf37..9bce714735 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitUtils.c +++ b/src/3rdparty/pcre2/src/sljit/sljitUtils.c @@ -48,7 +48,7 @@ static HANDLE allocator_lock; static SLJIT_INLINE void allocator_grab_lock(void) { HANDLE lock; - if (SLJIT_UNLIKELY(!allocator_lock)) { + if (SLJIT_UNLIKELY(!InterlockedCompareExchangePointer(&allocator_lock, NULL, NULL))) { lock = CreateMutex(NULL, FALSE, NULL); if (InterlockedCompareExchangePointer(&allocator_lock, lock, NULL)) CloseHandle(lock); @@ -146,9 +146,13 @@ static SLJIT_INLINE sljit_sw get_page_alignment(void) { #include static SLJIT_INLINE sljit_sw get_page_alignment(void) { - static sljit_sw sljit_page_align; - if (!sljit_page_align) { + static sljit_sw sljit_page_align = -1; + if (sljit_page_align < 0) { +#ifdef _SC_PAGESIZE sljit_page_align = sysconf(_SC_PAGESIZE); +#else + sljit_page_align = getpagesize(); +#endif /* Should never happen. */ if (sljit_page_align < 0) sljit_page_align = 4096; diff --git a/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c index 6ef71f7d83..72d5b8dd2b 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c +++ b/src/3rdparty/pcre2/src/sljit/sljitWXExecAllocator.c @@ -121,14 +121,18 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) static pthread_mutex_t se_lock = PTHREAD_MUTEX_INITIALIZER; #endif static int se_protected = !SLJIT_PROT_WX; + int prot = PROT_READ | PROT_WRITE | SLJIT_PROT_WX; sljit_uw* ptr; if (SLJIT_UNLIKELY(se_protected < 0)) return NULL; +#ifdef PROT_MAX + prot |= PROT_MAX(PROT_READ | PROT_WRITE | PROT_EXEC); +#endif + size += sizeof(sljit_uw); - ptr = (sljit_uw*)mmap(NULL, size, PROT_READ | PROT_WRITE | SLJIT_PROT_WX, - MAP_PRIVATE | MAP_ANON, -1, 0); + ptr = (sljit_uw*)mmap(NULL, size, prot, MAP_PRIVATE | MAP_ANON, -1, 0); if (ptr == MAP_FAILED) return NULL; -- cgit v1.2.3