summary refs log tree commit diff stats
path: root/src/3rdparty/pcre2/src/sljit
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2/src/sljit')
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h 29
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c 9
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitLir.c 10
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitLir.h 20
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c 250
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c 112
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c 4
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c 4
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c 48
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c 79
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c 120
-rw-r--r-- src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c 17
12 files changed, 467 insertions, 235 deletions
diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
index e13282c842..f5703e8e7f 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h
@@ -66,7 +66,7 @@
SLJIT_RETURN_ADDRESS_OFFSET : a return instruction always adds this offset to the return address
Other macros:
- SLJIT_FUNC : calling convention attribute for both calling JIT form C and C calling back from JIT
+ SLJIT_FUNC : calling convention attribute for both calling JIT from C and C calling back from JIT
SLJIT_W(number) : defining 64 bit constants on 64 bit architectures (compiler independent helper)
*/
@@ -147,17 +147,23 @@
#define SLJIT_CONFIG_UNSUPPORTED 1
#endif
-#else /* !_WIN32 */
+#else /* _WIN32 */
#if defined(_M_X64) || defined(__x86_64__)
#define SLJIT_CONFIG_X86_64 1
+#elif (defined(_M_ARM) && _M_ARM >= 7 && defined(_M_ARMT)) || defined(__thumb2__)
+#define SLJIT_CONFIG_ARM_THUMB2 1
+#elif (defined(_M_ARM) && _M_ARM >= 7)
+#define SLJIT_CONFIG_ARM_V7 1
#elif defined(_ARM_)
#define SLJIT_CONFIG_ARM_V5 1
+#elif defined(_M_ARM64) || defined(__aarch64__)
+#define SLJIT_CONFIG_ARM_64 1
#else
#define SLJIT_CONFIG_X86_32 1
#endif
-#endif /* !WIN32 */
+#endif /* !_WIN32 */
#endif /* SLJIT_CONFIG_AUTO */
#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED)
@@ -324,6 +330,11 @@
sparc_cache_flush((from), (to))
#define SLJIT_CACHE_FLUSH_OWN_IMPL 1
+#elif defined _WIN32
+
+#define SLJIT_CACHE_FLUSH(from, to) \
+ FlushInstructionCache(GetCurrentProcess(), (char*)(from), (char*)(to) - (char*)(from))
+
#else
/* Calls __ARM_NR_cacheflush on ARM-Linux. */
@@ -371,12 +382,18 @@ typedef int sljit_sw;
#define SLJIT_64BIT_ARCHITECTURE 1
#define SLJIT_WORD_SHIFT 3
#ifdef _WIN32
+#ifdef __GNUC__
+/* These types do not require windows.h */
+typedef unsigned long long sljit_uw;
+typedef long long sljit_sw;
+#else
typedef unsigned __int64 sljit_uw;
typedef __int64 sljit_sw;
-#else
+#endif
+#else /* !_WIN32 */
typedef unsigned long int sljit_uw;
typedef long int sljit_sw;
-#endif
+#endif /* _WIN32 */
#endif
typedef sljit_uw sljit_p;
@@ -590,7 +607,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr);
#define SLJIT_NUMBER_OF_REGISTERS 26
#define SLJIT_NUMBER_OF_SAVED_REGISTERS 10
-#define SLJIT_LOCALS_OFFSET_BASE (2 * sizeof(sljit_sw))
+#define SLJIT_LOCALS_OFFSET_BASE 0
#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC)
diff --git a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
index f5009788f6..7c18578618 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c
@@ -99,7 +99,14 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size)
void *retval;
#ifdef MAP_ANON
- retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0);
+
+ int flags = MAP_PRIVATE | MAP_ANON;
+
+#ifdef MAP_JIT
+ flags |= MAP_JIT;
+#endif
+
+ retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, flags, -1, 0);
#else
if (dev_zero < 0) {
if (open_dev_zero())
diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.c b/src/3rdparty/pcre2/src/sljit/sljitLir.c
index 5e435f0154..5bdddc10cf 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitLir.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitLir.c
@@ -26,6 +26,13 @@
#include "sljitLir.h"
+#ifdef _WIN32
+
+/* For SLJIT_CACHE_FLUSH, which can expand to FlushInstructionCache. */
+#include <windows.h>
+
+#endif /* _WIN32 */
+
#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED)
/* These libraries are needed for the macros below. */
@@ -2178,7 +2185,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
#endif
-#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
+#if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \
+ && !(defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.h b/src/3rdparty/pcre2/src/sljit/sljitLir.h
index 920f6d4f78..e71890cf7b 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitLir.h
+++ b/src/3rdparty/pcre2/src/sljit/sljitLir.h
@@ -138,7 +138,7 @@ of sljitConfigInternal.h */
be specified as scratch registers and the fifth one as saved register
on the CPU above and any user code which requires four scratch
registers can run unmodified. The SLJIT compiler automatically saves
- the content of the two extra scrath register on the stack. Scratch
+ the content of the two extra scratch registers on the stack. Scratch
registers can also be preserved by saving their value on the stack
but this needs to be done manually.
@@ -746,7 +746,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source
register can hold any 32 or 64 bit value, and it is converted to a 32 bit
compatible format first. This conversion is free (no instructions are
- emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit
+ emitted) on most CPUs. A 32 bit value can also be converted to a 64 bit
value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension).
Note: memory addressing always uses 64 bit values on 64 bit systems so
@@ -773,8 +773,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
*/
#define SLJIT_F32_OP SLJIT_I32_OP
-/* Many CPUs (x86, ARM, PPC) has status flags which can be set according
- to the result of an operation. Other CPUs (MIPS) does not have status
+/* Many CPUs (x86, ARM, PPC) have status flags which can be set according
+ to the result of an operation. Other CPUs (MIPS) do not have status
flags, and results must be stored in registers. To cover both architecture
types efficiently only two flags are defined by SLJIT:
@@ -810,14 +810,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler
Using these flags can reduce the number of emitted instructions. E.g. a
fast loop can be implemented by decreasing a counter register and set the
- zero flag to jump back if the counter register is not reached zero.
+ zero flag to jump back if the counter register has not reached zero.
Motivation: although CPUs can set a large number of flags, usually their
values are ignored or only one of them is used. Emulating a large number
of flags on systems without flag register is complicated so SLJIT
instructions must specify the flag they want to use and only that flag
will be emulated. The last arithmetic instruction can be repeated if
- multiple flags needs to be checked.
+ multiple flags need to be checked.
*/
/* Set Zero status flag. */
@@ -884,7 +884,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
/* Starting index of opcodes for sljit_emit_op1. */
#define SLJIT_OP1_BASE 32
-/* The MOV instruction transfer data from source to destination.
+/* The MOV instruction transfers data from source to destination.
MOV instruction suffixes:
@@ -1156,7 +1156,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
#define SLJIT_FAST_CALL 25
/* Called function must be declared with the SLJIT_FUNC attribute. */
#define SLJIT_CALL 26
- /* Called function must be decalred with cdecl attribute.
+ /* Called function must be declared with cdecl attribute.
This is the default attribute for C functions. */
#define SLJIT_CALL_CDECL 27
@@ -1210,7 +1210,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl
/* Set the destination address of the jump to this label. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target);
-/* Emit an indirect jump or fast call. Both direct and indirect form
+/* Emit an indirect jump or fast call.
Direct form: set src to SLJIT_IMM() and srcw to the address
Indirect form: any other valid addressing mode
type must be between SLJIT_JUMP and SLJIT_FAST_CALL
@@ -1274,7 +1274,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compil
#define SLJIT_MEM_POST 0x1000
/* Emit a single memory load or store with update instruction. When the
- requested instruction from is not supported by the CPU, it returns
+ requested instruction form is not supported by the CPU, it returns
with SLJIT_ERR_UNSUPPORTED instead of emulating the instruction. This
allows specializing tight loops based on the supported instruction
forms (see SLJIT_MEM_SUPP flag).
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
index 8a437bd6a0..27af741487 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
@@ -37,14 +37,14 @@ typedef sljit_u32 sljit_ins;
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
-#define TMP_SP (SLJIT_NUMBER_OF_REGISTERS + 5)
+#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
/* r18 - platform register, currently not used */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
- 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 29, 9, 10, 30, 31
+ 31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
};
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
@@ -68,6 +68,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ADC 0x9a000000
#define ADD 0x8b000000
+#define ADDE 0x8b200000
#define ADDI 0x91000000
#define AND 0x8a000000
#define ANDI 0x92000000
@@ -96,7 +97,8 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define FSUB 0x1e603800
#define LDRI 0xf9400000
#define LDP 0xa9400000
-#define LDP_PST 0xa8c00000
+#define LDP_PRE 0xa9c00000
+#define LDR_PRE 0xf8400c00
#define LSLV 0x9ac02000
#define LSRV 0x9ac02400
#define MADD 0x9b000000
@@ -873,73 +875,51 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
- saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0);
- local_size += saved_regs_size + SLJIT_LOCALS_OFFSET;
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+ if (saved_regs_size & 0x8)
+ saved_regs_size += sizeof(sljit_sw);
+
local_size = (local_size + 15) & ~0xf;
- compiler->local_size = local_size;
-
- if (local_size <= (63 * sizeof(sljit_sw))) {
- FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
- | RN(TMP_SP) | ((-(local_size >> 3) & 0x7f) << 15)));
- FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
- offs = (local_size - saved_regs_size) << (15 - 3);
- } else {
- offs = 0 << 15;
- if (saved_regs_size & 0x8) {
- offs = 1 << 15;
- saved_regs_size += sizeof(sljit_sw);
- }
- local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
- if (saved_regs_size > 0)
- FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
- }
+ compiler->local_size = local_size + saved_regs_size;
+
+ FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
+ | RN(SLJIT_SP) | ((-(saved_regs_size >> 3) & 0x7f) << 15)));
+
+#ifdef _WIN32
+ if (local_size >= 4096)
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
+ else if (local_size > 256)
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(SLJIT_SP) | (local_size << 10)));
+#endif
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1;
+ offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) {
- if (!(offs & (1 << 15))) {
- prev = i;
- continue;
- }
- FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
- offs += 1 << 15;
+ prev = i;
continue;
}
- FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+ FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) {
- if (!(offs & (1 << 15))) {
- prev = i;
- continue;
- }
- FAIL_IF(push_inst(compiler, STRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
- offs += 1 << 15;
+ prev = i;
continue;
}
- FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+ FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
- SLJIT_ASSERT(prev == -1);
+ if (prev != -1)
+ FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
- if (compiler->local_size > (63 * sizeof(sljit_sw))) {
- /* The local_size is already adjusted by the saved registers. */
- if (local_size > 0xfff) {
- FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
- local_size &= 0xfff;
- }
- if (local_size)
- FAIL_IF(push_inst(compiler, SUBI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
- FAIL_IF(push_inst(compiler, STP_PRE | 29 | RT2(TMP_LR)
- | RN(TMP_SP) | ((-(16 >> 3) & 0x7f) << 15)));
- FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_SP) | (0 << 10)));
- }
+
+ FAIL_IF(push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10)));
args = get_arg_count(arg_types);
@@ -950,6 +930,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3)
FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S2) | RN(TMP_ZERO) | RM(SLJIT_R2)));
+#ifdef _WIN32
+ if (local_size >= 4096) {
+ if (local_size < 4 * 4096) {
+ /* No need for a loop. */
+ if (local_size >= 2 * 4096) {
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+ local_size -= 4096;
+ }
+
+ if (local_size >= 2 * 4096) {
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+ local_size -= 4096;
+ }
+
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ local_size -= 4096;
+ }
+ else {
+ FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG2) | (((local_size >> 12) - 1) << 5)));
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10) | (1 << 22)));
+ FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG2) | RN(TMP_REG2) | (1 << 10)));
+ FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+
+ local_size &= 0xfff;
+ }
+
+ if (local_size > 256) {
+ FAIL_IF(push_inst(compiler, SUBI | RD(TMP_REG1) | RN(TMP_REG1) | (local_size << 10)));
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ }
+ else if (local_size > 0)
+ FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(TMP_REG1) | ((-local_size & 0x1ff) << 12)));
+
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
+ }
+ else if (local_size > 256) {
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(TMP_REG1) | (0 << 10)));
+ }
+ else if (local_size > 0)
+ FAIL_IF(push_inst(compiler, LDR_PRE | RT(TMP_ZERO) | RN(SLJIT_SP) | ((-local_size & 0x1ff) << 12)));
+
+#else /* !_WIN32 */
+
+ /* The local_size does not include saved registers size. */
+ if (local_size > 0xfff) {
+ FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
+ local_size &= 0xfff;
+ }
+ if (local_size != 0)
+ FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
+
+#endif /* _WIN32 */
+
return SLJIT_SUCCESS;
}
@@ -957,13 +995,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
+ sljit_s32 saved_regs_size;
+
CHECK_ERROR();
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
- local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 0) + SLJIT_LOCALS_OFFSET;
- local_size = (local_size + 15) & ~0xf;
- compiler->local_size = local_size;
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
+ if (saved_regs_size & 0x8)
+ saved_regs_size += sizeof(sljit_sw);
+
+ compiler->local_size = saved_regs_size + ((local_size + 15) & ~0xf);
return SLJIT_SUCCESS;
}
@@ -977,71 +1019,59 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));
- local_size = compiler->local_size;
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 2);
+ if (saved_regs_size & 0x8)
+ saved_regs_size += sizeof(sljit_sw);
- saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 0);
- if (local_size <= (63 * sizeof(sljit_sw)))
- offs = (local_size - saved_regs_size) << (15 - 3);
+ local_size = compiler->local_size - saved_regs_size;
+
+ /* Load LR as early as possible. */
+ if (local_size == 0)
+ FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
+ else if (local_size < 63 * sizeof(sljit_sw)) {
+ FAIL_IF(push_inst(compiler, LDP_PRE | RT(TMP_FP) | RT2(TMP_LR)
+ | RN(SLJIT_SP) | (local_size << (15 - 3))));
+ }
else {
- FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
- | RN(TMP_SP) | (((16 >> 3) & 0x7f) << 15)));
- offs = 0 << 15;
- if (saved_regs_size & 0x8) {
- offs = 1 << 15;
- saved_regs_size += sizeof(sljit_sw);
- }
- local_size -= saved_regs_size + SLJIT_LOCALS_OFFSET;
if (local_size > 0xfff) {
- FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | ((local_size >> 12) << 10) | (1 << 22)));
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((local_size >> 12) << 10) | (1 << 22)));
local_size &= 0xfff;
}
if (local_size)
- FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (local_size << 10)));
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (local_size << 10)));
+
+ FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
}
tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
prev = -1;
+ offs = 2 << 15;
for (i = SLJIT_S0; i >= tmp; i--) {
if (prev == -1) {
- if (!(offs & (1 << 15))) {
- prev = i;
- continue;
- }
- FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
- offs += 1 << 15;
+ prev = i;
continue;
}
- FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+ FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
if (prev == -1) {
- if (!(offs & (1 << 15))) {
- prev = i;
- continue;
- }
- FAIL_IF(push_inst(compiler, LDRI | RT(i) | RN(TMP_SP) | (offs >> 5)));
- offs += 1 << 15;
+ prev = i;
continue;
}
- FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(TMP_SP) | offs));
+ FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
offs += 2 << 15;
prev = -1;
}
- SLJIT_ASSERT(prev == -1);
+ if (prev != -1)
+ FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5)));
- if (compiler->local_size <= (63 * sizeof(sljit_sw))) {
- FAIL_IF(push_inst(compiler, LDP_PST | 29 | RT2(TMP_LR)
- | RN(TMP_SP) | (((local_size >> 3) & 0x7f) << 15)));
- } else if (saved_regs_size > 0) {
- FAIL_IF(push_inst(compiler, ADDI | RD(TMP_SP) | RN(TMP_SP) | (saved_regs_size << 10)));
- }
-
- FAIL_IF(push_inst(compiler, RET | RN(TMP_LR)));
- return SLJIT_SUCCESS;
+ /* These two can be executed in parallel. */
+ FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (saved_regs_size << 10)));
+ return push_inst(compiler, RET | RN(TMP_LR));
}
/* --------------------------------------------------------------------- */
@@ -1856,6 +1886,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | ((memw & 0x1ff) << 12));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
+{
+ sljit_s32 dst_reg;
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
+
+ SLJIT_ASSERT (SLJIT_LOCALS_OFFSET_BASE == 0);
+
+ dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
+
+ if (offset <= 0xffffff && offset >= -0xffffff) {
+ ins = ADDI;
+ if (offset < 0) {
+ offset = -offset;
+ ins = SUBI;
+ }
+
+ if (offset <= 0xfff)
+ FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (offset << 10)));
+ else {
+ FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | ((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
+
+ offset &= 0xfff;
+ if (offset != 0)
+ FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (offset << 10)));
+ }
+ }
+ else {
+ FAIL_IF(load_immediate (compiler, dst_reg, offset));
+ /* Add extended register form. */
+ FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
+ }
+
+ if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
+ return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
+ return SLJIT_SUCCESS;
+}
+
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
struct sljit_const *const_;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
index 75e7a38b5f..d7024b6d7d 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c
@@ -110,6 +110,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define ASRSI 0x1000
#define ASR_W 0xfa40f000
#define ASR_WI 0xea4f0020
+#define BCC 0xd000
#define BICI 0xf0200000
#define BKPT 0xbe00
#define BLX 0x4780
@@ -125,6 +126,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define EORS 0x4040
#define EOR_W 0xea800000
#define IT 0xbf00
+#define LDRI 0xf8500800
#define LSLS 0x4080
#define LSLSI 0x0000
#define LSL_W 0xfa00f000
@@ -158,6 +160,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SBCI 0xf1600000
#define SBCS 0x4180
#define SBC_W 0xeb600000
+#define SDIV 0xfb90f0f0
#define SMULL 0xfb800000
#define STR_SP 0x9000
#define SUBS 0x1a00
@@ -172,6 +175,7 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
#define SXTH 0xb200
#define SXTH_W 0xfa0ff080
#define TST 0x4200
+#define UDIV 0xfbb0f0f0
#define UMULL 0xfba00000
#define UXTB 0xb2c0
#define UXTB_W 0xfa5ff080
@@ -339,8 +343,8 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw
/* Really complex instruction form for branches. */
s = (diff >> 23) & 0x1;
- j1 = (~(diff >> 21) ^ s) & 0x1;
- j2 = (~(diff >> 22) ^ s) & 0x1;
+ j1 = (~(diff >> 22) ^ s) & 0x1;
+ j2 = (~(diff >> 21) ^ s) & 0x1;
jump_inst[0] = 0xf000 | (s << 10) | COPY_BITS(diff, 11, 0, 10);
jump_inst[1] = (j1 << 13) | (j2 << 11) | (diff & 0x7ff);
@@ -520,6 +524,8 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst,
{
sljit_uw tmp;
+ /* MOVS cannot be used since it destroys flags. */
+
if (imm >= 0x10000) {
tmp = get_imm(imm);
if (tmp != INVALID_IMM)
@@ -1032,6 +1038,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
{
sljit_s32 args, size, i, tmp;
sljit_ins push = 0;
+#ifdef _WIN32
+ sljit_uw imm;
+#endif
CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
@@ -1052,12 +1061,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
local_size = ((size + local_size + 7) & ~7) - size;
compiler->local_size = local_size;
+
+#ifdef _WIN32
+ if (local_size >= 256) {
+ if (local_size > 4096)
+ imm = get_imm(4096);
+ else
+ imm = get_imm(local_size & ~0xff);
+
+ SLJIT_ASSERT(imm != INVALID_IMM);
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(SLJIT_SP) | imm));
+ }
+#else
if (local_size > 0) {
if (local_size <= (127 << 2))
FAIL_IF(push_inst16(compiler, SUB_SP | (local_size >> 2)));
else
FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, local_size));
}
+#endif
args = get_arg_count(arg_types);
@@ -1068,6 +1090,61 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (args >= 3)
FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S2, SLJIT_R2)));
+#ifdef _WIN32
+ if (local_size >= 256) {
+ if (local_size > 4096) {
+ imm = get_imm(4096);
+ SLJIT_ASSERT(imm != INVALID_IMM);
+
+ if (local_size < 4 * 4096) {
+ if (local_size > 2 * 4096) {
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+ local_size -= 4096;
+ }
+
+ if (local_size > 2 * 4096) {
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+ local_size -= 4096;
+ }
+
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ local_size -= 4096;
+
+ SLJIT_ASSERT(local_size > 0);
+ }
+ else {
+ FAIL_IF(load_immediate(compiler, SLJIT_R3, (local_size >> 12) - 1));
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+ SLJIT_ASSERT(reg_map[SLJIT_R3] < 7);
+ FAIL_IF(push_inst16(compiler, SUBSI8 | RDN3(SLJIT_R3) | 1));
+ FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-7 & 0xff)));
+
+ local_size &= 0xfff;
+
+ if (local_size != 0)
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG2) | RN4(TMP_REG1)));
+ }
+
+ if (local_size >= 256) {
+ imm = get_imm(local_size & ~0xff);
+ SLJIT_ASSERT(imm != INVALID_IMM);
+
+ FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(TMP_REG1) | imm));
+ }
+ }
+
+ local_size &= 0xff;
+ FAIL_IF(push_inst32(compiler, LDRI | 0x400 | (local_size > 0 ? 0x100 : 0) | RT4(TMP_REG2) | RN4(TMP_REG1) | local_size));
+
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_SP, TMP_REG1)));
+ }
+ else if (local_size > 0)
+ FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | local_size));
+#endif
+
return SLJIT_SUCCESS;
}
@@ -1119,11 +1196,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp
/* Operators */
/* --------------------------------------------------------------------- */
+#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
+
#ifdef __cplusplus
extern "C" {
#endif
-#if defined(__GNUC__)
+#ifdef _WIN32
+extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
+extern long long __rt_sdiv(int denominator, int numerator);
+#elif defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
@@ -1134,10 +1216,14 @@ extern int __aeabi_idivmod(int numerator, int denominator);
}
#endif
+#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
+#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
sljit_sw saved_reg_list[3];
sljit_sw saved_reg_count;
+#endif
CHECK_ERROR();
CHECK(check_sljit_emit_op0(compiler, op));
@@ -1155,6 +1241,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (reg_map[SLJIT_R0] << 12)
| (reg_map[SLJIT_R0] << 16)
| reg_map[SLJIT_R1]);
+#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
+ case SLJIT_DIVMOD_UW:
+ case SLJIT_DIVMOD_SW:
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
+ FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
+ FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
+ return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
+ case SLJIT_DIV_UW:
+ case SLJIT_DIV_SW:
+ return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
+#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
case SLJIT_DIV_UW:
@@ -1183,7 +1280,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
}
}
-#if defined(__GNUC__)
+#ifdef _WIN32
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
+ FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
+ FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
+ ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__rt_udiv) : SLJIT_FUNC_OFFSET(__rt_sdiv))));
+#elif defined(__GNUC__)
FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_OFFSET(__aeabi_uidivmod) : SLJIT_FUNC_OFFSET(__aeabi_idivmod))));
#else
@@ -1203,6 +1306,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
}
return SLJIT_SUCCESS;
+#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
}
return SLJIT_SUCCESS;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
index 9f9e157a05..094c9923bc 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c
@@ -448,7 +448,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins ins = NOP;
sljit_u8 offsets[4];
- SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
+ SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT;
@@ -516,7 +516,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
else if (arg_count != word_arg_count)
ins = ADDU | S(word_arg_count) | TA(0) | DA(4 + (offsets[arg_count - 1] >> 2));
else if (arg_count == 1)
- ins = ADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
+ ins = ADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--;
word_arg_count--;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
index ff6f048659..f841aef5dd 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c
@@ -547,7 +547,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
sljit_ins prev_ins = NOP;
sljit_ins ins = NOP;
- SLJIT_ASSERT(reg_map[TMP_REG3] == 4 && freg_map[TMP_FREG1] == 12);
+ SLJIT_ASSERT(reg_map[TMP_REG1] == 4 && freg_map[TMP_FREG1] == 12);
arg_types >>= SLJIT_DEF_SHIFT;
@@ -591,7 +591,7 @@ static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_t
if (arg_count != word_arg_count)
ins = DADDU | S(word_arg_count) | TA(0) | D(arg_count);
else if (arg_count == 1)
- ins = DADDU | S(SLJIT_R0) | TA(0) | D(TMP_REG3);
+ ins = DADDU | S(SLJIT_R0) | TA(0) | DA(4);
arg_count--;
word_arg_count--;
break;
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
index e108433f70..894e21304b 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c
@@ -57,14 +57,14 @@ typedef sljit_u32 sljit_ins;
#define RETURN_ADDR_REG 31
/* Flags are kept in volatile registers. */
-#define EQUAL_FLAG 31
+#define EQUAL_FLAG 3
#define OTHER_FLAG 1
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
- 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4
+ 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 4, 25, 31
};
#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32)
@@ -612,16 +612,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
/* Frequent case. */
FAIL_IF(push_inst(compiler, ADDIU_W | S(SLJIT_SP) | T(SLJIT_SP) | IMM(-local_size), DR(SLJIT_SP)));
base = S(SLJIT_SP);
+ offs = local_size - (sljit_sw)sizeof(sljit_sw);
}
else {
- FAIL_IF(load_immediate(compiler, DR(TMP_REG1), local_size));
+ FAIL_IF(load_immediate(compiler, DR(OTHER_FLAG), local_size));
FAIL_IF(push_inst(compiler, ADDU_W | S(SLJIT_SP) | TA(0) | D(TMP_REG2), DR(TMP_REG2)));
- FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(TMP_REG1) | D(SLJIT_SP), DR(SLJIT_SP)));
+ FAIL_IF(push_inst(compiler, SUBU_W | S(SLJIT_SP) | T(OTHER_FLAG) | D(SLJIT_SP), DR(SLJIT_SP)));
base = S(TMP_REG2);
local_size = 0;
+ offs = -(sljit_sw)sizeof(sljit_sw);
}
- offs = local_size - (sljit_sw)(sizeof(sljit_sw));
FAIL_IF(push_inst(compiler, STACK_STORE | base | TA(RETURN_ADDR_REG) | IMM(offs), MOVABLE_INS));
tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
@@ -805,7 +806,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
tmp_ar = reg_ar;
delay_slot = reg_ar;
- } else {
+ }
+ else {
tmp_ar = DR(TMP_REG1);
delay_slot = MOVABLE_INS;
}
@@ -881,11 +883,39 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl
static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg_ar, sljit_s32 arg, sljit_sw argw)
{
+ sljit_s32 tmp_ar, base, delay_slot;
+
if (getput_arg_fast(compiler, flags, reg_ar, arg, argw))
return compiler->error;
- compiler->cache_arg = 0;
- compiler->cache_argw = 0;
- return getput_arg(compiler, flags, reg_ar, arg, argw, 0, 0);
+
+ if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA)) {
+ tmp_ar = reg_ar;
+ delay_slot = reg_ar;
+ }
+ else {
+ tmp_ar = DR(TMP_REG1);
+ delay_slot = MOVABLE_INS;
+ }
+ base = arg & REG_MASK;
+
+ if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
+ argw &= 0x3;
+
+ if (SLJIT_UNLIKELY(argw)) {
+ FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | DA(tmp_ar) | SH_IMM(argw), tmp_ar));
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
+ }
+ else
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | DA(tmp_ar), tmp_ar));
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
+ }
+
+ FAIL_IF(load_immediate(compiler, tmp_ar, argw));
+
+ if (base != 0)
+ FAIL_IF(push_inst(compiler, ADDU_W | S(base) | TA(tmp_ar) | DA(tmp_ar), tmp_ar));
+
+ return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot);
}
static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
index 8a83e273a4..074e64b9f2 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c
@@ -123,34 +123,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
if (args > 0) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_REG | (reg_map[SLJIT_S0] << 3) | reg_map[SLJIT_R2];
+ inst += 2;
}
if (args > 1) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_REG | (reg_map[SLJIT_S1] << 3) | reg_map[SLJIT_R1];
+ inst += 2;
}
if (args > 2) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
- *inst++ = 0x24;
- *inst++ = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | 0x4 /* esp */;
+ inst[2] = 0x24;
+ inst[3] = sizeof(sljit_sw) * (3 + 2); /* saveds >= 3 as well. */
}
#else
if (args > 0) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
- *inst++ = sizeof(sljit_sw) * 2;
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_DISP8 | (reg_map[SLJIT_S0] << 3) | reg_map[TMP_REG1];
+ inst[2] = sizeof(sljit_sw) * 2;
+ inst += 3;
}
if (args > 1) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
- *inst++ = sizeof(sljit_sw) * 3;
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_DISP8 | (reg_map[SLJIT_S1] << 3) | reg_map[TMP_REG1];
+ inst[2] = sizeof(sljit_sw) * 3;
+ inst += 3;
}
if (args > 2) {
- *inst++ = MOV_r_rm;
- *inst++ = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
- *inst++ = sizeof(sljit_sw) * 4;
+ inst[0] = MOV_r_rm;
+ inst[1] = MOD_DISP8 | (reg_map[SLJIT_S2] << 3) | reg_map[TMP_REG1];
+ inst[2] = sizeof(sljit_sw) * 4;
}
#endif
@@ -170,17 +174,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size;
#ifdef _WIN32
- if (local_size > 1024) {
-#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
- FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
-#else
- /* Space for a single argument. This amount is excluded when the stack is allocated below. */
- local_size -= sizeof(sljit_sw);
- FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size));
- FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
- SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw)));
-#endif
- FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
+ if (local_size > 0) {
+ if (local_size <= 4 * 4096) {
+ if (local_size > 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
+ if (local_size > 2 * 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
+ if (local_size > 3 * 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
+ }
+ else {
+ EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
+ EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_IMM, (local_size - 1) >> 12);
+
+ SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_R0), -4096);
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1));
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
+ FAIL_IF(!inst);
+
+ INC_SIZE(2);
+ inst[0] = JNE_i8;
+ inst[1] = (sljit_s8) -16;
+ }
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
}
#endif
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
index 635ebd087c..8506565614 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c
@@ -83,6 +83,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
+ compiler->mode32 = 0;
+
#ifdef _WIN64
/* Two/four register slots for parameters plus space for xmm6 register if needed. */
if (fscratches >= 6 || fsaveds >= 1)
@@ -126,35 +128,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
#ifndef _WIN64
if (args > 0) {
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
+ inst[0] = REX_W;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
+ inst += 3;
}
if (args > 1) {
- *inst++ = REX_W | REX_R;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
+ inst[0] = REX_W | REX_R;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
+ inst += 3;
}
if (args > 2) {
- *inst++ = REX_W | REX_R;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
+ inst[0] = REX_W | REX_R;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
}
#else
if (args > 0) {
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
+ inst[0] = REX_W;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
+ inst += 3;
}
if (args > 1) {
- *inst++ = REX_W;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
+ inst[0] = REX_W;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
+ inst += 3;
}
if (args > 2) {
- *inst++ = REX_W | REX_B;
- *inst++ = MOV_r_rm;
- *inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
+ inst[0] = REX_W | REX_B;
+ inst[1] = MOV_r_rm;
+ inst[2] = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
}
#endif
}
@@ -163,58 +169,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
compiler->local_size = local_size;
#ifdef _WIN64
- if (local_size > 1024) {
- /* Allocate stack for the callback, which grows the stack. */
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
- FAIL_IF(!inst);
- INC_SIZE(4 + (3 + sizeof(sljit_s32)));
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_83;
- *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
- /* Allocated size for registers must be divisible by 8. */
- SLJIT_ASSERT(!(saved_register_size & 0x7));
- /* Aligned to 16 byte. */
- if (saved_register_size & 0x8) {
- *inst++ = 5 * sizeof(sljit_sw);
- local_size -= 5 * sizeof(sljit_sw);
- } else {
- *inst++ = 4 * sizeof(sljit_sw);
- local_size -= 4 * sizeof(sljit_sw);
- }
- /* Second instruction */
- SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
- *inst++ = REX_W;
- *inst++ = MOV_rm_i32;
- *inst++ = MOD_REG | reg_lmap[SLJIT_R0];
- sljit_unaligned_store_s32(inst, local_size);
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
- || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- compiler->skip_checks = 1;
-#endif
- FAIL_IF(sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARG1(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
- }
-#endif
-
if (local_size > 0) {
- if (local_size <= 127) {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
- FAIL_IF(!inst);
- INC_SIZE(4);
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_83;
- *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
- *inst++ = local_size;
+ if (local_size <= 4 * 4096) {
+ if (local_size > 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
+ if (local_size > 2 * 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
+ if (local_size > 3 * 4096)
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
}
else {
- inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
+ EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_SP, 0);
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, (local_size - 1) >> 12);
+
+ SLJIT_ASSERT (reg_map[SLJIT_R0] == 0);
+
+ EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_R0), -4096);
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 4096));
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, 1));
+
+ inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
FAIL_IF(!inst);
- INC_SIZE(7);
- *inst++ = REX_W;
- *inst++ = GROUP_BINARY_81;
- *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
- sljit_unaligned_store_s32(inst, local_size);
- inst += sizeof(sljit_s32);
+
+ INC_SIZE(2);
+ inst[0] = JNE_i8;
+ inst[1] = (sljit_s8) -19;
}
+
+ EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
+ }
+#endif
+
+ if (local_size > 0) {
+ FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
+ SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
}
#ifdef _WIN64
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
index ab7b36adb2..6f02ee3e8b 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c
@@ -669,23 +669,6 @@ static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
-#ifdef _WIN32
-#include <malloc.h>
-
-static void SLJIT_FUNC sljit_grow_stack(sljit_sw local_size)
-{
- /* Workaround for calling the internal _chkstk() function on Windows.
- This function touches all 4k pages belongs to the requested stack space,
- which size is passed in local_size. This is necessary on Windows where
- the stack can only grow in 4k steps. However, this function just burn
- CPU cycles if the stack is large enough. However, you don't know it in
- advance, so it must always be called. I think this is a bad design in
- general even if it has some reasons. */
- *(volatile sljit_s32*)alloca(local_size) = 0;
-}
-
-#endif
-
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else