diff options
Diffstat (limited to 'src/3rdparty/pcre2/src/sljit')
22 files changed, 3496 insertions, 3278 deletions
diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfig.h b/src/3rdparty/pcre2/src/sljit/sljitConfig.h index a548c37ab6..b65584a4af 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitConfig.h +++ b/src/3rdparty/pcre2/src/sljit/sljitConfig.h @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -90,10 +90,20 @@ /* Executable code allocation: If SLJIT_EXECUTABLE_ALLOCATOR is not defined, the application should - define both SLJIT_MALLOC_EXEC and SLJIT_FREE_EXEC. */ + define SLJIT_MALLOC_EXEC, SLJIT_FREE_EXEC, and SLJIT_EXEC_OFFSET. */ #ifndef SLJIT_EXECUTABLE_ALLOCATOR /* Enabled by default. */ #define SLJIT_EXECUTABLE_ALLOCATOR 1 + +/* When SLJIT_PROT_EXECUTABLE_ALLOCATOR is enabled SLJIT uses + an allocator which does not set writable and executable + permission flags at the same time. The trade-of is increased + memory consumption and disabled dynamic code modifications. */ +#ifndef SLJIT_PROT_EXECUTABLE_ALLOCATOR +/* Disabled by default. */ +#define SLJIT_PROT_EXECUTABLE_ALLOCATOR 0 +#endif + #endif /* Force cdecl calling convention even if a better calling diff --git a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h index 566c368063..cc0810fbd7 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h +++ b/src/3rdparty/pcre2/src/sljit/sljitConfigInternal.h @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -187,14 +187,6 @@ /* External function definitions. */ /**********************************/ -#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) - -/* These libraries are needed for the macros below. */ -#include <stdlib.h> -#include <string.h> - -#endif /* SLJIT_STD_MACROS_DEFINED */ - /* General macros: Note: SLJIT is designed to be independent from them as possible. @@ -304,6 +296,13 @@ #define SLJIT_CACHE_FLUSH(from, to) \ sys_icache_invalidate((char*)(from), (char*)(to) - (char*)(from)) +#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) + +/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ +#define SLJIT_CACHE_FLUSH(from, to) \ + ppc_cache_flush((from), (to)) +#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 + #elif (defined(__GNUC__) && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) #define SLJIT_CACHE_FLUSH(from, to) \ @@ -316,13 +315,6 @@ #define SLJIT_CACHE_FLUSH(from, to) \ cacheflush((long)(from), (long)(to), 0) -#elif (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) - -/* The __clear_cache() implementation of GCC is a dummy function on PowerPC. */ -#define SLJIT_CACHE_FLUSH(from, to) \ - ppc_cache_flush((from), (to)) -#define SLJIT_CACHE_FLUSH_OWN_IMPL 1 - #elif (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) /* The __clear_cache() implementation of GCC is a dummy function on Sparc. */ @@ -401,7 +393,9 @@ typedef double sljit_f64; #ifndef SLJIT_W /* Defining long constants. */ -#if (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) +#if (defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define SLJIT_W(w) (w##l) +#elif (defined SLJIT_64BIT_ARCHITECTURE && SLJIT_64BIT_ARCHITECTURE) #define SLJIT_W(w) (w##ll) #else #define SLJIT_W(w) (w) @@ -545,6 +539,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_exec(void* ptr); SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #define SLJIT_MALLOC_EXEC(size) sljit_malloc_exec(size) #define SLJIT_FREE_EXEC(ptr) sljit_free_exec(ptr) + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw sljit_exec_offset(void* ptr); +#define SLJIT_EXEC_OFFSET(ptr) sljit_exec_offset(ptr) +#else +#define SLJIT_EXEC_OFFSET(ptr) 0 +#endif + #endif /**********************************************/ @@ -553,37 +555,37 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) -#define SLJIT_NUMBER_OF_REGISTERS 10 -#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 9 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) -#define SLJIT_LOCALS_OFFSET_BASE ((2 + 4) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) #else /* Maximum 3 arguments are passed on the stack, +1 for double alignment. */ -#define SLJIT_LOCALS_OFFSET_BASE ((3 + 1 + 4) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) #endif /* SLJIT_X86_32_FASTCALL */ #elif (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #ifndef _WIN64 -#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_REGISTERS 13 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 6 -#define SLJIT_LOCALS_OFFSET_BASE (sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE 0 #else -#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_REGISTERS 13 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 -#define SLJIT_LOCALS_OFFSET_BASE ((4 + 2) * sizeof(sljit_sw)) +#define SLJIT_LOCALS_OFFSET_BASE (compiler->locals_offset) #endif /* _WIN64 */ #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) -#define SLJIT_NUMBER_OF_REGISTERS 11 +#define SLJIT_NUMBER_OF_REGISTERS 12 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) -#define SLJIT_NUMBER_OF_REGISTERS 11 -#define SLJIT_NUMBER_OF_SAVED_REGISTERS 7 +#define SLJIT_NUMBER_OF_REGISTERS 12 +#define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 #define SLJIT_LOCALS_OFFSET_BASE 0 #elif (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) @@ -607,7 +609,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #elif (defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) -#define SLJIT_NUMBER_OF_REGISTERS 17 +#define SLJIT_NUMBER_OF_REGISTERS 21 #define SLJIT_NUMBER_OF_SAVED_REGISTERS 8 #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #define SLJIT_LOCALS_OFFSET_BASE (4 * sizeof(sljit_sw)) @@ -663,7 +665,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) -#if !defined(SLJIT_ASSERT) || !defined(SLJIT_ASSERT_STOP) +#if !defined(SLJIT_ASSERT) || !defined(SLJIT_UNREACHABLE) /* SLJIT_HALT_PROCESS must halt the process. */ #ifndef SLJIT_HALT_PROCESS @@ -675,7 +677,7 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #include <stdio.h> -#endif /* !SLJIT_ASSERT || !SLJIT_ASSERT_STOP */ +#endif /* !SLJIT_ASSERT || !SLJIT_UNREACHABLE */ /* Feel free to redefine these two macros. */ #ifndef SLJIT_ASSERT @@ -690,34 +692,33 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); #endif /* !SLJIT_ASSERT */ -#ifndef SLJIT_ASSERT_STOP +#ifndef SLJIT_UNREACHABLE -#define SLJIT_ASSERT_STOP() \ +#define SLJIT_UNREACHABLE() \ do { \ printf("Should never been reached " __FILE__ ":%d\n", __LINE__); \ SLJIT_HALT_PROCESS(); \ } while (0) -#endif /* !SLJIT_ASSERT_STOP */ +#endif /* !SLJIT_UNREACHABLE */ #else /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ /* Forcing empty, but valid statements. */ #undef SLJIT_ASSERT -#undef SLJIT_ASSERT_STOP +#undef SLJIT_UNREACHABLE #define SLJIT_ASSERT(x) \ do { } while (0) -#define SLJIT_ASSERT_STOP() \ +#define SLJIT_UNREACHABLE() \ do { } while (0) #endif /* (defined SLJIT_DEBUG && SLJIT_DEBUG) */ #ifndef SLJIT_COMPILE_ASSERT -/* Should be improved eventually. */ #define SLJIT_COMPILE_ASSERT(x, description) \ - SLJIT_ASSERT(x) + switch(0) { case 0: case ((x) ? 1 : 0): break; } #endif /* !SLJIT_COMPILE_ASSERT */ diff --git a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c index 54f05f5dd7..f5009788f6 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c +++ b/src/3rdparty/pcre2/src/sljit/sljitExecAllocator.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -86,7 +86,7 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) return VirtualAlloc(NULL, size, MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE); } -static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) { SLJIT_UNUSED_ARG(size); VirtualFree(chunk, 0, MEM_RELEASE); @@ -96,7 +96,7 @@ static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) static SLJIT_INLINE void* alloc_chunk(sljit_uw size) { - void* retval; + void *retval; #ifdef MAP_ANON retval = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANON, -1, 0); @@ -111,7 +111,7 @@ static SLJIT_INLINE void* alloc_chunk(sljit_uw size) return (retval != MAP_FAILED) ? retval : NULL; } -static SLJIT_INLINE void free_chunk(void* chunk, sljit_uw size) +static SLJIT_INLINE void free_chunk(void *chunk, sljit_uw size) { munmap(chunk, size); } @@ -180,8 +180,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_malloc_exec(sljit_uw size) sljit_uw chunk_size; allocator_grab_lock(); - if (size < sizeof(struct free_block)) - size = sizeof(struct free_block); + if (size < (64 - sizeof(struct block_header))) + size = (64 - sizeof(struct block_header)); size = ALIGN_SIZE(size); free_block = free_blocks; diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.c b/src/3rdparty/pcre2/src/sljit/sljitLir.c index ec1781e4c7..c0bbb5201a 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitLir.c +++ b/src/3rdparty/pcre2/src/sljit/sljitLir.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -26,6 +26,14 @@ #include "sljitLir.h" +#if !(defined SLJIT_STD_MACROS_DEFINED && SLJIT_STD_MACROS_DEFINED) + +/* These libraries are needed for the macros below. */ +#include <stdlib.h> +#include <string.h> + +#endif /* SLJIT_STD_MACROS_DEFINED */ + #define CHECK_ERROR() \ do { \ if (SLJIT_UNLIKELY(compiler->error)) \ @@ -76,14 +84,18 @@ #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) +#define VARIABLE_FLAG_SHIFT (10) +#define VARIABLE_FLAG_MASK (0x3f << VARIABLE_FLAG_SHIFT) +#define GET_FLAG_TYPE(op) ((op) >> VARIABLE_FLAG_SHIFT) + #define GET_OPCODE(op) \ - ((op) & ~(SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) + ((op) & ~(SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) -#define GET_FLAGS(op) \ - ((op) & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) +#define HAS_FLAGS(op) \ + ((op) & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #define GET_ALL_FLAGS(op) \ - ((op) & (SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS)) + ((op) & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK)) #define TYPE_CAST_NEEDED(op) \ (((op) >= SLJIT_MOV_U8 && (op) <= SLJIT_MOV_S16) || ((op) >= SLJIT_MOVU_U8 && (op) <= SLJIT_MOVU_S16)) @@ -112,10 +124,10 @@ /* SLJIT_REWRITABLE_JUMP is 0x1000. */ #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) -# define PATCH_MB 0x4 -# define PATCH_MW 0x8 +# define PATCH_MB 0x4 +# define PATCH_MW 0x8 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -# define PATCH_MD 0x10 +# define PATCH_MD 0x10 #endif #endif @@ -242,9 +254,21 @@ #if !(defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED) #if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#include "sljitProtExecAllocator.c" +#else #include "sljitExecAllocator.c" #endif +#endif + +#if (defined SLJIT_PROT_EXECUTABLE_ALLOCATOR && SLJIT_PROT_EXECUTABLE_ALLOCATOR) +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr) + (exec_offset)) +#else +#define SLJIT_ADD_EXEC_OFFSET(ptr, exec_offset) ((sljit_u8 *)(ptr)) +#endif + /* Argument checking features. */ #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -318,7 +342,7 @@ /* Public functions */ /* --------------------------------------------------------------------- */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) #define SLJIT_NEEDS_COMPILER_INIT 1 static sljit_s32 compiler_initialized = 0; /* A thread safe initialization. */ @@ -345,6 +369,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allo int_op_and_single_op_must_be_the_same); SLJIT_COMPILE_ASSERT(SLJIT_REWRITABLE_JUMP != SLJIT_F32_OP, rewritable_jump_and_single_op_must_not_be_the_same); + SLJIT_COMPILE_ASSERT(!(SLJIT_EQUAL & 0x1) && !(SLJIT_LESS & 0x1) && !(SLJIT_EQUAL_F64 & 0x1) && !(SLJIT_JUMP & 0x1), + conditional_flags_must_be_even_numbers); /* Only the non-zero members must be set. */ compiler->error = SLJIT_SUCCESS; @@ -479,6 +505,18 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw } } +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(current_flags); + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + if ((current_flags & ~(VARIABLE_FLAG_MASK | SLJIT_I32_OP | SLJIT_SET_Z)) == 0) { + compiler->last_flags = GET_FLAG_TYPE(current_flags) | (current_flags & (SLJIT_I32_OP | SLJIT_SET_Z)); + } +#endif +} + /* --------------------------------------------------------------------- */ /* Private functions */ /* --------------------------------------------------------------------- */ @@ -624,65 +662,6 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp (((exp) & SLJIT_MEM) && (((exp) & REG_MASK) == reg || OFFS_REG(exp) == reg)) #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) -#define FUNCTION_CHECK_OP() \ - CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ - switch (GET_OPCODE(op)) { \ - case SLJIT_NOT: \ - case SLJIT_CLZ: \ - case SLJIT_AND: \ - case SLJIT_OR: \ - case SLJIT_XOR: \ - case SLJIT_SHL: \ - case SLJIT_LSHR: \ - case SLJIT_ASHR: \ - CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C))); \ - break; \ - case SLJIT_NEG: \ - CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ - break; \ - case SLJIT_MUL: \ - CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C))); \ - break; \ - case SLJIT_ADD: \ - CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_S))); \ - break; \ - case SLJIT_SUB: \ - break; \ - case SLJIT_ADDC: \ - case SLJIT_SUBC: \ - CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))); \ - break; \ - case SLJIT_BREAKPOINT: \ - case SLJIT_NOP: \ - case SLJIT_LMUL_UW: \ - case SLJIT_LMUL_SW: \ - case SLJIT_MOV: \ - case SLJIT_MOV_U32: \ - case SLJIT_MOV_P: \ - case SLJIT_MOVU: \ - case SLJIT_MOVU_U32: \ - case SLJIT_MOVU_P: \ - /* Nothing allowed */ \ - CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - break; \ - default: \ - /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */ \ - CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - break; \ - } - -#define FUNCTION_CHECK_FOP() \ - CHECK_ARGUMENT(!GET_FLAGS(op) || !(op & SLJIT_KEEP_FLAGS)); \ - switch (GET_OPCODE(op)) { \ - case SLJIT_CMP_F64: \ - CHECK_ARGUMENT(!(op & (SLJIT_SET_U | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_SET_S))); \ - break; \ - default: \ - /* Only SLJIT_I32_OP or SLJIT_F32_OP is allowed. */ \ - CHECK_ARGUMENT(!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C | SLJIT_KEEP_FLAGS))); \ - break; \ - } #define FUNCTION_CHECK_IS_REG(r) \ (((r) >= SLJIT_R0 && (r) < (SLJIT_R0 + compiler->scratches)) || \ @@ -718,12 +697,12 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ CHECK_ARGUMENT(!((i) & ~0x3)); \ } \ - CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \ } -#define FUNCTION_CHECK_DST(p, i) \ +#define FUNCTION_CHECK_DST(p, i, unused) \ CHECK_ARGUMENT(compiler->scratches != -1 && compiler->saveds != -1); \ - if (FUNCTION_CHECK_IS_REG_OR_UNUSED(p)) \ + if (FUNCTION_CHECK_IS_REG(p) || ((unused) && (p) == SLJIT_UNUSED)) \ CHECK_ARGUMENT((i) == 0); \ else if ((p) == (SLJIT_MEM1(SLJIT_SP))) \ CHECK_ARGUMENT((i) >= 0 && (i) < compiler->logical_local_size); \ @@ -737,7 +716,7 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ CHECK_ARGUMENT(!((i) & ~0x3)); \ } \ - CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \ } #define FUNCTION_FCHECK(p, i) \ @@ -757,15 +736,7 @@ static SLJIT_INLINE void set_const(struct sljit_const *const_, struct sljit_comp CHECK_NOT_VIRTUAL_REGISTER(OFFS_REG(p)); \ CHECK_ARGUMENT(((p) & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_SP) && !(i & ~0x3)); \ } \ - CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | SLJIT_IMM | REG_MASK | OFFS_REG_MASK))); \ - } - -#define FUNCTION_CHECK_OP1() \ - if (GET_OPCODE(op) >= SLJIT_MOVU && GET_OPCODE(op) <= SLJIT_MOVU_P) { \ - CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); \ - CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); \ - if ((src & SLJIT_MEM) && (src & REG_MASK)) \ - CHECK_ARGUMENT((dst & REG_MASK) != (src & REG_MASK) && OFFS_REG(dst) != (src & REG_MASK)); \ + CHECK_ARGUMENT(!((p) & ~(SLJIT_MEM | REG_MASK | OFFS_REG_MASK))); \ } #endif /* SLJIT_ARGUMENT_CHECKS */ @@ -791,8 +762,10 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp do { \ if ((r) < (SLJIT_R0 + compiler->scratches)) \ fprintf(compiler->verbose, "r%d", (r) - SLJIT_R0); \ - else \ + else if ((r) != SLJIT_SP) \ fprintf(compiler->verbose, "s%d", SLJIT_NUMBER_OF_REGISTERS - (r)); \ + else \ + fprintf(compiler->verbose, "sp"); \ } while (0) #define sljit_verbose_param(compiler, p, i) \ @@ -885,6 +858,7 @@ static char* jump_names[] = { (char*)"sig_greater", (char*)"sig_less_equal", (char*)"overflow", (char*)"not_overflow", (char*)"mul_overflow", (char*)"mul_not_overflow", + (char*)"carry", (char*)"", (char*)"equal", (char*)"not_equal", (char*)"less", (char*)"greater_equal", (char*)"greater", (char*)"less_equal", @@ -929,7 +903,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil SLJIT_UNUSED_ARG(compiler); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT)); + CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); CHECK_ARGUMENT(args >= 0 && args <= 3); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); @@ -939,6 +913,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_enter(struct sljit_compil CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) @@ -952,13 +927,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { - if (SLJIT_UNLIKELY(compiler->skip_checks)) { - compiler->skip_checks = 0; - CHECK_RETURN_OK; - } - #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(!(options & ~SLJIT_DOUBLE_ALIGNMENT)); + CHECK_ARGUMENT(!(options & ~SLJIT_F64_ALIGNMENT)); CHECK_ARGUMENT(args >= 0 && args <= 3); CHECK_ARGUMENT(scratches >= 0 && scratches <= SLJIT_NUMBER_OF_REGISTERS); CHECK_ARGUMENT(saveds >= 0 && saveds <= SLJIT_NUMBER_OF_REGISTERS); @@ -968,6 +938,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_set_context(struct sljit_compi CHECK_ARGUMENT(fsaveds >= 0 && fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(fscratches + fsaveds <= SLJIT_NUMBER_OF_FLOAT_REGISTERS); CHECK_ARGUMENT(local_size >= 0 && local_size <= SLJIT_MAX_LOCAL_SIZE); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) @@ -987,6 +958,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi } else CHECK_ARGUMENT(src == 0 && srcw == 0); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1005,7 +977,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_return(struct sljit_compi static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw); + FUNCTION_CHECK_DST(dst, dstw, 0); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1021,6 +994,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fast_return(struct sljit_ { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) FUNCTION_CHECK_SRC(src, srcw); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1038,6 +1012,8 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op0(struct sljit_compiler CHECK_ARGUMENT((op >= SLJIT_BREAKPOINT && op <= SLJIT_LMUL_SW) || ((op & ~SLJIT_I32_OP) >= SLJIT_DIVMOD_UW && (op & ~SLJIT_I32_OP) <= SLJIT_DIV_SW)); CHECK_ARGUMENT(op < SLJIT_LMUL_UW || compiler->scratches >= 2); + if (op >= SLJIT_LMUL_UW) + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) @@ -1063,10 +1039,49 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV && GET_OPCODE(op) <= SLJIT_CLZ); - FUNCTION_CHECK_OP(); + + switch (GET_OPCODE(op)) { + case SLJIT_NOT: + /* Only SLJIT_I32_OP and SLJIT_SET_Z are allowed. */ + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + break; + case SLJIT_NEG: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_MOV: + case SLJIT_MOV_U32: + case SLJIT_MOV_P: + case SLJIT_MOVU: + case SLJIT_MOVU_U32: + case SLJIT_MOVU_P: + /* Nothing allowed */ + CHECK_ARGUMENT(!(op & (SLJIT_I32_OP | SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + default: + /* Only SLJIT_I32_OP is allowed. */ + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); + break; + } + + FUNCTION_CHECK_DST(dst, dstw, 1); FUNCTION_CHECK_SRC(src, srcw); - FUNCTION_CHECK_DST(dst, dstw); - FUNCTION_CHECK_OP1(); + + if (GET_OPCODE(op) >= SLJIT_NOT) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); + else if (GET_OPCODE(op) >= SLJIT_MOVU) { + CHECK_ARGUMENT(!(src & SLJIT_MEM) || (src & REG_MASK) != SLJIT_SP); + CHECK_ARGUMENT(!(dst & SLJIT_MEM) || (dst & REG_MASK) != SLJIT_SP); + if ((src & REG_MASK) != SLJIT_UNUSED) { + CHECK_ARGUMENT((src & REG_MASK) != (dst & REG_MASK) && (src & REG_MASK) != OFFS_REG(dst)); + CHECK_ARGUMENT((src & OFFS_REG_MASK) == SLJIT_UNUSED || srcw == 0); + } + if ((dst & REG_MASK) != SLJIT_UNUSED) { + CHECK_ARGUMENT((dst & REG_MASK) != OFFS_REG(src)); + CHECK_ARGUMENT((dst & OFFS_REG_MASK) == SLJIT_UNUSED || dstw == 0); + } + compiler->last_flags = 0; + } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1077,9 +1092,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op1(struct sljit_compiler } else { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32", - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", - !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + fprintf(compiler->verbose, " %s%s%s%s%s ", op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", + !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]); } sljit_verbose_param(compiler, dst, dstw); @@ -1103,16 +1118,53 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op2(struct sljit_compiler #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD && GET_OPCODE(op) <= SLJIT_ASHR); - FUNCTION_CHECK_OP(); + + switch (GET_OPCODE(op)) { + case SLJIT_AND: + case SLJIT_OR: + case SLJIT_XOR: + case SLJIT_SHL: + case SLJIT_LSHR: + case SLJIT_ASHR: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + break; + case SLJIT_MUL: + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == SLJIT_MUL_OVERFLOW); + break; + case SLJIT_ADD: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY) + || GET_FLAG_TYPE(op) == SLJIT_OVERFLOW); + break; + case SLJIT_SUB: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_OVERFLOW) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + break; + case SLJIT_ADDC: + case SLJIT_SUBC: + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK) + || GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + CHECK_ARGUMENT((compiler->last_flags & 0xff) == GET_FLAG_TYPE(SLJIT_SET_CARRY)); + CHECK_ARGUMENT((op & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP)); + break; + default: + SLJIT_UNREACHABLE(); + break; + } + + FUNCTION_CHECK_DST(dst, dstw, 1); FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); - FUNCTION_CHECK_DST(dst, dstw); + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32", - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_SET_U) ? "" : ".u", !(op & SLJIT_SET_S) ? "" : ".s", - !(op & SLJIT_SET_O) ? "" : ".o", !(op & SLJIT_SET_C) ? "" : ".c", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k"); + fprintf(compiler->verbose, " %s%s%s%s%s ", op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], !(op & SLJIT_I32_OP) ? "" : "32", + !(op & SLJIT_SET_Z) ? "" : ".z", !(op & VARIABLE_FLAG_MASK) ? "" : ".", + !(op & VARIABLE_FLAG_MASK) ? "" : jump_names[GET_FLAG_TYPE(op)]); sljit_verbose_param(compiler, dst, dstw); fprintf(compiler->verbose, ", "); sljit_verbose_param(compiler, src1, src1w); @@ -1153,6 +1205,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_co #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(instruction); + #if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) CHECK_ARGUMENT(size > 0 && size < 16); #elif (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) @@ -1162,6 +1215,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_custom(struct sljit_co CHECK_ARGUMENT(size == 4 && (((sljit_sw)instruction) & 0x3) == 0); #endif + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1184,9 +1238,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1(struct sljit_compile } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_MOV_F64 && GET_OPCODE(op) <= SLJIT_ABS_F64); - FUNCTION_CHECK_FOP(); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_FCHECK(src, srcw); FUNCTION_FCHECK(dst, dstw); #endif @@ -1212,22 +1266,31 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_cmp(struct sljit_com sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); +#endif + if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; CHECK_RETURN_OK; } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(GET_OPCODE(op) == SLJIT_CMP_F64); - FUNCTION_CHECK_FOP(); + CHECK_ARGUMENT(!(op & SLJIT_SET_Z)); + CHECK_ARGUMENT((op & VARIABLE_FLAG_MASK) + || (GET_FLAG_TYPE(op) >= SLJIT_EQUAL_F64 && GET_FLAG_TYPE(op) <= SLJIT_ORDERED_F64)); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " %s%s%s%s ", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64", - (op & SLJIT_SET_E) ? ".e" : "", (op & SLJIT_SET_S) ? ".s" : ""); + fprintf(compiler->verbose, " %s%s", fop1_names[SLJIT_CMP_F64 - SLJIT_FOP1_BASE], (op & SLJIT_F32_OP) ? ".f32" : ".f64"); + if (op & VARIABLE_FLAG_MASK) { + fprintf(compiler->verbose, ".%s_f", jump_names[GET_FLAG_TYPE(op)]); + } + fprintf(compiler->verbose, " "); sljit_verbose_fparam(compiler, src1, src1w); fprintf(compiler->verbose, ", "); sljit_verbose_fparam(compiler, src2, src2w); @@ -1247,11 +1310,11 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_sw_from_f64(str } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_SW_FROM_F64 && GET_OPCODE(op) <= SLJIT_CONV_S32_FROM_F64); - FUNCTION_CHECK_FOP(); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_FCHECK(src, srcw); - FUNCTION_CHECK_DST(dst, dstw); + FUNCTION_CHECK_DST(dst, dstw, 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1277,9 +1340,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop1_conv_f64_from_sw(str } #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_SW && GET_OPCODE(op) <= SLJIT_CONV_F64_FROM_S32); - FUNCTION_CHECK_FOP(); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_CHECK_SRC(src, srcw); FUNCTION_FCHECK(dst, dstw); #endif @@ -1303,9 +1366,9 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fop2(struct sljit_compile sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(GET_OPCODE(op) >= SLJIT_ADD_F64 && GET_OPCODE(op) <= SLJIT_DIV_F64); - FUNCTION_CHECK_FOP(); + CHECK_ARGUMENT(!(op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK))); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); FUNCTION_FCHECK(dst, dstw); @@ -1328,6 +1391,15 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_label(struct sljit_compil { SLJIT_UNUSED_ARG(compiler); + if (SLJIT_UNLIKELY(compiler->skip_checks)) { + compiler->skip_checks = 0; + CHECK_RETURN_OK; + } + +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = 0; +#endif + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) fprintf(compiler->verbose, "label:\n"); @@ -1344,9 +1416,20 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_jump(struct sljit_compile #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); + CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_CALL3); CHECK_ARGUMENT((type & 0xff) < SLJIT_JUMP || !(type & SLJIT_I32_OP)); CHECK_ARGUMENT((type & 0xff) <= SLJIT_CALL0 || ((type & 0xff) - SLJIT_CALL0) <= compiler->scratches); + + if ((type & 0xff) < SLJIT_JUMP) { + if ((type & 0xff) <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); + CHECK_ARGUMENT((type & SLJIT_I32_OP) == (compiler->last_flags & SLJIT_I32_OP)); + } #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) @@ -1365,6 +1448,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmp(struct sljit_compiler CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_SIG_LESS_EQUAL); FUNCTION_CHECK_SRC(src1, src1w); FUNCTION_CHECK_SRC(src2, src2w); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1384,11 +1468,12 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile sljit_s32 src2, sljit_sw src2w) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(sljit_is_fpu_available()); + CHECK_ARGUMENT(sljit_has_cpu_feature(SLJIT_HAS_FPU)); CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_REWRITABLE_JUMP | SLJIT_F32_OP))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL_F64 && (type & 0xff) <= SLJIT_ORDERED_F64); FUNCTION_FCHECK(src1, src1w); FUNCTION_FCHECK(src2, src2w); + compiler->last_flags = 0; #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1405,6 +1490,10 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_fcmp(struct sljit_compile static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->last_flags = 0; +#endif + if (SLJIT_UNLIKELY(compiler->skip_checks)) { compiler->skip_checks = 0; CHECK_RETURN_OK; @@ -1427,46 +1516,82 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_ijump(struct sljit_compil static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); - CHECK_ARGUMENT(op == SLJIT_MOV || GET_OPCODE(op) == SLJIT_MOV_U32 || GET_OPCODE(op) == SLJIT_MOV_S32 + CHECK_ARGUMENT((type & 0xff) != GET_FLAG_TYPE(SLJIT_SET_CARRY) && (type & 0xff) != (GET_FLAG_TYPE(SLJIT_SET_CARRY) + 1)); + CHECK_ARGUMENT(op == SLJIT_MOV || op == SLJIT_MOV32 || (GET_OPCODE(op) >= SLJIT_AND && GET_OPCODE(op) <= SLJIT_XOR)); - CHECK_ARGUMENT((op & (SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O | SLJIT_SET_C)) == 0); - CHECK_ARGUMENT((op & (SLJIT_SET_E | SLJIT_KEEP_FLAGS)) != (SLJIT_SET_E | SLJIT_KEEP_FLAGS)); - if (GET_OPCODE(op) < SLJIT_ADD) { - CHECK_ARGUMENT(src == SLJIT_UNUSED && srcw == 0); - } else { - CHECK_ARGUMENT(src == dst && srcw == dstw); - } - FUNCTION_CHECK_DST(dst, dstw); + CHECK_ARGUMENT(!(op & VARIABLE_FLAG_MASK)); + + if ((type & 0xff) <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); + + FUNCTION_CHECK_DST(dst, dstw, 0); + + if (GET_OPCODE(op) >= SLJIT_ADD) + compiler->last_flags = GET_FLAG_TYPE(op) | (op & (SLJIT_I32_OP | SLJIT_SET_Z)); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " flags %s%s%s%s, ", - !(op & SLJIT_SET_E) ? "" : ".e", !(op & SLJIT_KEEP_FLAGS) ? "" : ".k", + fprintf(compiler->verbose, " flags%s %s%s, ", + !(op & SLJIT_SET_Z) ? "" : ".z", GET_OPCODE(op) < SLJIT_OP2_BASE ? "mov" : op2_names[GET_OPCODE(op) - SLJIT_OP2_BASE], GET_OPCODE(op) < SLJIT_OP2_BASE ? op1_names[GET_OPCODE(op) - SLJIT_OP1_BASE] : ((op & SLJIT_I32_OP) ? "32" : "")); sljit_verbose_param(compiler, dst, dstw); - if (src != SLJIT_UNUSED) { - fprintf(compiler->verbose, ", "); - sljit_verbose_param(compiler, src, srcw); - } fprintf(compiler->verbose, ", %s%s\n", jump_names[type & 0xff], JUMP_POSTFIX(type)); } #endif CHECK_RETURN_OK; } +static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); + CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP)); + if (src != SLJIT_IMM) { + CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(src)); + CHECK_ARGUMENT(srcw == 0); + } + + if ((type & 0xff) <= SLJIT_NOT_ZERO) + CHECK_ARGUMENT(compiler->last_flags & SLJIT_SET_Z); + else + CHECK_ARGUMENT((type & 0xff) == (compiler->last_flags & 0xff) + || ((type & 0xff) == SLJIT_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_OVERFLOW) + || ((type & 0xff) == SLJIT_MUL_NOT_OVERFLOW && (compiler->last_flags & 0xff) == SLJIT_MUL_OVERFLOW)); +#endif +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + if (SLJIT_UNLIKELY(!!compiler->verbose)) { + fprintf(compiler->verbose, " cmov%s %s%s, ", + !(dst_reg & SLJIT_I32_OP) ? "" : ".i", + jump_names[type & 0xff], JUMP_POSTFIX(type)); + sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP); + fprintf(compiler->verbose, ", "); + sljit_verbose_param(compiler, src, srcw); + fprintf(compiler->verbose, "\n"); + } +#endif + CHECK_RETURN_OK; +} + static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { + /* Any offset is allowed. */ SLJIT_UNUSED_ARG(offset); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw); + FUNCTION_CHECK_DST(dst, dstw, 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1483,7 +1608,7 @@ static SLJIT_INLINE CHECK_RETURN_TYPE check_sljit_emit_const(struct sljit_compil SLJIT_UNUSED_ARG(init_value); #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - FUNCTION_CHECK_DST(dst, dstw); + FUNCTION_CHECK_DST(dst, dstw, 0); #endif #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) if (SLJIT_UNLIKELY(!!compiler->verbose)) { @@ -1544,6 +1669,44 @@ static SLJIT_INLINE sljit_s32 emit_mov_before_return(struct sljit_compiler *comp return sljit_emit_op1(compiler, op, SLJIT_RETURN_REG, 0, src, srcw); } +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) \ + || (defined SLJIT_CONFIG_PPC && SLJIT_CONFIG_PPC) \ + || (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) \ + || ((defined SLJIT_CONFIG_MIPS && SLJIT_CONFIG_MIPS) && !(defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1)) + +static SLJIT_INLINE sljit_s32 sljit_emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + struct sljit_label *label; + struct sljit_jump *jump; + sljit_s32 op = (dst_reg & SLJIT_I32_OP) ? SLJIT_MOV32 : SLJIT_MOV; + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + jump = sljit_emit_jump(compiler, type ^ 0x1); + FAIL_IF(!jump); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + FAIL_IF(sljit_emit_op1(compiler, op, dst_reg & ~SLJIT_I32_OP, 0, src, srcw)); + +#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ + || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) + compiler->skip_checks = 1; +#endif + label = sljit_emit_label(compiler); + FAIL_IF(!label); + sljit_set_label(jump, label); + return SLJIT_SUCCESS; +} + +#endif + /* CPU description section */ #if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE) @@ -1645,6 +1808,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler condition = SLJIT_SIG_GREATER_EQUAL; break; } + type = condition | (type & (SLJIT_I32_OP | SLJIT_REWRITABLE_JUMP)); tmp_src = src1; src1 = src2; @@ -1655,11 +1819,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler } if (condition <= SLJIT_NOT_ZERO) - flags = SLJIT_SET_E; - else if (condition <= SLJIT_LESS_EQUAL) - flags = SLJIT_SET_U; + flags = SLJIT_SET_Z; else - flags = SLJIT_SET_S; + flags = condition << VARIABLE_FLAG_SHIFT; #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) @@ -1671,38 +1833,31 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); + return sljit_emit_jump(compiler, condition | (type & (SLJIT_REWRITABLE_JUMP | SLJIT_I32_OP))); } +#endif + SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 flags, condition; - CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); - condition = type & 0xff; - flags = (condition <= SLJIT_NOT_EQUAL_F64) ? SLJIT_SET_E : SLJIT_SET_S; - if (type & SLJIT_F32_OP) - flags |= SLJIT_F32_OP; - #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - sljit_emit_fop1(compiler, SLJIT_CMP_F64 | flags, src1, src1w, src2, src2w); + sljit_emit_fop1(compiler, SLJIT_CMP_F64 | ((type & 0xff) << VARIABLE_FLAG_SHIFT) | (type & SLJIT_I32_OP), src1, src1w, src2, src2w); #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - return sljit_emit_jump(compiler, condition | (type & SLJIT_REWRITABLE_JUMP)); + return sljit_emit_jump(compiler, type); } -#endif - #if !(defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) @@ -1716,7 +1871,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c compiler->skip_checks = 1; #endif if (offset != 0) - return sljit_emit_op2(compiler, SLJIT_ADD | SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return sljit_emit_op2(compiler, SLJIT_ADD, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); return sljit_emit_op1(compiler, SLJIT_MOV, dst, dstw, SLJIT_SP, 0); } @@ -1731,23 +1886,30 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) return "unsupported"; } -SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void) +SLJIT_API_FUNC_ATTRIBUTE struct sljit_compiler* sljit_create_compiler(void *allocator_data) { - SLJIT_ASSERT_STOP(); + SLJIT_UNUSED_ARG(allocator_data); + SLJIT_UNREACHABLE(); return NULL; } SLJIT_API_FUNC_ATTRIBUTE void sljit_free_compiler(struct sljit_compiler *compiler) { SLJIT_UNUSED_ARG(compiler); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); +} + +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_compiler_memory_error(struct sljit_compiler *compiler) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNREACHABLE(); } SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compiler, sljit_s32 size) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(size); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return NULL; } @@ -1756,21 +1918,28 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *comp { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(verbose); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } #endif SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) { SLJIT_UNUSED_ARG(compiler); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return NULL; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + SLJIT_UNUSED_ARG(feature_type); + SLJIT_UNREACHABLE(); + return 0; +} + SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code) { SLJIT_UNUSED_ARG(code); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, @@ -1785,7 +1954,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi SLJIT_UNUSED_ARG(fscratches); SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -1801,7 +1970,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp SLJIT_UNUSED_ARG(fscratches); SLJIT_UNUSED_ARG(fsaveds); SLJIT_UNUSED_ARG(local_size); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -1811,7 +1980,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp SLJIT_UNUSED_ARG(op); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -1820,7 +1989,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(dst); SLJIT_UNUSED_ARG(dstw); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -1829,7 +1998,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -1837,7 +2006,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -1851,7 +2020,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile SLJIT_UNUSED_ARG(dstw); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -1868,13 +2037,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile SLJIT_UNUSED_ARG(src1w); SLJIT_UNUSED_ARG(src2); SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return reg; } @@ -1884,14 +2053,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(instruction); SLJIT_UNUSED_ARG(size); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, sljit_s32 current_flags) { - SLJIT_ASSERT_STOP(); - return 0; + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(current_flags); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -1904,7 +2073,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil SLJIT_UNUSED_ARG(dstw); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -1921,14 +2090,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil SLJIT_UNUSED_ARG(src1w); SLJIT_UNUSED_ARG(src2); SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler) { SLJIT_UNUSED_ARG(compiler); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return NULL; } @@ -1936,7 +2105,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(type); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return NULL; } @@ -1950,7 +2119,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler SLJIT_UNUSED_ARG(src1w); SLJIT_UNUSED_ARG(src2); SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return NULL; } @@ -1964,7 +2133,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compile SLJIT_UNUSED_ARG(src1w); SLJIT_UNUSED_ARG(src2); SLJIT_UNUSED_ARG(src2w); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return NULL; } @@ -1972,14 +2141,14 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_label(struct sljit_jump *jump, struct sl { SLJIT_UNUSED_ARG(jump); SLJIT_UNUSED_ARG(label); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw target) { SLJIT_UNUSED_ARG(jump); SLJIT_UNUSED_ARG(target); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) @@ -1988,23 +2157,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_UNUSED_ARG(type); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { SLJIT_UNUSED_ARG(compiler); SLJIT_UNUSED_ARG(op); SLJIT_UNUSED_ARG(dst); SLJIT_UNUSED_ARG(dstw); + SLJIT_UNUSED_ARG(type); + SLJIT_UNREACHABLE(); + return SLJIT_ERR_UNSUPPORTED; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + SLJIT_UNUSED_ARG(compiler); + SLJIT_UNUSED_ARG(type); + SLJIT_UNUSED_ARG(dst_reg); SLJIT_UNUSED_ARG(src); SLJIT_UNUSED_ARG(srcw); - SLJIT_UNUSED_ARG(type); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -2014,7 +2193,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c SLJIT_UNUSED_ARG(dst); SLJIT_UNUSED_ARG(dstw); SLJIT_UNUSED_ARG(offset); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_ERR_UNSUPPORTED; } @@ -2024,22 +2203,24 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi SLJIT_UNUSED_ARG(dst); SLJIT_UNUSED_ARG(dstw); SLJIT_UNUSED_ARG(initval); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return NULL; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { SLJIT_UNUSED_ARG(addr); - SLJIT_UNUSED_ARG(new_addr); - SLJIT_ASSERT_STOP(); + SLJIT_UNUSED_ARG(new_target); + SLJIT_UNUSED_ARG(executable_offset); + SLJIT_UNREACHABLE(); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { SLJIT_UNUSED_ARG(addr); SLJIT_UNUSED_ARG(new_constant); - SLJIT_ASSERT_STOP(); + SLJIT_UNUSED_ARG(executable_offset); + SLJIT_UNREACHABLE(); } #endif diff --git a/src/3rdparty/pcre2/src/sljit/sljitLir.h b/src/3rdparty/pcre2/src/sljit/sljitLir.h index df69b8656f..470c84f592 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitLir.h +++ b/src/3rdparty/pcre2/src/sljit/sljitLir.h @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -99,6 +99,8 @@ of sljitConfigInternal.h */ #define SLJIT_ERR_UNSUPPORTED 4 /* An ivalid argument is passed to any SLJIT function. */ #define SLJIT_ERR_BAD_ARGUMENT 5 +/* Dynamic code modification is not enabled. */ +#define SLJIT_ERR_DYN_CODE_MOD 6 /* --------------------------------------------------------------------- */ /* Registers */ @@ -118,8 +120,8 @@ of sljitConfigInternal.h */ If an architecture provides two scratch and three saved registers, its scratch and saved register sets are the following: - R0 | [S4] | R0 and S4 represent the same physical register - R1 | [S3] | R1 and S3 represent the same physical register + R0 | | R0 is always a scratch register + R1 | | R1 is always a scratch register [R2] | S2 | R2 and S2 represent the same physical register [R3] | S1 | R3 and S1 represent the same physical register [R4] | S0 | R4 and S0 represent the same physical register @@ -127,38 +129,35 @@ of sljitConfigInternal.h */ Note: SLJIT_NUMBER_OF_SCRATCH_REGISTERS would be 2 and SLJIT_NUMBER_OF_SAVED_REGISTERS would be 3 for this architecture. - Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 10 - and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 5. However, 4 registers + Note: On all supported architectures SLJIT_NUMBER_OF_REGISTERS >= 12 + and SLJIT_NUMBER_OF_SAVED_REGISTERS >= 6. However, 6 registers are virtual on x86-32. See below. - The purpose of this definition is convenience. Although a register - is either scratch register or saved register, SLJIT allows accessing - them from the other set. For example, four registers can be used as - scratch registers and the fifth one as saved register on the architecture - above. Of course the last two scratch registers (R2 and R3) from this - four will be saved on the stack, because they are defined as saved - registers in the application binary interface. Still R2 and R3 can be - used for referencing to these registers instead of S2 and S1, which - makes easier to write platform independent code. Scratch registers - can be saved registers in a similar way, but these extra saved - registers will not be preserved across function calls! Hence the - application must save them on those platforms, where the number of - saved registers is too low. This can be done by copy them onto - the stack and restore them after a function call. + The purpose of this definition is convenience: saved registers can + be used as extra scratch registers. For example four registers can + be specified as scratch registers and the fifth one as saved register + on the CPU above and any user code which requires four scratch + registers can run unmodified. The SLJIT compiler automatically saves + the content of the two extra scrath register on the stack. Scratch + registers can also be preserved by saving their value on the stack + but this needs to be done manually. Note: To emphasize that registers assigned to R2-R4 are saved - registers, they are enclosed by square brackets. S3-S4 - are marked in a similar way. + registers, they are enclosed by square brackets. Note: sljit_emit_enter and sljit_set_context defines whether a register is S or R register. E.g: when 3 scratches and 1 saved is mapped by sljit_emit_enter, the allowed register set will be: R0-R2 and S0. Although S2 is mapped to the same position as R2, it does not - available in the current configuration. Furthermore the R3 (S1) - register does not available as well. + available in the current configuration. Furthermore the S1 register + is not available at all. */ -/* When SLJIT_UNUSED is specified as destination, the result is discarded. */ +/* When SLJIT_UNUSED is specified as the destination of sljit_emit_op1 and + and sljit_emit_op2 operations the result is discarded. If no status + flags are set, no instructions are emitted for these operations. Data + prefetch is a special exception, see SLJIT_MOV operation. Other SLJIT + operations do not support SLJIT_UNUSED as a destination operand. */ #define SLJIT_UNUSED 0 /* Scratch registers. */ @@ -323,19 +322,22 @@ struct sljit_compiler { sljit_s32 local_size; /* Code size. */ sljit_uw size; - /* For statistical purposes. */ + /* Relative offset of the executable mapping from the writable mapping. */ + sljit_uw executable_offset; + /* Executable size for statistical purposes. */ sljit_uw executable_size; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) sljit_s32 args; + sljit_s32 locals_offset; + sljit_s32 saveds_offset; #endif #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_s32 mode32; +#ifdef _WIN64 + sljit_s32 locals_offset; #endif - -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) - sljit_s32 flags_saved; #endif #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) @@ -352,13 +354,6 @@ struct sljit_compiler { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) || (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) /* Temporary fields. */ sljit_uw shift_imm; - sljit_s32 cache_arg; - sljit_sw cache_argw; -#endif - -#if (defined SLJIT_CONFIG_ARM_THUMB2 && SLJIT_CONFIG_ARM_THUMB2) - sljit_s32 cache_arg; - sljit_sw cache_argw; #endif #if (defined SLJIT_CONFIG_ARM_64 && SLJIT_CONFIG_ARM_64) @@ -395,6 +390,9 @@ struct sljit_compiler { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) + /* Flags specified by the last arithmetic instruction. + It contains the type of the variable flag. */ + sljit_s32 last_flags; /* Local size passed to the functions. */ sljit_s32 logical_local_size; #endif @@ -402,6 +400,7 @@ struct sljit_compiler { #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) \ || (defined SLJIT_DEBUG && SLJIT_DEBUG) \ || (defined SLJIT_VERBOSE && SLJIT_VERBOSE) + /* Trust arguments when the API function is called. */ sljit_s32 skip_checks; #endif }; @@ -455,19 +454,67 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_alloc_memory(struct sljit_compiler *compile SLJIT_API_FUNC_ATTRIBUTE void sljit_compiler_verbose(struct sljit_compiler *compiler, FILE* verbose); #endif +/* + Create executable code from the sljit instruction stream. This is the final step + of the code generation so no more instructions can be added after this call. +*/ + SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler); + +/* Free executable code. */ + SLJIT_API_FUNC_ATTRIBUTE void sljit_free_code(void* code); /* - After the machine code generation is finished we can retrieve the allocated - executable memory size, although this area may not be fully filled with - instructions depending on some optimizations. This function is useful only - for statistical purposes. + When the protected executable allocator is used the JIT code is mapped + twice. The first mapping has read/write and the second mapping has read/exec + permissions. This function returns with the relative offset of the executable + mapping using the writable mapping as the base after the machine code is + successfully generated. The returned value is always 0 for the normal executable + allocator, since it uses only one mapping with read/write/exec permissions. + Dynamic code modifications requires this value. + + Before a successful code generation, this function returns with 0. +*/ +static SLJIT_INLINE sljit_sw sljit_get_executable_offset(struct sljit_compiler *compiler) { return compiler->executable_offset; } + +/* + The executable memory consumption of the generated code can be retrieved by + this function. The returned value can be used for statistical purposes. Before a successful code generation, this function returns with 0. */ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler *compiler) { return compiler->executable_size; } +/* Returns with non-zero if the feature or limitation type passed as its + argument is present on the current CPU. + + Some features (e.g. floating point operations) require hardware (CPU) + support while others (e.g. move with update) are emulated if not available. + However even if a feature is emulated, specialized code paths can be faster + than the emulation. Some limitations are emulated as well so their general + case is supported but it has extra performance costs. */ + +/* [Not emulated] Floating-point support is available. */ +#define SLJIT_HAS_FPU 0 +/* [Limitation] Some registers are virtual registers. */ +#define SLJIT_HAS_VIRTUAL_REGISTERS 1 +/* [Emulated] Some forms of move with pre update is supported. */ +#define SLJIT_HAS_PRE_UPDATE 2 +/* [Emulated] Count leading zero is supported. */ +#define SLJIT_HAS_CLZ 3 +/* [Emulated] Conditional move is supported. */ +#define SLJIT_HAS_CMOV 4 +/* [Limitation] [Emulated] Shifting with register is limited to SLJIT_PREF_SHIFT_REG. */ +#define SLJIT_HAS_PREF_SHIFT_REG 5 + +#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) +/* [Not emulated] SSE2 support is available on x86. */ +#define SLJIT_HAS_SSE2 100 +#endif + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type); + /* Instruction generation. Returns with any error code. If there is no error, they return with SLJIT_SUCCESS. */ @@ -512,8 +559,8 @@ static SLJIT_INLINE sljit_uw sljit_get_generated_code_size(struct sljit_compiler */ /* The absolute address returned by sljit_get_local_base with -offset 0 is aligned to sljit_d. Otherwise it is aligned to sljit_uw. */ -#define SLJIT_DOUBLE_ALIGNMENT 0x00000001 +offset 0 is aligned to sljit_f64. Otherwise it is aligned to sljit_sw. */ +#define SLJIT_F64_ALIGNMENT 0x00000001 /* The local_size must be >= 0 and <= SLJIT_MAX_LOCAL_SIZE. */ #define SLJIT_MAX_LOCAL_SIZE 65536 @@ -555,7 +602,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp and setting up a new stack frame would cost too much performance. However, it is still possible to return to the address of the caller (or anywhere else). */ -/* Note: flags are not changed (unlike sljit_emit_enter / sljit_emit_return). */ +/* Note: may destroy flags. */ /* Note: although sljit_emit_fast_return could be replaced by an ijump, it is not suggested, since many architectures do clever branch prediction on call / return instruction pairs. */ @@ -624,57 +671,97 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler #define SLJIT_MEM2(r1, r2) (SLJIT_MEM | (r1) | ((r2) << 8)) #define SLJIT_IMM 0x40 -/* Set 32 bit operation mode (I) on 64 bit CPUs. This flag is ignored on 32 - bit CPUs. When this flag is set for an arithmetic operation, only the - lower 32 bit of the input register(s) are used, and the CPU status flags - are set according to the 32 bit result. Although the higher 32 bit of - the input and the result registers are not defined by SLJIT, it might be - defined by the CPU architecture (e.g. MIPS). To satisfy these requirements - all source registers must be computed by operations where this flag is - also set. In other words 32 and 64 bit arithmetic operations cannot be - mixed. The only exception is SLJIT_IMOV and SLJIT_IMOVU whose source - register can hold any 32 or 64 bit value. This source register is - converted to a 32 bit compatible format. SLJIT does not generate any - instructions on certain CPUs (e.g. on x86 and ARM) if the source and - destination operands are the same registers. Affects sljit_emit_op0, - sljit_emit_op1 and sljit_emit_op2. */ +/* Set 32 bit operation mode (I) on 64 bit CPUs. This option is ignored on + 32 bit CPUs. When this option is set for an arithmetic operation, only + the lower 32 bit of the input registers are used, and the CPU status + flags are set according to the 32 bit result. Although the higher 32 bit + of the input and the result registers are not defined by SLJIT, it might + be defined by the CPU architecture (e.g. MIPS). To satisfy these CPU + requirements all source registers must be the result of those operations + where this option was also set. Memory loads read 32 bit values rather + than 64 bit ones. In other words 32 bit and 64 bit operations cannot + be mixed. The only exception is SLJIT_MOV32 and SLJIT_MOVU32 whose source + register can hold any 32 or 64 bit value, and it is converted to a 32 bit + compatible format first. This conversion is free (no instructions are + emitted) on most CPUs. A 32 bit value can also be coverted to a 64 bit + value by SLJIT_MOV_S32 (sign extension) or SLJIT_MOV_U32 (zero extension). + + Note: memory addressing always uses 64 bit values on 64 bit systems so + the result of a 32 bit operation must not be used with SLJIT_MEMx + macros. + + This option is part of the instruction name, so there is no need to + manually set it. E.g: + + SLJIT_ADD32 == (SLJIT_ADD | SLJIT_I32_OP) */ #define SLJIT_I32_OP 0x100 -/* F32 precision mode (SP). This flag is similar to SLJIT_I32_OP, just - it applies to floating point registers (it is even the same bit). When - this flag is passed, the CPU performs 32 bit floating point operations. - Similar to SLJIT_I32_OP, all register arguments must be computed by - floating point operations where this flag is also set. Affects - sljit_emit_fop1, sljit_emit_fop2 and sljit_emit_fcmp. */ -#define SLJIT_F32_OP 0x100 - -/* Common CPU status flags for all architectures (x86, ARM, PPC) - - carry flag - - overflow flag - - zero flag - - negative/positive flag (depends on arc) - On mips, these flags are emulated by software. */ - -/* By default, the instructions may, or may not set the CPU status flags. - Forcing to set or keep status flags can be done with the following flags: */ - -/* Note: sljit tries to emit the minimum number of instructions. Using these - flags can increase them, so use them wisely to avoid unnecessary code generation. */ - -/* Set Equal (Zero) status flag (E). */ -#define SLJIT_SET_E 0x0200 -/* Set unsigned status flag (U). */ -#define SLJIT_SET_U 0x0400 -/* Set signed status flag (S). */ -#define SLJIT_SET_S 0x0800 -/* Set signed overflow flag (O). */ -#define SLJIT_SET_O 0x1000 -/* Set carry flag (C). - Note: Kinda unsigned overflow, but behaves differently on various cpus. */ -#define SLJIT_SET_C 0x2000 -/* Do not modify the flags (K). - Note: This flag cannot be combined with any other SLJIT_SET_* flag. */ -#define SLJIT_KEEP_FLAGS 0x4000 +/* Set F32 (single) precision mode for floating-point computation. This + option is similar to SLJIT_I32_OP, it just applies to floating point + registers. When this option is passed, the CPU performs 32 bit floating + point operations, rather than 64 bit one. Similar to SLJIT_I32_OP, all + register arguments must be the result of those operations where this + option was also set. + + This option is part of the instruction name, so there is no need to + manually set it. E.g: + + SLJIT_MOV_F32 = (SLJIT_MOV_F64 | SLJIT_F32_OP) + */ +#define SLJIT_F32_OP SLJIT_I32_OP + +/* Many CPUs (x86, ARM, PPC) has status flags which can be set according + to the result of an operation. Other CPUs (MIPS) does not have status + flags, and results must be stored in registers. To cover both architecture + types efficiently only two flags are defined by SLJIT: + + * Zero (equal) flag: it is set if the result is zero + * Variable flag: its value is defined by the last arithmetic operation + + SLJIT instructions can set any or both of these flags. The value of + these flags is undefined if the instruction does not specify their value. + The description of each instruction contains the list of allowed flag + types. + + Example: SLJIT_ADD can set the Z, OVERFLOW, CARRY flags hence + + sljit_op2(..., SLJIT_ADD, ...) + Both the zero and variable flags are undefined so they can + have any value after the operation is completed. + + sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z, ...) + Sets the zero flag if the result is zero, clears it otherwise. + The variable flag is undefined. + + sljit_op2(..., SLJIT_ADD | SLJIT_SET_OVERFLOW, ...) + Sets the variable flag if an integer overflow occurs, clears + it otherwise. The zero flag is undefined. + + sljit_op2(..., SLJIT_ADD | SLJIT_SET_Z | SLJIT_SET_CARRY, ...) + Sets the zero flag if the result is zero, clears it otherwise. + Sets the variable flag if unsigned overflow (carry) occurs, + clears it otherwise. + + If an instruction (e.g. SLJIT_MOV) does not modify flags the flags are + unchanged. + + Using these flags can reduce the number of emitted instructions. E.g. a + fast loop can be implemented by decreasing a counter register and set the + zero flag to jump back if the counter register is not reached zero. + + Motivation: although CPUs can set a large number of flags, usually their + values are ignored or only one of them is used. Emulating a large number + of flags on systems without flag register is complicated so SLJIT + instructions must specify the flag they want to use and only that flag + will be emulated. The last arithmetic instruction can be repeated if + multiple flags needs to be checked. +*/ + +/* Set Zero status flag. */ +#define SLJIT_SET_Z 0x0200 +/* Set the variable status flag if condition is true. + See comparison types. */ +#define SLJIT_SET(condition) ((condition) << 10) /* Notes: - you cannot postpone conditional jump instructions except if noted that @@ -684,11 +771,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler /* Starting index of opcodes for sljit_emit_op0. */ #define SLJIT_OP0_BASE 0 -/* Flags: - (never set any flags) +/* Flags: - (does not modify flags) Note: breakpoint instruction is not supported by all architectures (e.g. ppc) It falls back to SLJIT_NOP in those cases. */ #define SLJIT_BREAKPOINT (SLJIT_OP0_BASE + 0) -/* Flags: - (never set any flags) +/* Flags: - (does not modify flags) Note: may or may not cause an extra cycle wait it can even decrease the runtime in a few cases. */ #define SLJIT_NOP (SLJIT_OP0_BASE + 1) @@ -700,13 +787,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler Signed multiplication of SLJIT_R0 and SLJIT_R1. Result is placed into SLJIT_R1:SLJIT_R0 (high:low) word */ #define SLJIT_LMUL_SW (SLJIT_OP0_BASE + 3) -/* Flags: I - (may destroy flags) +/* Flags: - (may destroy flags) Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. Note: if SLJIT_R1 is 0, the behaviour is undefined. */ #define SLJIT_DIVMOD_UW (SLJIT_OP0_BASE + 4) #define SLJIT_DIVMOD_U32 (SLJIT_DIVMOD_UW | SLJIT_I32_OP) -/* Flags: I - (may destroy flags) +/* Flags: - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0 and the remainder into SLJIT_R1. Note: if SLJIT_R1 is 0, the behaviour is undefined. @@ -714,13 +801,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler the behaviour is undefined. */ #define SLJIT_DIVMOD_SW (SLJIT_OP0_BASE + 5) #define SLJIT_DIVMOD_S32 (SLJIT_DIVMOD_SW | SLJIT_I32_OP) -/* Flags: I - (may destroy flags) +/* Flags: - (may destroy flags) Unsigned divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. Note: if SLJIT_R1 is 0, the behaviour is undefined. */ #define SLJIT_DIV_UW (SLJIT_OP0_BASE + 6) #define SLJIT_DIV_U32 (SLJIT_DIV_UW | SLJIT_I32_OP) -/* Flags: I - (may destroy flags) +/* Flags: - (may destroy flags) Signed divide of the value in SLJIT_R0 by the value in SLJIT_R1. The result is placed into SLJIT_R0. SLJIT_R1 preserves its value. Note: if SLJIT_R1 is 0, the behaviour is undefined. @@ -734,75 +821,113 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile /* Starting index of opcodes for sljit_emit_op1. */ #define SLJIT_OP1_BASE 32 -/* Notes for MOV instructions: - U = Mov with update (pre form). If source or destination defined as SLJIT_MEM1(r1) - or SLJIT_MEM2(r1, r2), r1 is increased by the sum of r2 and the constant argument - UB = unsigned byte (8 bit) - SB = signed byte (8 bit) - UH = unsigned half (16 bit) - SH = signed half (16 bit) - UI = unsigned int (32 bit) - SI = signed int (32 bit) - P = pointer (sljit_p) size */ - -/* Flags: - (never set any flags) */ +/* The MOV instruction transfer data from source to destination. + + MOV instruction suffixes: + + U8 - unsigned 8 bit data transfer + S8 - signed 8 bit data transfer + U16 - unsigned 16 bit data transfer + S16 - signed 16 bit data transfer + U32 - unsigned int (32 bit) data transfer + S32 - signed int (32 bit) data transfer + P - pointer (sljit_p) data transfer + + U = move with update (pre form). If source or destination defined as + SLJIT_MEM1(r1) or SLJIT_MEM2(r1, r2), r1 is increased by the + offset part of the address. + + Register arguments and base registers can only be used once for move + with update instructions. The shift value of SLJIT_MEM2 addressing + mode must also be 0. Reason: SLJIT_MOVU instructions are expected to + be in high-performance loops where complex instruction emulation + would be too costly. + + Examples for invalid move with update instructions: + + sljit_emit_op1(..., SLJIT_MOVU_U8, + SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), 8); + sljit_emit_op1(..., SLJIT_MOVU_U8, + SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_R0, 0); + sljit_emit_op1(..., SLJIT_MOVU_U8, + SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 0, SLJIT_MEM1(SLJIT_R0), 8); + sljit_emit_op1(..., SLJIT_MOVU_U8, + SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0); + sljit_emit_op1(..., SLJIT_MOVU_U8, + SLJIT_R2, 0, SLJIT_MEM2(SLJIT_R0, SLJIT_R1), 1); + + The following example is valid, since only the offset register is + used multiple times: + + sljit_emit_op1(..., SLJIT_MOVU_U8, + SLJIT_MEM2(SLJIT_R0, SLJIT_R2), 0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 0); + + If the destination of a MOV without update instruction is SLJIT_UNUSED + and the source operand is a memory address the compiler emits a prefetch + instruction if this instruction is supported by the current CPU. + Higher data sizes bring the data closer to the core: a MOV with word + size loads the data into a higher level cache than a byte size. Otherwise + the type does not affect the prefetch instruction. Furthermore a prefetch + instruction never fails, so it can be used to prefetch a data from an + address and check whether that address is NULL afterwards. +*/ + +/* Flags: - (does not modify flags) */ #define SLJIT_MOV (SLJIT_OP1_BASE + 0) -/* Flags: I - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_MOV_U8 (SLJIT_OP1_BASE + 1) #define SLJIT_MOV32_U8 (SLJIT_MOV_U8 | SLJIT_I32_OP) -/* Flags: I - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_MOV_S8 (SLJIT_OP1_BASE + 2) #define SLJIT_MOV32_S8 (SLJIT_MOV_S8 | SLJIT_I32_OP) -/* Flags: I - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_MOV_U16 (SLJIT_OP1_BASE + 3) #define SLJIT_MOV32_U16 (SLJIT_MOV_U16 | SLJIT_I32_OP) -/* Flags: I - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_MOV_S16 (SLJIT_OP1_BASE + 4) #define SLJIT_MOV32_S16 (SLJIT_MOV_S16 | SLJIT_I32_OP) -/* Flags: I - (never set any flags) +/* Flags: - (does not modify flags) Note: no SLJIT_MOV32_U32 form, since it is the same as SLJIT_MOV32 */ #define SLJIT_MOV_U32 (SLJIT_OP1_BASE + 5) -/* Flags: I - (never set any flags) +/* Flags: - (does not modify flags) Note: no SLJIT_MOV32_S32 form, since it is the same as SLJIT_MOV32 */ #define SLJIT_MOV_S32 (SLJIT_OP1_BASE + 6) -/* Flags: I - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_MOV32 (SLJIT_MOV_S32 | SLJIT_I32_OP) -/* Flags: - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_MOV_P (SLJIT_OP1_BASE + 7) -/* Flags: - (never set any flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_MOVU (SLJIT_OP1_BASE + 8) -/* Flags: I - (never set any flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_MOVU_U8 (SLJIT_OP1_BASE + 9) #define SLJIT_MOVU32_U8 (SLJIT_MOVU_U8 | SLJIT_I32_OP) -/* Flags: I - (never set any flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_MOVU_S8 (SLJIT_OP1_BASE + 10) #define SLJIT_MOVU32_S8 (SLJIT_MOVU_S8 | SLJIT_I32_OP) -/* Flags: I - (never set any flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_MOVU_U16 (SLJIT_OP1_BASE + 11) #define SLJIT_MOVU32_U16 (SLJIT_MOVU_U16 | SLJIT_I32_OP) -/* Flags: I - (never set any flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_MOVU_S16 (SLJIT_OP1_BASE + 12) #define SLJIT_MOVU32_S16 (SLJIT_MOVU_S16 | SLJIT_I32_OP) -/* Flags: I - (never set any flags) +/* Flags: - (may destroy flags) Note: no SLJIT_MOVU32_U32 form, since it is the same as SLJIT_MOVU32 */ #define SLJIT_MOVU_U32 (SLJIT_OP1_BASE + 13) -/* Flags: I - (never set any flags) +/* Flags: - (may destroy flags) Note: no SLJIT_MOVU32_S32 form, since it is the same as SLJIT_MOVU32 */ #define SLJIT_MOVU_S32 (SLJIT_OP1_BASE + 14) -/* Flags: I - (never set any flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_MOVU32 (SLJIT_MOVU_S32 | SLJIT_I32_OP) -/* Flags: - (never set any flags) */ +/* Flags: - (may destroy flags) */ #define SLJIT_MOVU_P (SLJIT_OP1_BASE + 15) -/* Flags: I | E | K */ +/* Flags: Z */ #define SLJIT_NOT (SLJIT_OP1_BASE + 16) #define SLJIT_NOT32 (SLJIT_NOT | SLJIT_I32_OP) -/* Flags: I | E | O | K */ +/* Flags: Z | OVERFLOW */ #define SLJIT_NEG (SLJIT_OP1_BASE + 17) #define SLJIT_NEG32 (SLJIT_NEG | SLJIT_I32_OP) /* Count leading zeroes - Flags: I | E | K - Important note! Sparc 32 does not support K flag, since - the required popc instruction is introduced only in sparc 64. */ + Flags: - (may destroy flags) */ #define SLJIT_CLZ (SLJIT_OP1_BASE + 18) #define SLJIT_CLZ32 (SLJIT_CLZ | SLJIT_I32_OP) @@ -813,46 +938,48 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile /* Starting index of opcodes for sljit_emit_op2. */ #define SLJIT_OP2_BASE 96 -/* Flags: I | E | O | C | K */ +/* Flags: Z | OVERFLOW | CARRY */ #define SLJIT_ADD (SLJIT_OP2_BASE + 0) #define SLJIT_ADD32 (SLJIT_ADD | SLJIT_I32_OP) -/* Flags: I | C | K */ +/* Flags: CARRY */ #define SLJIT_ADDC (SLJIT_OP2_BASE + 1) #define SLJIT_ADDC32 (SLJIT_ADDC | SLJIT_I32_OP) -/* Flags: I | E | U | S | O | C | K */ +/* Flags: Z | LESS | GREATER_EQUAL | GREATER | LESS_EQUAL + SIG_LESS | SIG_GREATER_EQUAL | SIG_GREATER + SIG_LESS_EQUAL | CARRY */ #define SLJIT_SUB (SLJIT_OP2_BASE + 2) #define SLJIT_SUB32 (SLJIT_SUB | SLJIT_I32_OP) -/* Flags: I | C | K */ +/* Flags: CARRY */ #define SLJIT_SUBC (SLJIT_OP2_BASE + 3) #define SLJIT_SUBC32 (SLJIT_SUBC | SLJIT_I32_OP) /* Note: integer mul - Flags: I | O (see SLJIT_C_MUL_*) | K */ + Flags: MUL_OVERFLOW */ #define SLJIT_MUL (SLJIT_OP2_BASE + 4) #define SLJIT_MUL32 (SLJIT_MUL | SLJIT_I32_OP) -/* Flags: I | E | K */ +/* Flags: Z */ #define SLJIT_AND (SLJIT_OP2_BASE + 5) #define SLJIT_AND32 (SLJIT_AND | SLJIT_I32_OP) -/* Flags: I | E | K */ +/* Flags: Z */ #define SLJIT_OR (SLJIT_OP2_BASE + 6) #define SLJIT_OR32 (SLJIT_OR | SLJIT_I32_OP) -/* Flags: I | E | K */ +/* Flags: Z */ #define SLJIT_XOR (SLJIT_OP2_BASE + 7) #define SLJIT_XOR32 (SLJIT_XOR | SLJIT_I32_OP) -/* Flags: I | E | K +/* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_SHL (SLJIT_OP2_BASE + 8) #define SLJIT_SHL32 (SLJIT_SHL | SLJIT_I32_OP) -/* Flags: I | E | K +/* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 to bit_length - 1, the result is undefined. */ #define SLJIT_LSHR (SLJIT_OP2_BASE + 9) #define SLJIT_LSHR32 (SLJIT_LSHR | SLJIT_I32_OP) -/* Flags: I | E | K +/* Flags: Z Let bit_length be the length of the shift operation: 32 or 64. If src2 is immediate, src2w is masked by (bit_length - 1). Otherwise, if the content of src2 is outside the range from 0 @@ -865,44 +992,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); -/* Returns with non-zero if fpu is available. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void); - /* Starting index of opcodes for sljit_emit_fop1. */ #define SLJIT_FOP1_BASE 128 -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_MOV_F64 (SLJIT_FOP1_BASE + 0) #define SLJIT_MOV_F32 (SLJIT_MOV_F64 | SLJIT_F32_OP) /* Convert opcodes: CONV[DST_TYPE].FROM[SRC_TYPE] SRC/DST TYPE can be: D - double, S - single, W - signed word, I - signed int Rounding mode when the destination is W or I: round towards zero. */ -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_CONV_F64_FROM_F32 (SLJIT_FOP1_BASE + 1) #define SLJIT_CONV_F32_FROM_F64 (SLJIT_CONV_F64_FROM_F32 | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_CONV_SW_FROM_F64 (SLJIT_FOP1_BASE + 2) #define SLJIT_CONV_SW_FROM_F32 (SLJIT_CONV_SW_FROM_F64 | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_CONV_S32_FROM_F64 (SLJIT_FOP1_BASE + 3) #define SLJIT_CONV_S32_FROM_F32 (SLJIT_CONV_S32_FROM_F64 | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_CONV_F64_FROM_SW (SLJIT_FOP1_BASE + 4) #define SLJIT_CONV_F32_FROM_SW (SLJIT_CONV_F64_FROM_SW | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_CONV_F64_FROM_S32 (SLJIT_FOP1_BASE + 5) #define SLJIT_CONV_F32_FROM_S32 (SLJIT_CONV_F64_FROM_S32 | SLJIT_F32_OP) /* Note: dst is the left and src is the right operand for SLJIT_CMPD. - Note: NaN check is always performed. If SLJIT_C_FLOAT_UNORDERED flag - is set, the comparison result is unpredictable. - Flags: SP | E | S (see SLJIT_C_FLOAT_*) */ + Flags: EQUAL_F | LESS_F | GREATER_EQUAL_F | GREATER_F | LESS_EQUAL_F */ #define SLJIT_CMP_F64 (SLJIT_FOP1_BASE + 6) #define SLJIT_CMP_F32 (SLJIT_CMP_F64 | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_NEG_F64 (SLJIT_FOP1_BASE + 7) #define SLJIT_NEG_F32 (SLJIT_NEG_F64 | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_ABS_F64 (SLJIT_FOP1_BASE + 8) #define SLJIT_ABS_F32 (SLJIT_ABS_F64 | SLJIT_F32_OP) @@ -913,16 +1034,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil /* Starting index of opcodes for sljit_emit_fop2. */ #define SLJIT_FOP2_BASE 160 -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_ADD_F64 (SLJIT_FOP2_BASE + 0) #define SLJIT_ADD_F32 (SLJIT_ADD_F64 | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_SUB_F64 (SLJIT_FOP2_BASE + 1) #define SLJIT_SUB_F32 (SLJIT_SUB_F64 | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_MUL_F64 (SLJIT_FOP2_BASE + 2) #define SLJIT_MUL_F32 (SLJIT_MUL_F64 | SLJIT_F32_OP) -/* Flags: SP - (never set any flags) */ +/* Flags: - (does not modify flags) */ #define SLJIT_DIV_F64 (SLJIT_FOP2_BASE + 3) #define SLJIT_DIV_F32 (SLJIT_DIV_F64 | SLJIT_F32_OP) @@ -949,56 +1070,77 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi #define SLJIT_LESS 2 #define SLJIT_LESS32 (SLJIT_LESS | SLJIT_I32_OP) +#define SLJIT_SET_LESS SLJIT_SET(SLJIT_LESS) #define SLJIT_GREATER_EQUAL 3 #define SLJIT_GREATER_EQUAL32 (SLJIT_GREATER_EQUAL | SLJIT_I32_OP) +#define SLJIT_SET_GREATER_EQUAL SLJIT_SET(SLJIT_GREATER_EQUAL) #define SLJIT_GREATER 4 #define SLJIT_GREATER32 (SLJIT_GREATER | SLJIT_I32_OP) +#define SLJIT_SET_GREATER SLJIT_SET(SLJIT_GREATER) #define SLJIT_LESS_EQUAL 5 #define SLJIT_LESS_EQUAL32 (SLJIT_LESS_EQUAL | SLJIT_I32_OP) +#define SLJIT_SET_LESS_EQUAL SLJIT_SET(SLJIT_LESS_EQUAL) #define SLJIT_SIG_LESS 6 #define SLJIT_SIG_LESS32 (SLJIT_SIG_LESS | SLJIT_I32_OP) +#define SLJIT_SET_SIG_LESS SLJIT_SET(SLJIT_SIG_LESS) #define SLJIT_SIG_GREATER_EQUAL 7 #define SLJIT_SIG_GREATER_EQUAL32 (SLJIT_SIG_GREATER_EQUAL | SLJIT_I32_OP) +#define SLJIT_SET_SIG_GREATER_EQUAL SLJIT_SET(SLJIT_SIG_GREATER_EQUAL) #define SLJIT_SIG_GREATER 8 #define SLJIT_SIG_GREATER32 (SLJIT_SIG_GREATER | SLJIT_I32_OP) +#define SLJIT_SET_SIG_GREATER SLJIT_SET(SLJIT_SIG_GREATER) #define SLJIT_SIG_LESS_EQUAL 9 #define SLJIT_SIG_LESS_EQUAL32 (SLJIT_SIG_LESS_EQUAL | SLJIT_I32_OP) +#define SLJIT_SET_SIG_LESS_EQUAL SLJIT_SET(SLJIT_SIG_LESS_EQUAL) #define SLJIT_OVERFLOW 10 #define SLJIT_OVERFLOW32 (SLJIT_OVERFLOW | SLJIT_I32_OP) +#define SLJIT_SET_OVERFLOW SLJIT_SET(SLJIT_OVERFLOW) #define SLJIT_NOT_OVERFLOW 11 #define SLJIT_NOT_OVERFLOW32 (SLJIT_NOT_OVERFLOW | SLJIT_I32_OP) #define SLJIT_MUL_OVERFLOW 12 #define SLJIT_MUL_OVERFLOW32 (SLJIT_MUL_OVERFLOW | SLJIT_I32_OP) +#define SLJIT_SET_MUL_OVERFLOW SLJIT_SET(SLJIT_MUL_OVERFLOW) #define SLJIT_MUL_NOT_OVERFLOW 13 #define SLJIT_MUL_NOT_OVERFLOW32 (SLJIT_MUL_NOT_OVERFLOW | SLJIT_I32_OP) +/* There is no SLJIT_CARRY or SLJIT_NOT_CARRY. */ +#define SLJIT_SET_CARRY SLJIT_SET(14) + /* Floating point comparison types. */ -#define SLJIT_EQUAL_F64 14 +#define SLJIT_EQUAL_F64 16 #define SLJIT_EQUAL_F32 (SLJIT_EQUAL_F64 | SLJIT_F32_OP) -#define SLJIT_NOT_EQUAL_F64 15 +#define SLJIT_SET_EQUAL_F SLJIT_SET(SLJIT_EQUAL_F64) +#define SLJIT_NOT_EQUAL_F64 17 #define SLJIT_NOT_EQUAL_F32 (SLJIT_NOT_EQUAL_F64 | SLJIT_F32_OP) -#define SLJIT_LESS_F64 16 +#define SLJIT_SET_NOT_EQUAL_F SLJIT_SET(SLJIT_NOT_EQUAL_F64) +#define SLJIT_LESS_F64 18 #define SLJIT_LESS_F32 (SLJIT_LESS_F64 | SLJIT_F32_OP) -#define SLJIT_GREATER_EQUAL_F64 17 +#define SLJIT_SET_LESS_F SLJIT_SET(SLJIT_LESS_F64) +#define SLJIT_GREATER_EQUAL_F64 19 #define SLJIT_GREATER_EQUAL_F32 (SLJIT_GREATER_EQUAL_F64 | SLJIT_F32_OP) -#define SLJIT_GREATER_F64 18 +#define SLJIT_SET_GREATER_EQUAL_F SLJIT_SET(SLJIT_GREATER_EQUAL_F64) +#define SLJIT_GREATER_F64 20 #define SLJIT_GREATER_F32 (SLJIT_GREATER_F64 | SLJIT_F32_OP) -#define SLJIT_LESS_EQUAL_F64 19 +#define SLJIT_SET_GREATER_F SLJIT_SET(SLJIT_GREATER_F64) +#define SLJIT_LESS_EQUAL_F64 21 #define SLJIT_LESS_EQUAL_F32 (SLJIT_LESS_EQUAL_F64 | SLJIT_F32_OP) -#define SLJIT_UNORDERED_F64 20 +#define SLJIT_SET_LESS_EQUAL_F SLJIT_SET(SLJIT_LESS_EQUAL_F64) +#define SLJIT_UNORDERED_F64 22 #define SLJIT_UNORDERED_F32 (SLJIT_UNORDERED_F64 | SLJIT_F32_OP) -#define SLJIT_ORDERED_F64 21 +#define SLJIT_SET_UNORDERED_F SLJIT_SET(SLJIT_UNORDERED_F64) +#define SLJIT_ORDERED_F64 23 #define SLJIT_ORDERED_F32 (SLJIT_ORDERED_F64 | SLJIT_F32_OP) +#define SLJIT_SET_ORDERED_F SLJIT_SET(SLJIT_ORDERED_F64) /* Unconditional jump types. */ -#define SLJIT_JUMP 22 -#define SLJIT_FAST_CALL 23 -#define SLJIT_CALL0 24 -#define SLJIT_CALL1 25 -#define SLJIT_CALL2 26 -#define SLJIT_CALL3 27 +#define SLJIT_JUMP 24 +#define SLJIT_FAST_CALL 25 +#define SLJIT_CALL0 26 +#define SLJIT_CALL1 27 +#define SLJIT_CALL2 28 +#define SLJIT_CALL3 29 /* Fast calling method. See sljit_emit_fast_enter / sljit_emit_fast_return. */ @@ -1008,8 +1150,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi /* Emit a jump instruction. The destination is not set, only the type of the jump. type must be between SLJIT_EQUAL and SLJIT_CALL3 type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP - Flags: - (never set any flags) for both conditional and unconditional jumps. - Flags: destroy all flags for calls. */ + + Flags: does not modify flags for conditional and unconditional + jumps but destroy all flags for calls. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type); /* Basic arithmetic comparison. In most architectures it is implemented as @@ -1019,7 +1162,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile It is suggested to use this comparison form when appropriate. type must be between SLJIT_EQUAL and SLJIT_I_SIG_LESS_EQUAL type can be combined (or'ed) with SLJIT_REWRITABLE_JUMP - Flags: destroy flags. */ + Flags: may destroy flags. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w); @@ -1047,36 +1190,55 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_target(struct sljit_jump *jump, sljit_uw type must be between SLJIT_JUMP and SLJIT_CALL3 Direct form: set src to SLJIT_IMM() and srcw to the address Indirect form: any other valid addressing mode - Flags: - (never set any flags) for unconditional jumps. - Flags: destroy all flags for calls. */ + + Flags: does not modify flags for unconditional jumps but + destroy all flags for calls. */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw); /* Perform the operation using the conditional flags as the second argument. - Type must always be between SLJIT_EQUAL and SLJIT_S_ORDERED. The value + Type must always be between SLJIT_EQUAL and SLJIT_ORDERED_F64. The value represented by the type is 1, if the condition represented by the type is fulfilled, and 0 otherwise. - If op == SLJIT_MOV, SLJIT_MOV_S32, SLJIT_MOV_U32: + If op == SLJIT_MOV, SLJIT_MOV32: Set dst to the value represented by the type (0 or 1). - Src must be SLJIT_UNUSED, and srcw must be 0 - Flags: - (never set any flags) + Flags: - (does not modify flags) If op == SLJIT_OR, op == SLJIT_AND, op == SLJIT_XOR - Performs the binary operation using src as the first, and the value - represented by type as the second argument. - Important note: only dst=src and dstw=srcw is supported at the moment! - Flags: I | E | K - Note: sljit_emit_op_flags does nothing, if dst is SLJIT_UNUSED (regardless of op). */ + Performs the binary operation using dst as the first, and the value + represented by type as the second argument. Result is written into dst. + Flags: Z (may destroy flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type); -/* Copies the base address of SLJIT_SP + offset to dst. - Flags: - (never set any flags) */ +/* Emit a conditional mov instruction which moves source to destination, + if the condition is satisfied. Unlike other arithmetic operations this + instruction does not support memory accesses. + + type must be between SLJIT_EQUAL and SLJIT_ORDERED_F64 + dst_reg must be a valid register and it can be combined + with SLJIT_I32_OP to perform a 32 bit arithmetic operation + src must be register or immediate (SLJIT_IMM) + + Flags: - (does not modify flags) */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw); + +/* Copies the base address of SLJIT_SP + offset to dst. The offset can be + anything to negate the effect of relative addressing. For example if an + array of sljit_sw values is stored on the stack from offset 0x40, and R0 + contains the offset of an array item plus 0x120, this item can be + overwritten by two SLJIT instructions: + + sljit_get_local_base(compiler, SLJIT_R1, 0, 0x40 - 0x120); + sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM2(SLJIT_R1, SLJIT_R0), 0, SLJIT_IMM, 0x5); + + Flags: - (may destroy flags) */ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset); /* The constant can be changed runtime (see: sljit_set_const) - Flags: - (never set any flags) */ + Flags: - (does not modify flags) */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value); /* After the code generation the address for label, jump and const instructions @@ -1086,16 +1248,17 @@ static SLJIT_INLINE sljit_uw sljit_get_label_addr(struct sljit_label *label) { r static SLJIT_INLINE sljit_uw sljit_get_jump_addr(struct sljit_jump *jump) { return jump->addr; } static SLJIT_INLINE sljit_uw sljit_get_const_addr(struct sljit_const *const_) { return const_->addr; } -/* Only the address is required to rewrite the code. */ -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr); -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant); +/* Only the address and executable offset are required to perform dynamic + code modifications. See sljit_get_executable_offset function. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset); +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset); /* --------------------------------------------------------------------- */ /* Miscellaneous utility functions */ /* --------------------------------------------------------------------- */ #define SLJIT_MAJOR_VERSION 0 -#define SLJIT_MINOR_VERSION 93 +#define SLJIT_MINOR_VERSION 94 /* Get the human readable name of the platform. Can be useful on platforms like ARM, where ARM and Thumb2 functions can be mixed, and @@ -1113,19 +1276,23 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void); #if (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) -/* The sljit_stack is a utiliy feature of sljit, which allocates a - writable memory region between base (inclusive) and limit (exclusive). - Both base and limit is a pointer, and base is always <= than limit. - This feature uses the "address space reserve" feature - of modern operating systems. Basically we don't need to allocate a - huge memory block in one step for the worst case, we can start with - a smaller chunk and extend it later. Since the address space is - reserved, the data never copied to other regions, thus it is safe - to store pointers here. */ - -/* Note: The base field is aligned to PAGE_SIZE bytes (usually 4k or more). - Note: stack growing should not happen in small steps: 4k, 16k or even - bigger growth is better. +/* The sljit_stack is a utility extension of sljit, which provides + a top-down stack. The stack starts at base and goes down to + max_limit, so the memory region for this stack is between + max_limit (inclusive) and base (exclusive). However the + application can only use the region between limit (inclusive) + and base (exclusive). The sljit_stack_resize can be used to + extend this region up to max_limit. + + This feature uses the "address space reserve" feature of modern + operating systems, so instead of allocating a huge memory block + applications can allocate a small region and extend it later + without moving the memory area. Hence pointers can be stored + in this area. */ + +/* Note: base and max_limit fields are aligned to PAGE_SIZE bytes + (usually 4 Kbyte or more). + Note: stack should grow in larger steps, e.g. 4Kbyte, 16Kbyte or more. Note: this structure may not be supported by all operating systems. Some kind of fallback mechanism is suggested when SLJIT_UTIL_STACK is not defined. */ @@ -1133,15 +1300,16 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_release_lock(void); struct sljit_stack { /* User data, anything can be stored here. Starting with the same value as base. */ - sljit_uw top; + sljit_u8 *top; /* These members are read only. */ - sljit_uw base; - sljit_uw limit; - sljit_uw max_limit; + sljit_u8 *base; + sljit_u8 *limit; + sljit_u8 *max_limit; }; /* Returns NULL if unsuccessful. - Note: limit and max_limit contains the size for stack allocation. + Note: max_limit contains the maximum stack size in bytes. + Note: limit contains the starting stack size in bytes. Note: the top field is initialized to base. Note: see sljit_create_compiler for the explanation of allocator_data. */ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data); @@ -1153,7 +1321,7 @@ SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *st since the growth ratio can be added to the current limit, and sljit_stack_resize will do all the necessary checks. The fields of the stack are not changed if sljit_stack_resize fails. */ -SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_uw new_limit); +SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit); #endif /* (defined SLJIT_UTIL_STACK && SLJIT_UTIL_STACK) */ @@ -1182,6 +1350,15 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct #endif /* !(defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) */ +#if (defined SLJIT_EXECUTABLE_ALLOCATOR && SLJIT_EXECUTABLE_ALLOCATOR) +/* Free unused executable memory. The allocator keeps some free memory + around to reduce the number of OS executable memory allocations. + This improves performance since these calls are costly. However + it is sometimes desired to free all unused memory regions, e.g. + before the application terminates. */ +SLJIT_API_FUNC_ATTRIBUTE void sljit_free_unused_memory_exec(void); +#endif + /* --------------------------------------------------------------------- */ /* CPU specific functions */ /* --------------------------------------------------------------------- */ @@ -1214,32 +1391,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler, void *instruction, sljit_s32 size); -#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) - -/* Returns with non-zero if sse2 is available. */ +/* Define the currently available CPU status flags. It is usually used after an + sljit_emit_op_custom call to define which flags are set. */ -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void); - -/* Returns with non-zero if cmov instruction is available. */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void); - -/* Emit a conditional mov instruction on x86 CPUs. This instruction - moves src to destination, if the condition is satisfied. Unlike - other arithmetic instructions, destination must be a register. - Before such instructions are emitted, cmov support should be - checked by sljit_x86_is_cmov_available function. - type must be between SLJIT_EQUAL and SLJIT_S_ORDERED - dst_reg must be a valid register and it can be combined - with SLJIT_I32_OP to perform 32 bit arithmetic - Flags: I - (never set any flags) - */ - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler, - sljit_s32 type, - sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw); - -#endif +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_current_flags(struct sljit_compiler *compiler, + sljit_s32 current_flags); #endif /* _SLJIT_LIR_H_ */ diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c index b92808f526..745da99f61 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_32.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -38,8 +38,7 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) /* Last register + 1. */ #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) -#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) -#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4) #define TMP_FREG1 (0) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) @@ -55,8 +54,8 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) (((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1)) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { - 0, 0, 1, 2, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 12, 14, 15 +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 14, 12, 15 }; #define RM(rm) (reg_map[rm]) @@ -83,6 +82,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { #define BLX 0xe12fff30 #define BX 0xe12fff10 #define CLZ 0xe16f0f10 +#define CMN_DP 0xb #define CMP_DP 0xa #define BKPT 0xe1200070 #define EOR_DP 0x1 @@ -260,7 +260,7 @@ static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler) { /* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */ SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092)); - return push_inst(compiler, BLX | RM(TMP_REG1)); + return push_inst(compiler, BLX | RM(TMP_REG2)); } static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size) @@ -389,7 +389,7 @@ static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s3 #endif -static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code) +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset) { sljit_sw diff; @@ -401,7 +401,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw code_ptr--; if (jump->flags & JUMP_ADDR) - diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)); + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset); else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)); @@ -426,7 +426,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw } #else if (jump->flags & JUMP_ADDR) - diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr); + diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset); else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr); @@ -446,26 +446,28 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw return 0; } -static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, sljit_s32 flush) +static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - sljit_uw *ptr = (sljit_uw*)addr; - sljit_uw *inst = (sljit_uw*)ptr[0]; + sljit_uw *ptr = (sljit_uw *)jump_ptr; + sljit_uw *inst = (sljit_uw *)ptr[0]; sljit_uw mov_pc = ptr[1]; sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC); - sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2)) >> 2); + sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2); if (diff <= 0x7fffff && diff >= -0x800000) { /* Turn to branch. */ if (!bl) { inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff); - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } else { inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff); inst[1] = NOP; - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } } @@ -479,12 +481,14 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, if (*inst != mov_pc) { inst[0] = mov_pc; if (!bl) { - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } else { inst[1] = BLX | RM(TMP_REG1); - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } } @@ -492,11 +496,12 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, *ptr = new_addr; } #else - sljit_uw *inst = (sljit_uw*)addr; + sljit_uw *inst = (sljit_uw*)jump_ptr; SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff); inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff); - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } #endif @@ -504,7 +509,7 @@ static SLJIT_INLINE void inline_set_jump_addr(sljit_uw addr, sljit_uw new_addr, static sljit_uw get_imm(sljit_uw imm); -static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, sljit_s32 flush) +static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_sw new_constant, sljit_s32 flush_cache) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw *ptr = (sljit_uw*)addr; @@ -515,7 +520,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, src2 = get_imm(new_constant); if (src2) { *inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2; - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } return; @@ -524,7 +530,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, src2 = get_imm(~new_constant); if (src2) { *inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2; - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } return; @@ -537,7 +544,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, if (*inst != ldr_literal) { *inst = ldr_literal; - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 1); } } @@ -547,7 +555,8 @@ static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw new_constant, SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT); inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff); inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff); - if (flush) { + if (flush_cache) { + inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } #endif @@ -562,6 +571,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_uw *buf_end; sljit_uw size; sljit_uw word_count; + sljit_sw executable_offset; + sljit_sw jump_addr; #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) sljit_uw cpool_size; sljit_uw cpool_skip_alignment; @@ -602,14 +613,14 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; if (label && label->size == 0) { - label->addr = (sljit_uw)code; - label->size = 0; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); label = label->next; } @@ -636,7 +647,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil cpool_size = 0; if (label && label->size == word_count) { /* Points after the current instruction. */ - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -652,19 +663,19 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!const_ || const_->addr >= word_count); if (jump && jump->addr == word_count) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - if (detect_jump_type(jump, code_ptr, code)) + if (detect_jump_type(jump, code_ptr, code, executable_offset)) code_ptr--; jump->addr = (sljit_uw)code_ptr; #else jump->addr = (sljit_uw)(code_ptr - 2); - if (detect_jump_type(jump, code_ptr, code)) + if (detect_jump_type(jump, code_ptr, code, executable_offset)) code_ptr -= 2; #endif jump = jump->next; } if (label && label->size == word_count) { /* code_ptr can be affected above. */ - label->addr = (sljit_uw)(code_ptr + 1); + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset); label->size = (code_ptr + 1) - code; label = label->next; } @@ -729,17 +740,18 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - buf_ptr = (sljit_uw*)jump->addr; + buf_ptr = (sljit_uw *)jump->addr; if (jump->flags & PATCH_B) { + jump_addr = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset); if (!(jump->flags & JUMP_ADDR)) { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.label->addr - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff; + SLJIT_ASSERT(((sljit_sw)jump->u.label->addr - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.label->addr - jump_addr) >= -0x02000000); + *buf_ptr |= (((sljit_sw)jump->u.label->addr - jump_addr) >> 2) & 0x00ffffff; } else { - SLJIT_ASSERT(((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) <= 0x01ffffff && ((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >= -0x02000000); - *buf_ptr |= (((sljit_sw)jump->u.target - (sljit_sw)(buf_ptr + 2)) >> 2) & 0x00ffffff; + SLJIT_ASSERT(((sljit_sw)jump->u.target - jump_addr) <= 0x01ffffff && ((sljit_sw)jump->u.target - jump_addr) >= -0x02000000); + *buf_ptr |= (((sljit_sw)jump->u.target - jump_addr) >> 2) & 0x00ffffff; } } else if (jump->flags & SLJIT_REWRITABLE_JUMP) { @@ -747,10 +759,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump->addr = (sljit_uw)code_ptr; code_ptr[0] = (sljit_uw)buf_ptr; code_ptr[1] = *buf_ptr; - inline_set_jump_addr((sljit_uw)code_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); code_ptr += 2; #else - inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); #endif } else { @@ -763,7 +775,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr += 1; *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; #else - inline_set_jump_addr((sljit_uw)buf_ptr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); + inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0); #endif } jump = jump->next; @@ -782,7 +794,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil else buf_ptr += 1; /* Set the value again (can be a simple constant). */ - inline_set_const((sljit_uw)code_ptr, *buf_ptr, 0); + inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0); code_ptr += 2; const_ = const_->next; @@ -792,29 +804,90 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size); compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_uw); + + code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); return code; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_PRE_UPDATE: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + return 1; + + default: + return 0; + } +} + /* --------------------------------------------------------------------- */ /* Entry, exit */ /* --------------------------------------------------------------------- */ +/* Creates an index in data_transfer_insts array. */ +#define WORD_DATA 0x00 +#define BYTE_DATA 0x01 +#define HALF_DATA 0x02 +#define PRELOAD_DATA 0x03 +#define SIGNED_DATA 0x04 +#define LOAD_DATA 0x08 + /* emit_op inp_flags. WRITE_BACK must be the first, since it is a flag. */ -#define WRITE_BACK 0x01 -#define ALLOW_IMM 0x02 -#define ALLOW_INV_IMM 0x04 +#define WRITE_BACK 0x10 +#define ALLOW_IMM 0x20 +#define ALLOW_INV_IMM 0x40 #define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM) -#define ARG_TEST 0x08 -/* Creates an index in data_transfer_insts array. */ -#define WORD_DATA 0x00 -#define BYTE_DATA 0x10 -#define HALF_DATA 0x20 -#define SIGNED_DATA 0x40 -#define LOAD_DATA 0x80 +/* s/l - store/load (1 bit) + u/s - signed/unsigned (1 bit) + w/b/h/N - word/byte/half/NOT allowed (2 bit) + Storing signed and unsigned values are the same operations. */ + +static const sljit_uw data_transfer_insts[16] = { +/* s u w */ 0xe5000000 /* str */, +/* s u b */ 0xe5400000 /* strb */, +/* s u h */ 0xe10000b0 /* strh */, +/* s u N */ 0x00000000 /* not allowed */, +/* s s w */ 0xe5000000 /* str */, +/* s s b */ 0xe5400000 /* strb */, +/* s s h */ 0xe10000b0 /* strh */, +/* s s N */ 0x00000000 /* not allowed */, + +/* l u w */ 0xe5100000 /* ldr */, +/* l u b */ 0xe5500000 /* ldrb */, +/* l u h */ 0xe11000b0 /* ldrh */, +/* l u p */ 0xf5500000 /* preload data */, +/* l s w */ 0xe5100000 /* ldr */, +/* l s b */ 0xe11000d0 /* ldrsb */, +/* l s h */ 0xe11000f0 /* ldrsh */, +/* l s N */ 0x00000000 /* not allowed */, +}; + +#define EMIT_DATA_TRANSFER(type, add, wb, target_reg, base_reg, arg) \ + (data_transfer_insts[(type) & 0xf] | ((add) << 23) | ((wb) << (21 - 4)) | RD(target_reg) | RN(base_reg) | (arg)) + +/* Normal ldr/str instruction. + Type2: ldrsb, ldrh, ldrsh */ +#define IS_TYPE1_TRANSFER(type) \ + (data_transfer_insts[(type) & 0xf] & 0x04000000) +#define TYPE2_TRANSFER_IMM(imm) \ + (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) /* Condition: AL. */ #define EMIT_DATA_PROCESS_INS(opcode, set_flags, dst, src1, src2) \ @@ -912,52 +985,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp /* Operators */ /* --------------------------------------------------------------------- */ -/* s/l - store/load (1 bit) - u/s - signed/unsigned (1 bit) - w/b/h/N - word/byte/half/NOT allowed (2 bit) - It contans 16 items, but not all are different. */ - -static sljit_sw data_transfer_insts[16] = { -/* s u w */ 0xe5000000 /* str */, -/* s u b */ 0xe5400000 /* strb */, -/* s u h */ 0xe10000b0 /* strh */, -/* s u N */ 0x00000000 /* not allowed */, -/* s s w */ 0xe5000000 /* str */, -/* s s b */ 0xe5400000 /* strb */, -/* s s h */ 0xe10000b0 /* strh */, -/* s s N */ 0x00000000 /* not allowed */, - -/* l u w */ 0xe5100000 /* ldr */, -/* l u b */ 0xe5500000 /* ldrb */, -/* l u h */ 0xe11000b0 /* ldrh */, -/* l u N */ 0x00000000 /* not allowed */, -/* l s w */ 0xe5100000 /* ldr */, -/* l s b */ 0xe11000d0 /* ldrsb */, -/* l s h */ 0xe11000f0 /* ldrsh */, -/* l s N */ 0x00000000 /* not allowed */, -}; - -#define EMIT_DATA_TRANSFER(type, add, wb, target, base1, base2) \ - (data_transfer_insts[(type) >> 4] | ((add) << 23) | ((wb) << 21) | (reg_map[target] << 12) | (reg_map[base1] << 16) | (base2)) -/* Normal ldr/str instruction. - Type2: ldrsb, ldrh, ldrsh */ -#define IS_TYPE1_TRANSFER(type) \ - (data_transfer_insts[(type) >> 4] & 0x04000000) -#define TYPE2_TRANSFER_IMM(imm) \ - (((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22)) - /* flags: */ /* Arguments are swapped. */ #define ARGS_SWAPPED 0x01 /* Inverted immediate. */ #define INV_IMM 0x02 /* Source and destination is register. */ -#define REG_DEST 0x04 -#define REG_SOURCE 0x08 - /* One instruction is enough. */ -#define FAST_DEST 0x10 - /* Multiple instructions are required. */ -#define SLOW_DEST 0x20 +#define MOVE_REG_CONV 0x04 + /* Unused return value. */ +#define UNUSED_RETURN 0x08 /* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */ #define SET_FLAGS (1 << 20) /* dst: reg @@ -966,157 +1002,135 @@ static sljit_sw data_transfer_insts[16] = { SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */ #define SRC2_IMM (1 << 25) -#define EMIT_DATA_PROCESS_INS_AND_RETURN(opcode) \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))) - -#define EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(opcode, dst, src1, src2) \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(opcode, flags & SET_FLAGS, dst, src1, src2)) - #define EMIT_SHIFT_INS_AND_RETURN(opcode) \ SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM)); \ if (compiler->shift_imm != 0x20) { \ SLJIT_ASSERT(src1 == TMP_REG1); \ SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); \ + \ if (compiler->shift_imm != 0) \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | reg_map[src2])); \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, reg_map[src2])); \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \ + dst, SLJIT_UNUSED, (compiler->shift_imm << 7) | (opcode << 5) | RM(src2))); \ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); \ } \ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | ((flags & ARGS_SWAPPED) ? reg_map[src2] : reg_map[src1]))); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, flags & SET_FLAGS, \ + dst, SLJIT_UNUSED, (reg_map[(flags & ARGS_SWAPPED) ? src1 : src2] << 8) | (opcode << 5) | 0x10 | RM((flags & ARGS_SWAPPED) ? src2 : src1))); static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_s32 src2) { - sljit_sw mul_inst; - switch (GET_OPCODE(op)) { case SLJIT_MOV: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); if (dst != src2) { if (src2 & SRC2_IMM) { - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, + dst, SLJIT_UNUSED, src2)); } - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, reg_map[src2]); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(src2))); } return SLJIT_SUCCESS; case SLJIT_MOV_U8: case SLJIT_MOV_S8: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { + if (flags & MOVE_REG_CONV) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (op == SLJIT_MOV_U8) return push_inst(compiler, EMIT_DATA_PROCESS_INS(AND_DP, 0, dst, src2, SRC2_IMM | 0xff)); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | reg_map[src2]))); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | reg_map[dst])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | RM(src2)))); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst))); #else return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2)); #endif } else if (dst != src2) { SLJIT_ASSERT(src2 & SRC2_IMM); - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, + dst, SLJIT_UNUSED, src2)); } return SLJIT_SUCCESS; case SLJIT_MOV_U16: case SLJIT_MOV_S16: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED)); - if ((flags & (REG_DEST | REG_SOURCE)) == (REG_DEST | REG_SOURCE)) { + if (flags & MOVE_REG_CONV) { #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | reg_map[src2]))); - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | reg_map[dst])); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | RM(src2)))); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst))); #else return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2)); #endif } else if (dst != src2) { SLJIT_ASSERT(src2 & SRC2_IMM); - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); + return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MVN_DP : MOV_DP, 0, + dst, SLJIT_UNUSED, src2)); } return SLJIT_SUCCESS; case SLJIT_NOT: if (src2 & SRC2_IMM) { - if (flags & INV_IMM) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MOV_DP, dst, SLJIT_UNUSED, src2); - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, src2); + return push_inst(compiler, EMIT_DATA_PROCESS_INS((flags & INV_IMM) ? MOV_DP : MVN_DP, flags & SET_FLAGS, + dst, SLJIT_UNUSED, src2)); } - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(MVN_DP, dst, SLJIT_UNUSED, RM(src2)); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, flags & SET_FLAGS, dst, SLJIT_UNUSED, RM(src2))); case SLJIT_CLZ: SLJIT_ASSERT(!(flags & INV_IMM)); SLJIT_ASSERT(!(src2 & SRC2_IMM)); FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2))); - if (flags & SET_FLAGS) - EMIT_FULL_DATA_PROCESS_INS_AND_RETURN(CMP_DP, SLJIT_UNUSED, dst, SRC2_IMM); return SLJIT_SUCCESS; case SLJIT_ADD: SLJIT_ASSERT(!(flags & INV_IMM)); - EMIT_DATA_PROCESS_INS_AND_RETURN(ADD_DP); + if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMN_DP, SET_FLAGS, + SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, flags & SET_FLAGS, + dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_ADDC: SLJIT_ASSERT(!(flags & INV_IMM)); - EMIT_DATA_PROCESS_INS_AND_RETURN(ADC_DP); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADC_DP, flags & SET_FLAGS, + dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_SUB: SLJIT_ASSERT(!(flags & INV_IMM)); - if (!(flags & ARGS_SWAPPED)) - EMIT_DATA_PROCESS_INS_AND_RETURN(SUB_DP); - EMIT_DATA_PROCESS_INS_AND_RETURN(RSB_DP); + if ((flags & (UNUSED_RETURN | SET_FLAGS)) == (UNUSED_RETURN | SET_FLAGS) && !(flags & ARGS_SWAPPED)) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, + SLJIT_UNUSED, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SUB_DP : RSB_DP, flags & SET_FLAGS, + dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_SUBC: SLJIT_ASSERT(!(flags & INV_IMM)); - if (!(flags & ARGS_SWAPPED)) - EMIT_DATA_PROCESS_INS_AND_RETURN(SBC_DP); - EMIT_DATA_PROCESS_INS_AND_RETURN(RSC_DP); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & ARGS_SWAPPED) ? SBC_DP : RSC_DP, flags & SET_FLAGS, + dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_MUL: SLJIT_ASSERT(!(flags & INV_IMM)); SLJIT_ASSERT(!(src2 & SRC2_IMM)); - if (SLJIT_UNLIKELY(op & SLJIT_SET_O)) - mul_inst = SMULL | (reg_map[TMP_REG3] << 16) | (reg_map[dst] << 12); - else - mul_inst = MUL | (reg_map[dst] << 16); - if (dst != src2) - FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src1] << 8) | reg_map[src2])); - else if (dst != src1) - FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[src1])); - else { - /* Rm and Rd must not be the same register. */ - SLJIT_ASSERT(dst != TMP_REG1); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, reg_map[src2]))); - FAIL_IF(push_inst(compiler, mul_inst | (reg_map[src2] << 8) | reg_map[TMP_REG1])); - } + if (!HAS_FLAGS(op)) + return push_inst(compiler, MUL | (reg_map[dst] << 16) | (reg_map[src2] << 8) | reg_map[src1]); - if (!(op & SLJIT_SET_O)) - return SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SMULL | (reg_map[TMP_REG1] << 16) | (reg_map[dst] << 12) | (reg_map[src2] << 8) | reg_map[src1])); - /* We need to use TMP_REG3. */ - compiler->cache_arg = 0; - compiler->cache_argw = 0; - /* cmp TMP_REG2, dst asr #31. */ - return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG3, RM(dst) | 0xfc0)); + /* cmp TMP_REG1, dst asr #31. */ + return push_inst(compiler, EMIT_DATA_PROCESS_INS(CMP_DP, SET_FLAGS, SLJIT_UNUSED, TMP_REG1, RM(dst) | 0xfc0)); case SLJIT_AND: - if (!(flags & INV_IMM)) - EMIT_DATA_PROCESS_INS_AND_RETURN(AND_DP); - EMIT_DATA_PROCESS_INS_AND_RETURN(BIC_DP); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(!(flags & INV_IMM) ? AND_DP : BIC_DP, flags & SET_FLAGS, + dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_OR: SLJIT_ASSERT(!(flags & INV_IMM)); - EMIT_DATA_PROCESS_INS_AND_RETURN(ORR_DP); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(ORR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_XOR: SLJIT_ASSERT(!(flags & INV_IMM)); - EMIT_DATA_PROCESS_INS_AND_RETURN(EOR_DP); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(EOR_DP, flags & SET_FLAGS, dst, src1, (src2 & SRC2_IMM) ? src2 : RM(src2))); case SLJIT_SHL: EMIT_SHIFT_INS_AND_RETURN(0); @@ -1127,12 +1141,11 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_ASHR: EMIT_SHIFT_INS_AND_RETURN(2); } - SLJIT_ASSERT_STOP(); + + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; } -#undef EMIT_DATA_PROCESS_INS_AND_RETURN -#undef EMIT_FULL_DATA_PROCESS_INS_AND_RETURN #undef EMIT_SHIFT_INS_AND_RETURN /* Tests whether the immediate can be stored in the 12 bit imm field. @@ -1312,291 +1325,116 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, /* Load integer. */ return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, reg, TMP_PC, 0), imm); #else - return emit_imm(compiler, reg, imm); + FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff))); + if (imm <= 0xffff) + return SLJIT_SUCCESS; + return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff)); #endif } -/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ -static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) { - if (value >= 0) { - value = get_imm(value); - if (value) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, dst, reg, value)); - } - else { - value = get_imm(-value); - if (value) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, dst, reg, value)); - } - return SLJIT_ERR_UNSUPPORTED; -} + sljit_uw offset_reg, imm; + sljit_uw is_type1_transfer = IS_TYPE1_TRANSFER(flags); -/* Can perform an operation using at most 1 instruction. */ -static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) -{ - sljit_uw imm; + SLJIT_ASSERT (arg & SLJIT_MEM); + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); - if (arg & SLJIT_IMM) { - imm = get_imm(argw); - if (imm) { - if (inp_flags & ARG_TEST) - return 1; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, reg, SLJIT_UNUSED, imm))); - return -1; + SLJIT_COMPILE_ASSERT(WRITE_BACK == 0x10, optimized_for_emit_data_transfer); + + if ((arg & REG_MASK) == SLJIT_UNUSED) { + /* Write back is not used. */ + if (is_type1_transfer) { + FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xfff)); + argw &= 0xfff; } - imm = get_imm(~argw); - if (imm) { - if (inp_flags & ARG_TEST) - return 1; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MVN_DP, 0, reg, SLJIT_UNUSED, imm))); - return -1; + else { + FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~0xff)); + argw &= 0xff; } - return 0; + + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, is_type1_transfer ? argw : TYPE2_TRANSFER_IMM(argw))); } - SLJIT_ASSERT(arg & SLJIT_MEM); + if (arg & OFFS_REG_MASK) { + offset_reg = OFFS_REG(arg); + arg &= REG_MASK; + argw &= 0x3; - /* Fast loads/stores. */ - if (!(arg & REG_MASK)) - return 0; + if (argw != 0 && !is_type1_transfer) { + SLJIT_ASSERT(!(flags & WRITE_BACK)); - if (arg & OFFS_REG_MASK) { - if ((argw & 0x3) != 0 && !IS_TYPE1_TRANSFER(inp_flags)) - return 0; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_reg, arg, RM(offset_reg) | (argw << 7)))); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, 0, reg, tmp_reg, TYPE2_TRANSFER_IMM(0))); + } - if (inp_flags & ARG_TEST) - return 1; - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, - RM(OFFS_REG(arg)) | (IS_TYPE1_TRANSFER(inp_flags) ? SRC2_IMM : 0) | ((argw & 0x3) << 7)))); - return -1; + /* Bit 25: RM is offset. */ + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, + RM(offset_reg) | (is_type1_transfer ? (1 << 25) : 0) | (argw << 7))); } - if (IS_TYPE1_TRANSFER(inp_flags)) { + arg &= REG_MASK; + + if (is_type1_transfer) { + if (argw > 0xfff) { + imm = get_imm(argw & ~0xfff); + if (imm) { + offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm))); + argw = argw & 0xfff; + arg = offset_reg; + } + } + else if (argw < -0xfff) { + imm = get_imm(-argw & ~0xfff); + if (imm) { + offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm))); + argw = -(-argw & 0xfff); + arg = offset_reg; + } + } + if (argw >= 0 && argw <= 0xfff) { - if (inp_flags & ARG_TEST) - return 1; - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, argw))); - return -1; + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg & REG_MASK, argw)); } if (argw < 0 && argw >= -0xfff) { - if (inp_flags & ARG_TEST) - return 1; - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, -argw))); - return -1; + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg & REG_MASK, -argw)); } } else { - if (argw >= 0 && argw <= 0xff) { - if (inp_flags & ARG_TEST) - return 1; - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)))); - return -1; - } - if (argw < 0 && argw >= -0xff) { - if (inp_flags & ARG_TEST) - return 1; - argw = -argw; - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 0, inp_flags & WRITE_BACK, reg, arg & REG_MASK, TYPE2_TRANSFER_IMM(argw)))); - return -1; - } - } - - return 0; -} - -/* See getput_arg below. - Note: can_cache is called only for binary operators. Those - operators always uses word arguments without write back. */ -static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) -{ - /* Immediate caching is not supported as it would be an operation on constant arguments. */ - if (arg & SLJIT_IMM) - return 0; - - /* Always a simple operation. */ - if (arg & OFFS_REG_MASK) - return 0; - - if (!(arg & REG_MASK)) { - /* Immediate access. */ - if ((next_arg & SLJIT_MEM) && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff)) - return 1; - return 0; - } - - if (argw <= 0xfffff && argw >= -0xfffff) - return 0; - - if (argw == next_argw && (next_arg & SLJIT_MEM)) - return 1; - - if (arg == next_arg && ((sljit_uw)argw - (sljit_uw)next_argw <= 0xfff || (sljit_uw)next_argw - (sljit_uw)argw <= 0xfff)) - return 1; - - return 0; -} - -#define GETPUT_ARG_DATA_TRANSFER(add, wb, target, base, imm) \ - if (max_delta & 0xf00) \ - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, imm))); \ - else \ - FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, add, wb, target, base, TYPE2_TRANSFER_IMM(imm)))); - -#define TEST_WRITE_BACK() \ - if (inp_flags & WRITE_BACK) { \ - tmp_r = arg & REG_MASK; \ - if (reg == tmp_r) { \ - /* This can only happen for stores */ \ - /* since ldr reg, [reg, ...]! has no meaning */ \ - SLJIT_ASSERT(!(inp_flags & LOAD_DATA)); \ - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(reg)))); \ - reg = TMP_REG3; \ - } \ - } - -/* Emit the necessary instructions. See can_cache above. */ -static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) -{ - sljit_s32 tmp_r; - sljit_sw max_delta; - sljit_sw sign; - sljit_uw imm; - - if (arg & SLJIT_IMM) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - return load_immediate(compiler, reg, argw); - } - - SLJIT_ASSERT(arg & SLJIT_MEM); - - tmp_r = (inp_flags & LOAD_DATA) ? reg : TMP_REG3; - max_delta = IS_TYPE1_TRANSFER(inp_flags) ? 0xfff : 0xff; - - if ((arg & REG_MASK) == SLJIT_UNUSED) { - /* Write back is not used. */ - imm = (sljit_uw)(argw - compiler->cache_argw); - if ((compiler->cache_arg & SLJIT_IMM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { - if (imm <= (sljit_uw)max_delta) { - sign = 1; - argw = argw - compiler->cache_argw; + if (argw > 0xff) { + imm = get_imm(argw & ~0xff); + if (imm) { + offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, offset_reg, arg, imm))); + argw = argw & 0xff; + arg = offset_reg; } - else { - sign = 0; - argw = compiler->cache_argw - argw; + } + else if (argw < -0xff) { + imm = get_imm(-argw & ~0xff); + if (imm) { + offset_reg = (flags & WRITE_BACK) ? arg : tmp_reg; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, offset_reg, arg, imm))); + argw = -(-argw & 0xff); + arg = offset_reg; } - - GETPUT_ARG_DATA_TRANSFER(sign, 0, reg, TMP_REG3, argw); - return SLJIT_SUCCESS; } - /* With write back, we can create some sophisticated loads, but - it is hard to decide whether we should convert downward (0s) or upward (1s). */ - imm = (sljit_uw)(argw - next_argw); - if ((next_arg & SLJIT_MEM) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - tmp_r = TMP_REG3; + if (argw >= 0 && argw <= 0xff) { + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw))); + } + if (argw < 0 && argw >= -0xff) { + argw = -argw; + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, flags & WRITE_BACK, reg, arg, TYPE2_TRANSFER_IMM(argw))); } - - FAIL_IF(load_immediate(compiler, tmp_r, argw)); - GETPUT_ARG_DATA_TRANSFER(1, 0, reg, tmp_r, 0); - return SLJIT_SUCCESS; - } - - if (arg & OFFS_REG_MASK) { - SLJIT_ASSERT((argw & 0x3) && !(max_delta & 0xf00)); - if (inp_flags & WRITE_BACK) - tmp_r = arg & REG_MASK; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); - return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, 0, reg, tmp_r, TYPE2_TRANSFER_IMM(0))); - } - - imm = (sljit_uw)(argw - compiler->cache_argw); - if (compiler->cache_arg == arg && imm <= (sljit_uw)max_delta) { - SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); - GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, imm); - return SLJIT_SUCCESS; - } - if (compiler->cache_arg == arg && imm >= (sljit_uw)-max_delta) { - SLJIT_ASSERT(!(inp_flags & WRITE_BACK)); - imm = (sljit_uw)-(sljit_sw)imm; - GETPUT_ARG_DATA_TRANSFER(0, 0, reg, TMP_REG3, imm); - return SLJIT_SUCCESS; - } - - imm = get_imm(argw & ~max_delta); - if (imm) { - TEST_WRITE_BACK(); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, tmp_r, arg & REG_MASK, imm))); - GETPUT_ARG_DATA_TRANSFER(1, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); - return SLJIT_SUCCESS; - } - - imm = get_imm(-argw & ~max_delta); - if (imm) { - argw = -argw; - TEST_WRITE_BACK(); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, tmp_r, arg & REG_MASK, imm))); - GETPUT_ARG_DATA_TRANSFER(0, inp_flags & WRITE_BACK, reg, tmp_r, argw & max_delta); - return SLJIT_SUCCESS; - } - - if ((compiler->cache_arg & SLJIT_IMM) && compiler->cache_argw == argw) { - TEST_WRITE_BACK(); - return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); - } - - if (argw == next_argw && (next_arg & SLJIT_MEM)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - - TEST_WRITE_BACK(); - return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, RM(TMP_REG3) | (max_delta & 0xf00 ? SRC2_IMM : 0))); - } - - imm = (sljit_uw)(argw - next_argw); - if (arg == next_arg && !(inp_flags & WRITE_BACK) && (imm <= (sljit_uw)max_delta || imm >= (sljit_uw)-max_delta)) { - SLJIT_ASSERT(inp_flags & LOAD_DATA); - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, TMP_REG3, reg_map[arg & REG_MASK]))); - - compiler->cache_arg = arg; - compiler->cache_argw = argw; - - GETPUT_ARG_DATA_TRANSFER(1, 0, reg, TMP_REG3, 0); - return SLJIT_SUCCESS; - } - - if ((arg & REG_MASK) == tmp_r) { - compiler->cache_arg = SLJIT_IMM; - compiler->cache_argw = argw; - tmp_r = TMP_REG3; } - FAIL_IF(load_immediate(compiler, tmp_r, argw)); - return push_inst(compiler, EMIT_DATA_TRANSFER(inp_flags, 1, inp_flags & WRITE_BACK, reg, arg & REG_MASK, reg_map[tmp_r] | (max_delta & 0xf00 ? SRC2_IMM : 0))); -} - -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) -{ - if (getput_arg_fast(compiler, flags, reg, arg, argw)) - return compiler->error; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, flags, reg, arg, argw, 0, 0); -} - -static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) -{ - if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) - return compiler->error; - return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, flags & WRITE_BACK, reg, arg, + RM(tmp_reg) | (is_type1_transfer ? (1 << 25) : 0))); } static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags, @@ -1604,68 +1442,66 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - /* arg1 goes to TMP_REG1 or src reg - arg2 goes to TMP_REG2, imm or src reg - TMP_REG3 can be used for caching - result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */ + /* src1 is reg or TMP_REG1 + src2 is reg, TMP_REG2, or imm + result goes to TMP_REG2, so put result can use TMP_REG1. */ /* We prefers register and simple consts. */ - sljit_s32 dst_r; - sljit_s32 src1_r; - sljit_s32 src2_r = 0; - sljit_s32 sugg_src2_r = TMP_REG2; - sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0; - - compiler->cache_arg = 0; - compiler->cache_argw = 0; + sljit_s32 dst_reg; + sljit_s32 src1_reg; + sljit_s32 src2_reg; + sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; /* Destination check. */ - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) - return SLJIT_SUCCESS; - dst_r = TMP_REG2; - } - else if (FAST_IS_REG(dst)) { - dst_r = dst; - flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) - sugg_src2_r = dst_r; - } - else { - SLJIT_ASSERT(dst & SLJIT_MEM); - if (getput_arg_fast(compiler, inp_flags | ARG_TEST, TMP_REG2, dst, dstw)) { - flags |= FAST_DEST; - dst_r = TMP_REG2; - } - else { - flags |= SLOW_DEST; - dst_r = 0; + if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) + flags |= UNUSED_RETURN; + + SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM)); + + src2_reg = 0; + + do { + if (!(inp_flags & ALLOW_IMM)) + break; + + if (src2 & SLJIT_IMM) { + src2_reg = get_imm(src2w); + if (src2_reg) + break; + if (inp_flags & ALLOW_INV_IMM) { + src2_reg = get_imm(~src2w); + if (src2_reg) { + flags |= INV_IMM; + break; + } + } + if (GET_OPCODE(op) == SLJIT_ADD) { + src2_reg = get_imm(-src2w); + if (src2_reg) { + op = SLJIT_SUB | GET_ALL_FLAGS(op); + break; + } + } + if (GET_OPCODE(op) == SLJIT_SUB) { + src2_reg = get_imm(-src2w); + if (src2_reg) { + op = SLJIT_ADD | GET_ALL_FLAGS(op); + break; + } + } } - } - /* Source 1. */ - if (FAST_IS_REG(src1)) - src1_r = src1; - else if (FAST_IS_REG(src2)) { - flags |= ARGS_SWAPPED; - src1_r = src2; - src2 = src1; - src2w = src1w; - } - else do { /* do { } while(0) is used because of breaks. */ - src1_r = 0; - if ((inp_flags & ALLOW_ANY_IMM) && (src1 & SLJIT_IMM)) { - /* The second check will generate a hit. */ - src2_r = get_imm(src1w); - if (src2_r) { + if (src1 & SLJIT_IMM) { + src2_reg = get_imm(src1w); + if (src2_reg) { flags |= ARGS_SWAPPED; src1 = src2; src1w = src2w; break; } if (inp_flags & ALLOW_INV_IMM) { - src2_r = get_imm(~src1w); - if (src2_r) { + src2_reg = get_imm(~src1w); + if (src2_reg) { flags |= ARGS_SWAPPED | INV_IMM; src1 = src2; src1w = src2w; @@ -1673,9 +1509,9 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } if (GET_OPCODE(op) == SLJIT_ADD) { - src2_r = get_imm(-src1w); - if (src2_r) { - /* Note: ARGS_SWAPPED is intentionally not applied! */ + src2_reg = get_imm(-src1w); + if (src2_reg) { + /* Note: add is commutative operation. */ src1 = src2; src1w = src2w; op = SLJIT_SUB | GET_ALL_FLAGS(op); @@ -1683,110 +1519,54 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } } } + } while(0); - if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w)) { - FAIL_IF(compiler->error); - src1_r = TMP_REG1; - } - } while (0); + /* Source 1. */ + if (FAST_IS_REG(src1)) + src1_reg = src1; + else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1)); + src1_reg = TMP_REG1; + } + else { + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1_reg = TMP_REG1; + } - /* Source 2. */ - if (src2_r == 0) { - if (FAST_IS_REG(src2)) { - src2_r = src2; - flags |= REG_SOURCE; - if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) - dst_r = src2_r; - } - else do { /* do { } while(0) is used because of breaks. */ - if ((inp_flags & ALLOW_ANY_IMM) && (src2 & SLJIT_IMM)) { - src2_r = get_imm(src2w); - if (src2_r) - break; - if (inp_flags & ALLOW_INV_IMM) { - src2_r = get_imm(~src2w); - if (src2_r) { - flags |= INV_IMM; - break; - } - } - if (GET_OPCODE(op) == SLJIT_ADD) { - src2_r = get_imm(-src2w); - if (src2_r) { - op = SLJIT_SUB | GET_ALL_FLAGS(op); - flags &= ~ARGS_SWAPPED; - break; - } - } - if (GET_OPCODE(op) == SLJIT_SUB && !(flags & ARGS_SWAPPED)) { - src2_r = get_imm(-src2w); - if (src2_r) { - op = SLJIT_ADD | GET_ALL_FLAGS(op); - flags &= ~ARGS_SWAPPED; - break; - } - } - } + /* Destination. */ + dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; - /* src2_r is 0. */ - if (getput_arg_fast(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) { - FAIL_IF(compiler->error); - src2_r = sugg_src2_r; - } - } while (0); - } + if (op <= SLJIT_MOVU_P) { + if (dst & SLJIT_MEM) { + if (inp_flags & BYTE_DATA) + inp_flags &= ~SIGNED_DATA; - /* src1_r, src2_r and dst_r can be zero (=unprocessed) or non-zero. - If they are zero, they must not be registers. */ - if (src1_r == 0 && src2_r == 0 && dst_r == 0) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - SLJIT_ASSERT(!(flags & ARGS_SWAPPED)); - flags |= ARGS_SWAPPED; - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src1, src1w, dst, dstw)); + if (FAST_IS_REG(src2)) + return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2); } - else { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw)); - } - src1_r = TMP_REG1; - src2_r = TMP_REG2; - } - else if (src1_r == 0 && src2_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w)); - src1_r = TMP_REG1; - } - else if (src1_r == 0 && dst_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw)); - src1_r = TMP_REG1; - } - else if (src2_r == 0 && dst_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw)); - src2_r = sugg_src2_r; + + if (FAST_IS_REG(src2) && dst_reg != TMP_REG2) + flags |= MOVE_REG_CONV; } - if (dst_r == 0) - dst_r = TMP_REG2; + /* Source 2. */ + if (src2_reg == 0) { + src2_reg = (op <= SLJIT_MOVU_P) ? dst_reg : TMP_REG2; - if (src1_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0)); - src1_r = TMP_REG1; + if (FAST_IS_REG(src2)) + src2_reg = src2; + else if (src2 & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2)); + else + FAIL_IF(load_immediate(compiler, src2_reg, src2w)); } - if (src2_r == 0) { - FAIL_IF(getput_arg(compiler, inp_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0)); - src2_r = sugg_src2_r; - } + FAIL_IF(emit_single_op(compiler, op, flags, dst_reg, src1_reg, src2_reg)); - FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r)); + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; - if (flags & (FAST_DEST | SLOW_DEST)) { - if (flags & FAST_DEST) - FAIL_IF(getput_arg_fast(compiler, inp_flags, dst_r, dst, dstw)); - else - FAIL_IF(getput_arg(compiler, inp_flags, dst_r, dst, dstw, 0, 0)); - } - return SLJIT_SUCCESS; + return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1); } #ifdef __cplusplus @@ -1806,6 +1586,9 @@ extern int __aeabi_idivmod(int numerator, int denominator); SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { + sljit_sw saved_reg_list[3]; + sljit_sw saved_reg_count; + CHECK_ERROR(); CHECK(check_sljit_emit_op0(compiler, op)); @@ -1819,33 +1602,38 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile break; case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: -#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) | (reg_map[SLJIT_R1] << 16) | (reg_map[SLJIT_R0] << 12) | (reg_map[SLJIT_R0] << 8) | reg_map[SLJIT_R1]); -#else - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, SLJIT_UNUSED, RM(SLJIT_R1)))); - return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL) - | (reg_map[SLJIT_R1] << 16) - | (reg_map[SLJIT_R0] << 12) - | (reg_map[SLJIT_R0] << 8) - | reg_map[TMP_REG1]); -#endif case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: case SLJIT_DIV_UW: case SLJIT_DIV_SW: SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); - SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2, bad_register_mapping); - - if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) { - FAIL_IF(push_inst(compiler, 0xe52d2008 /* str r2, [sp, #-8]! */)); - FAIL_IF(push_inst(compiler, 0xe58d1004 /* str r1, [sp, #4] */)); + SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3); + + saved_reg_count = 0; + if (compiler->scratches >= 4) + saved_reg_list[saved_reg_count++] = 3; + if (compiler->scratches >= 3) + saved_reg_list[saved_reg_count++] = 2; + if (op >= SLJIT_DIV_UW) + saved_reg_list[saved_reg_count++] = 1; + + if (saved_reg_count > 0) { + FAIL_IF(push_inst(compiler, 0xe52d0000 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */)); + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst(compiler, 0xe58d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */)); + } + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst(compiler, 0xe58d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */)); + } } - else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3)) - FAIL_IF(push_inst(compiler, 0xe52d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* str r1/r2, [sp, #-8]! */)); #if defined(__GNUC__) FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM, @@ -1854,12 +1642,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile #error "Software divmod functions are needed" #endif - if ((op >= SLJIT_DIV_UW) && (compiler->scratches >= 3)) { - FAIL_IF(push_inst(compiler, 0xe59d1004 /* ldr r1, [sp, #4] */)); - FAIL_IF(push_inst(compiler, 0xe49d2008 /* ldr r2, [sp], #8 */)); + if (saved_reg_count > 0) { + if (saved_reg_count >= 3) { + SLJIT_ASSERT(saved_reg_list[2] < 8); + FAIL_IF(push_inst(compiler, 0xe59d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */)); + } + if (saved_reg_count >= 2) { + SLJIT_ASSERT(saved_reg_list[1] < 8); + FAIL_IF(push_inst(compiler, 0xe59d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */)); + } + return push_inst(compiler, 0xe49d0000 | (saved_reg_count >= 3 ? 16 : 8) + | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */); } - else if ((op >= SLJIT_DIV_UW) || (compiler->scratches >= 3)) - return push_inst(compiler, 0xe49d0008 | (op >= SLJIT_DIV_UW ? 0x1000 : 0x2000) /* ldr r1/r2, [sp], #8 */); return SLJIT_SUCCESS; } @@ -1875,6 +1669,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) { +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) + return emit_op_mem(compiler, PRELOAD_DATA | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1); +#endif + return SLJIT_SUCCESS; + } + switch (GET_OPCODE(op)) { case SLJIT_MOV: case SLJIT_MOV_U32: @@ -1940,6 +1742,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + switch (GET_OPCODE(op)) { case SLJIT_ADD: case SLJIT_ADDC: @@ -1996,43 +1801,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) - -/* 0 - no fpu - 1 - vfp */ -static sljit_s32 arm_fpu_type = -1; - -static void init_compiler(void) -{ - if (arm_fpu_type != -1) - return; - - /* TODO: Only the OS can help to determine the correct fpu type. */ - arm_fpu_type = 1; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ -#ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; -#else - if (arm_fpu_type == -1) - init_compiler(); - return arm_fpu_type; -#endif -} - -#else - -#define arm_fpu_type 1 - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ - /* Always available. */ - return 1; -} - -#endif #define FPU_LOAD (1 << 20) #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \ @@ -2042,72 +1810,54 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { - sljit_sw tmp; sljit_uw imm; sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); + SLJIT_ASSERT(arg & SLJIT_MEM); + arg &= ~SLJIT_MEM; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); - arg = SLJIT_MEM | TMP_REG1; + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(OFFS_REG(arg)) | ((argw & 0x3) << 7)))); + arg = TMP_REG2; argw = 0; } /* Fast loads and stores. */ - if ((arg & REG_MASK)) { + if (arg) { if (!(argw & ~0x3fc)) return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2)); if (!(-argw & ~0x3fc)) return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2)); - } - if (compiler->cache_arg == arg) { - tmp = argw - compiler->cache_argw; - if (!(tmp & ~0x3fc)) - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, tmp >> 2)); - if (!(-tmp & ~0x3fc)) - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG3, reg, -tmp >> 2)); - if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_argw = argw; - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); - } - } - - if (arg & REG_MASK) { - if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0)); - } imm = get_imm(argw & ~0x3fc); if (imm) { - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG1, arg & REG_MASK, imm))); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, imm))); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2)); } imm = get_imm(-argw & ~0x3fc); if (imm) { argw = -argw; - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG1, arg & REG_MASK, imm))); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(SUB_DP, 0, TMP_REG2, arg & REG_MASK, imm))); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2)); } } - compiler->cache_arg = arg; - compiler->cache_argw = argw; - if (arg & REG_MASK) { - FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG3, arg & REG_MASK, reg_map[TMP_REG1]))); + if (arg) { + FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(ADD_DP, 0, TMP_REG2, arg & REG_MASK, RM(TMP_REG2)))); } else - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG2, argw)); - return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG3, reg, 0)); + return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0)); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { + op ^= SLJIT_F32_OP; + if (src & SLJIT_MEM) { FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); src = TMP_FREG1; @@ -2115,9 +1865,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_F32_OP, TMP_FREG1, src, 0))); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (FAST_IS_REG(dst)) return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | (TMP_FREG1 << 16)); @@ -2131,6 +1878,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + op ^= SLJIT_F32_OP; + if (FAST_IS_REG(src)) FAIL_IF(push_inst(compiler, VMOV | RD(src) | (TMP_FREG1 << 16))); else if (src & SLJIT_MEM) { @@ -2153,6 +1902,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + op ^= SLJIT_F32_OP; + if (src1 & SLJIT_MEM) { FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; @@ -2174,16 +1925,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil sljit_s32 dst_r; CHECK_ERROR(); - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; + if (src & SLJIT_MEM) { FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw)); src = dst_r; @@ -2228,8 +1978,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - compiler->cache_arg = 0; - compiler->cache_argw = 0; op ^= SLJIT_F32_OP; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -2282,21 +2030,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; + SLJIT_ASSERT(reg_map[TMP_REG1] == 14); if (FAST_IS_REG(dst)) - return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG3))); + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst, SLJIT_UNUSED, RM(TMP_REG1))); /* Memory. */ - if (getput_arg_fast(compiler, WORD_DATA, TMP_REG3, dst, dstw)) - return compiler->error; - /* TMP_REG3 is used for caching. */ - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG2, SLJIT_UNUSED, RM(TMP_REG3)))); - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0); + return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) @@ -2305,21 +2045,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); + SLJIT_ASSERT(reg_map[TMP_REG1] == 14); + if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(src)))); - else if (src & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_DATA | LOAD_DATA, TMP_REG3, src, srcw)) - FAIL_IF(compiler->error); - else { - compiler->cache_arg = 0; - compiler->cache_argw = 0; - FAIL_IF(getput_arg(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, 0, 0)); - FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG3, SLJIT_UNUSED, RM(TMP_REG2)))); - } - } + FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, TMP_REG1, 0, RM(src)))); + else if (src & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG2)); else if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, TMP_REG3, srcw)); - return push_inst(compiler, BLX | RM(TMP_REG3)); + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + + return push_inst(compiler, BX | RM(TMP_REG1)); } /* --------------------------------------------------------------------- */ @@ -2414,7 +2149,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile if (type >= SLJIT_FAST_CALL) PTR_FAIL_IF(prepare_blx(compiler)); PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, - type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); + type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0)) & ~COND_MASK) | get_cc(type), 0)); if (jump->flags & SLJIT_REWRITABLE_JUMP) { jump->addr = compiler->size; @@ -2431,8 +2166,8 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile #else if (type >= SLJIT_FAST_CALL) jump->flags |= IS_BL; - PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); - PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(type))); + PTR_FAIL_IF(emit_imm(compiler, TMP_REG2, 0)); + PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)) & ~COND_MASK) | get_cc(type))); jump->addr = compiler->size; #endif return jump; @@ -2452,7 +2187,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src)); SLJIT_ASSERT(src & SLJIT_MEM); - FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw)); + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG2)); return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2)); } @@ -2464,12 +2199,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5) if (type >= SLJIT_FAST_CALL) FAIL_IF(prepare_blx(compiler)); - FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0)); + FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_DATA | LOAD_DATA, 1, 0, type <= SLJIT_JUMP ? TMP_PC : TMP_REG2, TMP_PC, 0), 0)); if (type >= SLJIT_FAST_CALL) FAIL_IF(emit_blx(compiler)); #else - FAIL_IF(emit_imm(compiler, TMP_REG1, 0)); - FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1))); + FAIL_IF(emit_imm(compiler, TMP_REG2, 0)); + FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG2))); #endif jump->addr = compiler->size; return SLJIT_SUCCESS; @@ -2477,55 +2212,80 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); sljit_uw cc, ins; CHECK_ERROR(); - CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; op = GET_OPCODE(op); cc = get_cc(type & 0xff); - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (op < SLJIT_ADD) { FAIL_IF(push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 0))); FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_r, SLJIT_UNUSED, SRC2_IMM | 1) & ~COND_MASK) | cc)); - return (dst_r == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2); + return SLJIT_SUCCESS; } ins = (op == SLJIT_AND ? AND_DP : (op == SLJIT_OR ? ORR_DP : EOR_DP)); - if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) { - FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst, dst, SRC2_IMM | 1) & ~COND_MASK) | cc)); - /* The condition must always be set, even if the ORR/EOR is not executed above. */ - return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst))) : SLJIT_SUCCESS; - } - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } else if (src & SLJIT_IMM) { + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2)); + + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 1) & ~COND_MASK) | cc)); + + if (op == SLJIT_AND) + FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, dst_r, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG1, dst, dstw, TMP_REG2)); + + if (flags & SLJIT_SET_Z) + return push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG2, SLJIT_UNUSED, RM(dst_r))); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_uw cc, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + dst_reg &= ~SLJIT_I32_OP; + + cc = get_cc(type & 0xff); + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + tmp = get_imm(srcw); + if (tmp) + return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc); + + tmp = get_imm(~srcw); + if (tmp) + return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MVN_DP, 0, dst_reg, SLJIT_UNUSED, tmp) & ~COND_MASK) | cc); + +#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7) + tmp = (sljit_uw) srcw; + FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff))); + if (tmp <= 0xffff) + return SLJIT_SUCCESS; + return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff)); +#else FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; - srcw = 0; +#endif } - FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 1) & ~COND_MASK) | cc)); - FAIL_IF(push_inst(compiler, (EMIT_DATA_PROCESS_INS(ins, 0, dst_r, src, SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000))); - if (dst_r == TMP_REG2) - FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, dst, dstw, 0, 0)); - - return (flags & SLJIT_SET_E) ? push_inst(compiler, EMIT_DATA_PROCESS_INS(MOV_DP, SET_FLAGS, TMP_REG1, SLJIT_UNUSED, RM(dst_r))) : SLJIT_SUCCESS; + return push_inst(compiler, (EMIT_DATA_PROCESS_INS(MOV_DP, 0, dst_reg, SLJIT_UNUSED, RM(src)) & ~COND_MASK) | cc); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2551,16 +2311,16 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi set_const(const_, compiler); if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw)); + PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw, TMP_REG1)); return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - inline_set_jump_addr(addr, new_addr, 1); + inline_set_jump_addr(addr, executable_offset, new_target, 1); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - inline_set_const(addr, new_constant, 1); + inline_set_const(addr, executable_offset, new_constant, 1); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c index d9958512c8..fd67f50253 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -76,6 +76,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = { #define BRK 0xd4200000 #define CBZ 0xb4000000 #define CLZ 0xdac01000 +#define CSEL 0x9a800000 #define CSINC 0x9a800400 #define EOR 0xca000000 #define EORI 0xd2000000 @@ -151,7 +152,7 @@ static SLJIT_INLINE void modify_imm64_const(sljit_ins* inst, sljit_uw new_imm) inst[3] = MOVK | dst | ((new_imm >> 48) << 5) | (3 << 21); } -static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; @@ -165,9 +166,10 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in target_addr = jump->u.target; else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } - diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4); + + diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; if (jump->flags & IS_COND) { diff += sizeof(sljit_ins); @@ -211,6 +213,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; + sljit_sw executable_offset; sljit_uw addr; sljit_s32 dst; @@ -228,6 +231,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; @@ -242,13 +247,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!jump || jump->addr >= word_count); SLJIT_ASSERT(!const_ || const_->addr >= word_count); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } if (jump && jump->addr == word_count) { jump->addr = (sljit_uw)(code_ptr - 4); - code_ptr -= detect_jump_type(jump, code_ptr, code); + code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == word_count) { @@ -263,7 +268,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -277,9 +282,10 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil while (jump) { do { addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; + buf_ptr = (sljit_ins *)jump->addr; + if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - jump->addr) >> 2; + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; SLJIT_ASSERT((sljit_sw)addr <= 0x1ffffff && (sljit_sw)addr >= -0x2000000); buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (addr & 0x3ffffff); if (jump->flags & IS_COND) @@ -287,7 +293,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil break; } if (jump->flags & PATCH_COND) { - addr = (sljit_sw)(addr - jump->addr) >> 2; + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; SLJIT_ASSERT((sljit_sw)addr <= 0x3ffff && (sljit_sw)addr >= -0x40000); buf_ptr[0] = (buf_ptr[0] & ~0xffffe0) | ((addr & 0x7ffff) << 5); break; @@ -308,11 +314,37 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); return code; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_PRE_UPDATE: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + return 1; + + default: + return 0; + } +} + /* --------------------------------------------------------------------- */ /* Core code generator functions. */ /* --------------------------------------------------------------------- */ @@ -365,7 +397,7 @@ static sljit_ins logical_imm(sljit_sw imm, sljit_s32 len) uimm = (sljit_uw)imm; while (1) { if (len <= 0) { - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return 0; } mask = ((sljit_uw)1 << len) - 1; @@ -635,7 +667,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s } goto set_flags; default: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); break; } @@ -702,7 +734,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_NOT: SLJIT_ASSERT(arg1 == TMP_REG1); FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); - goto set_flags; + break; /* Set flags. */ case SLJIT_NEG: SLJIT_ASSERT(arg1 == TMP_REG1); if (flags & SET_FLAGS) @@ -710,8 +742,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); case SLJIT_CLZ: SLJIT_ASSERT(arg1 == TMP_REG1); - FAIL_IF(push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2))); - goto set_flags; + return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); case SLJIT_ADD: CHECK_FLAGS(1 << 29); return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); @@ -740,24 +771,24 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); case SLJIT_OR: FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); - goto set_flags; + break; /* Set flags. */ case SLJIT_XOR: FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); - goto set_flags; + break; /* Set flags. */ case SLJIT_SHL: FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); - goto set_flags; + break; /* Set flags. */ case SLJIT_LSHR: FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); - goto set_flags; + break; /* Set flags. */ case SLJIT_ASHR: FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); - goto set_flags; + break; /* Set flags. */ + default: + SLJIT_UNREACHABLE(); + return SLJIT_SUCCESS; } - SLJIT_ASSERT_STOP(); - return SLJIT_SUCCESS; - set_flags: if (flags & SET_FLAGS) return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO)); @@ -859,6 +890,10 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag } arg &= REG_MASK; + + if (arg == SLJIT_UNUSED) + return 0; + if (argw >= 0 && (argw >> shift) <= 0xfff && (argw & ((1 << shift) - 1)) == 0) { if (SLJIT_UNLIKELY(flags & ARG_TEST)) return 1; @@ -919,21 +954,23 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl next_argw = 0; } - tmp_r = (flags & STORE) ? TMP_REG3 : reg; + tmp_r = ((flags & STORE) || (flags == (WORD_SIZE | SIGNED))) ? TMP_REG3 : reg; if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) { /* Update only applies if a base register exists. */ other_r = OFFS_REG(arg); if (!other_r) { other_r = arg & REG_MASK; - if (other_r != reg && argw >= 0 && argw <= 0xffffff) { + SLJIT_ASSERT(other_r != reg); + + if (argw >= 0 && argw <= 0xffffff) { if ((argw & 0xfff) != 0) FAIL_IF(push_inst(compiler, ADDI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); if (argw >> 12) FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(other_r) | RN(other_r) | ((argw >> 12) << 10))); return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(other_r)); } - else if (other_r != reg && argw < 0 && argw >= -0xffffff) { + else if (argw < 0 && argw >= -0xffffff) { argw = -argw; if ((argw & 0xfff) != 0) FAIL_IF(push_inst(compiler, SUBI | RD(other_r) | RN(other_r) | ((argw & 0xfff) << 10))); @@ -966,18 +1003,8 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl /* No caching here. */ arg &= REG_MASK; - argw &= 0x3; - if (!argw || argw == shift) { - FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r) | (argw ? (1 << 12) : 0))); - return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10)); - } - if (arg != reg) { - FAIL_IF(push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r) | (argw << 10))); - return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg)); - } - FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(arg) | RM(other_r) | (argw << 10))); - FAIL_IF(push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) | RT(reg) | RN(TMP_LR))); - return push_inst(compiler, ORR | RD(arg) | RN(TMP_ZERO) | RM(TMP_LR)); + FAIL_IF(push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(other_r))); + return push_inst(compiler, ADD | RD(arg) | RN(arg) | RM(other_r)); } if (arg & OFFS_REG_MASK) { @@ -998,16 +1025,16 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl } } - if (argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg & REG_MASK) | ((argw >> 12) << 10))); - return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) - | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift))); - } - diff = argw - next_argw; next_arg = (arg & REG_MASK) && (arg == next_arg) && diff <= 0xfff && diff >= -0xfff && diff != 0; arg &= REG_MASK; + if (arg != SLJIT_UNUSED && argw >= 0 && argw <= 0xffffff && (argw & ((1 << shift) - 1)) == 0) { + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_r) | RN(arg) | ((argw >> 12) << 10))); + return push_inst(compiler, sljit_mem_imm[flags & 0x3] | (shift << 30) + | RT(reg) | RN(tmp_r) | ((argw & 0xfff) << (10 - shift))); + } + if (arg && compiler->cache_arg == SLJIT_MEM) { if (compiler->cache_argw == argw) return push_inst(compiler, sljit_mem_reg[flags & 0x3] | (shift << 30) | RT(reg) | RN(arg) | RM(TMP_REG3)); @@ -1290,6 +1317,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile compiler->cache_arg = 0; compiler->cache_argw = 0; + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) { + if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) { + SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4); + + if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8) + dst = 5; + else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16) + dst = 3; + else + dst = 1; + + /* Signed word sized load is the prefetch instruction. */ + return emit_op_mem(compiler, WORD_SIZE | SIGNED, dst, src, srcw); + } + return SLJIT_SUCCESS; + } + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; op = GET_OPCODE(op); @@ -1364,7 +1408,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile srcw = (sljit_s32)srcw; break; default: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); flags = 0; break; } @@ -1391,7 +1435,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return SLJIT_SUCCESS; } - flags = GET_FLAGS(op_flags) ? SET_FLAGS : 0; + flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; if (op_flags & SLJIT_I32_OP) { flags |= INT_OP; @@ -1443,8 +1487,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile compiler->cache_arg = 0; compiler->cache_argw = 0; + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; - flags = GET_FLAGS(op) ? SET_FLAGS : 0; + flags = HAS_FLAGS(op) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; if (op & SLJIT_I32_OP) { flags |= INT_OP; @@ -1537,16 +1584,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ -#ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; -#else - /* Available by default. */ - return 1; -#endif -} - static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { sljit_u32 shift = MEM_SIZE_SHIFT(flags); @@ -1604,7 +1641,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; sljit_ins inv_bits = (op & SLJIT_F32_OP) ? (1 << 22) : 0; if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) @@ -1617,7 +1654,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src))); - if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) + if (dst & SLJIT_MEM) return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw); return SLJIT_SUCCESS; } @@ -1775,10 +1812,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (FAST_IS_REG(dst)) return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); @@ -1856,7 +1889,7 @@ static sljit_uw get_cc(sljit_s32 type) return 0x6; default: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return 0xe; } } @@ -1966,19 +1999,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { - sljit_s32 dst_r, flags, mem_flags; + sljit_s32 dst_r, src_r, flags, mem_flags; sljit_ins cc; CHECK_ERROR(); - CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; cc = get_cc(type & 0xff); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; @@ -1992,26 +2020,50 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co compiler->cache_arg = 0; compiler->cache_argw = 0; - flags = GET_FLAGS(op) ? SET_FLAGS : 0; + flags = HAS_FLAGS(op) ? SET_FLAGS : 0; mem_flags = WORD_SIZE; if (op & SLJIT_I32_OP) { flags |= INT_OP; mem_flags = INT_SIZE; } - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, src, srcw, dst, dstw)); + src_r = dst; + + if (dst & SLJIT_MEM) { + FAIL_IF(emit_op_mem2(compiler, mem_flags, TMP_REG1, dst, dstw, dst, dstw)); + src_r = TMP_REG1; + } + + FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2); + + if (dst & SLJIT_MEM) + return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (dst_reg & SLJIT_I32_OP) ? (1 << 31) : 0; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + if (dst_reg & SLJIT_I32_OP) + srcw = (sljit_s32)srcw; + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; srcw = 0; - } else if (src & SLJIT_IMM) - flags |= ARG1_IMM; + } - FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO))); - emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src, TMP_REG2); + cc = get_cc(type & 0xff); + dst_reg &= ~SLJIT_I32_OP; - if (dst_r != TMP_REG1) - return SLJIT_SUCCESS; - return emit_op_mem2(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, 0, 0); + return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2027,7 +2079,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi PTR_FAIL_IF(!const_); set_const(const_, compiler); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, init_value)); if (dst & SLJIT_MEM) @@ -2035,16 +2087,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { sljit_ins* inst = (sljit_ins*)addr; - modify_imm64_const(inst, new_addr); + modify_imm64_const(inst, new_target); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 4); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { sljit_ins* inst = (sljit_ins*)addr; modify_imm64_const(inst, new_constant); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 4); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c index 1ed44a8130..29e5566a82 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_T2_32.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -35,15 +35,14 @@ typedef sljit_u32 sljit_ins; /* Last register + 1. */ #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) -#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) -#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 5) +#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4) #define TMP_FREG1 (0) #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */ -static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { - 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 13, 3, 4, 14, 15 +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { + 0, 0, 1, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 13, 3, 14, 15 }; #define COPY_BITS(src, from, to, bits) \ @@ -108,7 +107,11 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 6] = { #define BLX 0x4780 #define BX 0x4700 #define CLZ 0xfab0f080 +#define CMNI_W 0xf1100f00 +#define CMP 0x4280 #define CMPI 0x2800 +#define CMPI_W 0xf1b00f00 +#define CMP_X 0x4500 #define CMP_W 0xebb00f00 #define EORI 0xf0800000 #define EORS 0x4040 @@ -221,7 +224,7 @@ static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm) inst[3] = dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16); } -static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code) +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset) { sljit_sw diff; @@ -232,7 +235,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u1 /* Branch to ARM code is not optimized yet. */ if (!(jump->u.target & 0x1)) return 0; - diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2)) >> 1; + diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1; } else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); @@ -276,7 +279,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u1 return 0; } -static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) +static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset) { sljit_s32 type = (jump->flags >> 4) & 0xf; sljit_sw diff; @@ -290,10 +293,12 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) if (jump->flags & JUMP_ADDR) { SLJIT_ASSERT(jump->u.target & 0x1); - diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + 4)) >> 1; + diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; + } + else { + SLJIT_ASSERT(jump->u.label->addr & 0x1); + diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1; } - else - diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + 4)) >> 1; jump_inst = (sljit_u16*)jump->addr; switch (type) { @@ -336,7 +341,7 @@ static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump) else if (type == 6) /* Encoding T1 of 'BL' instruction */ jump_inst[1] |= 0xd000; else - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler) @@ -347,6 +352,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_u16 *buf_ptr; sljit_u16 *buf_end; sljit_uw half_count; + sljit_sw executable_offset; struct sljit_label *label; struct sljit_jump *jump; @@ -362,6 +368,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; half_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; @@ -376,13 +384,13 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!jump || jump->addr >= half_count); SLJIT_ASSERT(!const_ || const_->addr >= half_count); if (label && label->size == half_count) { - label->addr = ((sljit_uw)code_ptr) | 0x1; + label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; label->size = code_ptr - code; label = label->next; } if (jump && jump->addr == half_count) { jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8); - code_ptr -= detect_jump_type(jump, code_ptr, code); + code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == half_count) { @@ -397,7 +405,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == half_count) { - label->addr = ((sljit_uw)code_ptr) | 0x1; + label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1; label->size = code_ptr - code; label = label->next; } @@ -409,17 +417,43 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { - set_jump_instruction(jump); + set_jump_instruction(jump, executable_offset); jump = jump->next; } compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_u16); + + code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); /* Set thumb mode flag. */ return (void*)((sljit_uw)code | 0x1); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + + case SLJIT_HAS_PRE_UPDATE: + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + return 1; + + default: + return 0; + } +} + /* --------------------------------------------------------------------- */ /* Core code generator functions. */ /* --------------------------------------------------------------------- */ @@ -500,24 +534,20 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, #define ARG1_IMM 0x0010000 #define ARG2_IMM 0x0020000 -#define KEEP_FLAGS 0x0040000 /* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */ #define SET_FLAGS 0x0100000 #define UNUSED_RETURN 0x0200000 -#define SLOW_DEST 0x0400000 -#define SLOW_SRC1 0x0800000 -#define SLOW_SRC2 0x1000000 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2) { /* dst must be register, TMP_REG1 - arg1 must be register, TMP_REG1, imm - arg2 must be register, TMP_REG2, imm */ + arg1 must be register, imm + arg2 must be register, imm */ sljit_s32 reg; sljit_uw imm, nimm; if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) { - /* Both are immediates. */ + /* Both are immediates, no temporaries are used. */ flags &= ~ARG1_IMM; FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); arg1 = TMP_REG1; @@ -533,7 +563,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s /* No form with immediate operand. */ break; case SLJIT_MOV: - SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); + SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2); return load_immediate(compiler, dst, imm); case SLJIT_NOT: if (!(flags & SET_FLAGS)) @@ -543,7 +573,7 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; case SLJIT_ADD: nimm = -imm; - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { + if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); if (nimm <= 0x7) @@ -561,9 +591,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (nimm <= 0xfff) return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(nimm)); } - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + nimm = get_imm(-imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); break; case SLJIT_ADDC: imm = get_imm(imm); @@ -571,16 +604,27 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; case SLJIT_SUB: + /* SUB operation can be replaced by ADD because of the negative carry flag. */ if (flags & ARG1_IMM) { - if (!(flags & KEEP_FLAGS) && imm == 0 && IS_2_LO_REGS(reg, dst)) + if (imm == 0 && IS_2_LO_REGS(reg, dst)) return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg)); imm = get_imm(imm); if (imm != INVALID_IMM) return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); break; } + if (flags & UNUSED_RETURN) { + if (imm <= 0xff && reg_map[reg] <= 7) + return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg)); + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, CMPI_W | RN4(reg) | nimm); + nimm = get_imm(-imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, CMNI_W | RN4(reg) | nimm); + } nimm = -imm; - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(reg, dst)) { + if (IS_2_LO_REGS(reg, dst)) { if (imm <= 0x7) return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg)); if (nimm <= 0x7) @@ -591,8 +635,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (nimm <= 0xff) return push_inst16(compiler, ADDSI8 | IMM8(nimm) | RDN3(dst)); } - if (imm <= 0xff && (flags & UNUSED_RETURN)) - return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg)); } if (!(flags & SET_FLAGS)) { if (imm <= 0xfff) @@ -600,9 +642,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s if (nimm <= 0xfff) return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(nimm)); } - imm = get_imm(imm); - if (imm != INVALID_IMM) - return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm); + nimm = get_imm(imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); + nimm = get_imm(-imm); + if (nimm != INVALID_IMM) + return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | nimm); break; case SLJIT_SUBC: if (flags & ARG1_IMM) @@ -647,31 +692,35 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s } switch (flags & 0xffff) { case SLJIT_SHL: - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + if (IS_2_LO_REGS(dst, reg)) return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6)); return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); case SLJIT_LSHR: - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + if (IS_2_LO_REGS(dst, reg)) return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6)); return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); default: /* SLJIT_ASHR */ - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, reg)) + if (IS_2_LO_REGS(dst, reg)) return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6)); return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm)); } default: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); break; } if (flags & ARG2_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG2, arg2)); - arg2 = TMP_REG2; + imm = arg2; + arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(load_immediate(compiler, arg2, imm)); } else { - FAIL_IF(load_immediate(compiler, TMP_REG1, arg1)); - arg1 = TMP_REG1; + imm = arg1; + arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1; + FAIL_IF(load_immediate(compiler, arg1, imm)); } + + SLJIT_ASSERT(arg1 != arg2); } /* Both arguments are registers. */ @@ -684,104 +733,102 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_MOVU_U32: case SLJIT_MOVU_S32: case SLJIT_MOVU_P: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); if (dst == arg2) return SLJIT_SUCCESS; return push_inst16(compiler, MOV | SET_REGS44(dst, arg2)); case SLJIT_MOV_U8: case SLJIT_MOVU_U8: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2)); return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2)); case SLJIT_MOV_S8: case SLJIT_MOVU_S8: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2)); case SLJIT_MOV_U16: case SLJIT_MOVU_U16: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2)); return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2)); case SLJIT_MOV_S16: case SLJIT_MOVU_S16: - SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); + SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2); if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2)); case SLJIT_NOT: - SLJIT_ASSERT(arg1 == TMP_REG1); - if (!(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + SLJIT_ASSERT(arg1 == TMP_REG2); + if (IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, MVNS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(arg2)); case SLJIT_CLZ: - SLJIT_ASSERT(arg1 == TMP_REG1); + SLJIT_ASSERT(arg1 == TMP_REG2); FAIL_IF(push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2))); - if (flags & SET_FLAGS) { - if (reg_map[dst] <= 7) - return push_inst16(compiler, CMPI | RDN3(dst)); - return push_inst32(compiler, ADD_WI | SET_FLAGS | RN4(dst) | RD4(dst)); - } return SLJIT_SUCCESS; case SLJIT_ADD: - if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) + if (IS_3_LO_REGS(dst, arg1, arg2)) return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2)); if (dst == arg1 && !(flags & SET_FLAGS)) return push_inst16(compiler, ADD | SET_REGS44(dst, arg2)); return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ADDC: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUB: - if (!(flags & KEEP_FLAGS) && IS_3_LO_REGS(dst, arg1, arg2)) + if (flags & UNUSED_RETURN) { + if (IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2)); + return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2)); + } + if (IS_3_LO_REGS(dst, arg1, arg2)) return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2)); return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SUBC: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_MUL: if (!(flags & SET_FLAGS)) return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2)); - SLJIT_ASSERT(reg_map[TMP_REG2] <= 7 && dst != TMP_REG2); + SLJIT_ASSERT(dst != TMP_REG2); FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2))); /* cmp TMP_REG2, dst asr #31. */ return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst)); case SLJIT_AND: - if (!(flags & KEEP_FLAGS)) { - if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) - return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); - if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) - return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); - } + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) + return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2)); + if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2)) + return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2)); return push_inst32(compiler, AND_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_OR: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_XOR: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_SHL: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_LSHR: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); case SLJIT_ASHR: - if (dst == arg1 && !(flags & KEEP_FLAGS) && IS_2_LO_REGS(dst, arg2)) + if (dst == arg1 && IS_2_LO_REGS(dst, arg2)) return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2)); return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2)); } - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; } @@ -791,9 +838,9 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s #define WORD_SIZE 0x00 #define BYTE_SIZE 0x04 #define HALF_SIZE 0x08 +#define PRELOAD 0x0c #define UPDATE 0x10 -#define ARG_TEST 0x20 #define IS_WORD_SIZE(flags) (!(flags & (BYTE_SIZE | HALF_SIZE))) #define OFFSET_CHECK(imm, shift) (!(argw & ~(imm << shift))) @@ -849,7 +896,7 @@ static const sljit_ins sljit_mem16_imm5[12] = { #define MEM_IMM8 0xc00 #define MEM_IMM12 0x800000 -static const sljit_ins sljit_mem32[12] = { +static const sljit_ins sljit_mem32[13] = { /* w u l */ 0xf8500000 /* ldr.w */, /* w u s */ 0xf8400000 /* str.w */, /* w s l */ 0xf8500000 /* ldr.w */, @@ -864,6 +911,8 @@ static const sljit_ins sljit_mem32[12] = { /* h u s */ 0xf8200000 /* strsh.w */, /* h s l */ 0xf9300000 /* ldrsh.w */, /* h s s */ 0xf8200000 /* strsh.w */, + +/* p u l */ 0xf8100000 /* pld */, }; /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */ @@ -887,20 +936,67 @@ static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, return SLJIT_ERR_UNSUPPORTED; } -/* Can perform an operation using at most 1 instruction. */ -static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) +static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, + sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg) { - sljit_s32 other_r, shift; + sljit_s32 other_r; + sljit_s32 update = flags & UPDATE; + sljit_uw tmp; SLJIT_ASSERT(arg & SLJIT_MEM); + SLJIT_ASSERT((arg & REG_MASK) != tmp_reg); + flags &= ~UPDATE; + arg &= ~SLJIT_MEM; + + if (SLJIT_UNLIKELY(!(arg & REG_MASK))) { + tmp = get_imm(argw & ~0xfff); + if (tmp != INVALID_IMM) { + FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | tmp)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff)); + } - if (SLJIT_UNLIKELY(flags & UPDATE)) { - if ((arg & REG_MASK) && !(arg & OFFS_REG_MASK) && argw <= 0xff && argw >= -0xff) { - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags]) + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg)); + } + + if (SLJIT_UNLIKELY(update)) { + SLJIT_ASSERT(reg != arg); - flags &= ~UPDATE; + if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { + other_r = OFFS_REG(arg); arg &= 0xf; + + if (IS_3_LO_REGS(reg, arg, other_r)) + FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r))); + else + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r))); + return push_inst16(compiler, ADD | SET_REGS44(arg, other_r)); + } + + if (argw > 0xff) { + tmp = get_imm(argw & ~0xff); + if (tmp != INVALID_IMM) { + push_inst32(compiler, ADD_WI | RD4(arg) | RN4(arg) | tmp); + argw = argw & 0xff; + } + } + else if (argw < -0xff) { + tmp = get_imm(-argw & ~0xff); + if (tmp != INVALID_IMM) { + push_inst32(compiler, SUB_WI | RD4(arg) | RN4(arg) | tmp); + argw = -(-argw & 0xff); + } + } + + if (argw == 0) { + if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg)); + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg)); + } + + if (argw <= 0xff && argw >= -0xff) { if (argw >= 0) argw |= 0x200; else { @@ -908,219 +1004,83 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flag } SLJIT_ASSERT(argw >= 0 && (argw & 0xff) <= 0xff); - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw)); - return -1; + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | 0x100 | argw); } - return 0; + + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + + SLJIT_ASSERT(reg != tmp_reg); + + if (IS_3_LO_REGS(reg, arg, tmp_reg)) + FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg))); + else + FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg))); + return push_inst16(compiler, ADD | SET_REGS44(arg, tmp_reg)); } if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; - argw &= 0x3; other_r = OFFS_REG(arg); arg &= 0xf; if (!argw && IS_3_LO_REGS(reg, arg, other_r)) - FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r))); - else - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4))); - return -1; + return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r)); + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4)); } - if (!(arg & REG_MASK) || argw > 0xfff || argw < -0xff) - return 0; - - if (SLJIT_UNLIKELY(flags & ARG_TEST)) - return 1; + if (argw > 0xfff) { + tmp = get_imm(argw & ~0xfff); + if (tmp != INVALID_IMM) { + push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | tmp); + arg = tmp_reg; + argw = argw & 0xfff; + } + } + else if (argw < -0xff) { + tmp = get_imm(-argw & ~0xff); + if (tmp != INVALID_IMM) { + push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | tmp); + arg = tmp_reg; + argw = -(-argw & 0xff); + } + } - arg &= 0xf; if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) { - shift = 3; + tmp = 3; if (IS_WORD_SIZE(flags)) { if (OFFSET_CHECK(0x1f, 2)) - shift = 2; + tmp = 2; } else if (flags & BYTE_SIZE) { if (OFFSET_CHECK(0x1f, 0)) - shift = 0; + tmp = 0; } else { SLJIT_ASSERT(flags & HALF_SIZE); if (OFFSET_CHECK(0x1f, 1)) - shift = 1; + tmp = 1; } - if (shift != 3) { - FAIL_IF(push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - shift)))); - return -1; - } - } - - /* SP based immediate. */ - if (SLJIT_UNLIKELY(arg == SLJIT_SP) && OFFSET_CHECK(0xff, 2) && IS_WORD_SIZE(flags) && reg_map[reg] <= 7) { - FAIL_IF(push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2))); - return -1; + if (tmp < 3) + return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | (argw << (6 - tmp))); } - - if (argw >= 0) - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw)); - else - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw)); - return -1; -} - -/* see getput_arg below. - Note: can_cache is called only for binary operators. Those - operators always uses word arguments without write back. */ -static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) -{ - sljit_sw diff; - if ((arg & OFFS_REG_MASK) || !(next_arg & SLJIT_MEM)) - return 0; - - if (!(arg & REG_MASK)) { - diff = argw - next_argw; - if (diff <= 0xfff && diff >= -0xfff) - return 1; - return 0; + else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && OFFSET_CHECK(0xff, 2) && reg_map[reg] <= 7) { + /* SP based immediate. */ + return push_inst16(compiler, STR_SP | ((flags & STORE) ? 0 : 0x800) | RDN3(reg) | (argw >> 2)); } - if (argw == next_argw) - return 1; + if (argw >= 0 && argw <= 0xfff) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw); + else if (argw < 0 && argw >= -0xff) + return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | -argw); - diff = argw - next_argw; - if (arg == next_arg && diff <= 0xfff && diff >= -0xfff) - return 1; + SLJIT_ASSERT(arg != tmp_reg); - return 0; -} - -/* Emit the necessary instructions. See can_cache above. */ -static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, - sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw) -{ - sljit_s32 tmp_r, other_r; - sljit_sw diff; - - SLJIT_ASSERT(arg & SLJIT_MEM); - if (!(next_arg & SLJIT_MEM)) { - next_arg = 0; - next_argw = 0; - } - - tmp_r = (flags & STORE) ? TMP_REG3 : reg; - - if (SLJIT_UNLIKELY((flags & UPDATE) && (arg & REG_MASK))) { - /* Update only applies if a base register exists. */ - /* There is no caching here. */ - other_r = OFFS_REG(arg); - arg &= 0xf; - flags &= ~UPDATE; - - if (!other_r) { - if (!(argw & ~0xfff)) { - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | argw)); - return push_inst32(compiler, ADDWI | RD4(arg) | RN4(arg) | IMM12(argw)); - } - - if (compiler->cache_arg == SLJIT_MEM) { - if (argw == compiler->cache_argw) { - other_r = TMP_REG3; - argw = 0; - } - else if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_argw = argw; - other_r = TMP_REG3; - argw = 0; - } - } - - if (argw) { - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - compiler->cache_arg = SLJIT_MEM; - compiler->cache_argw = argw; - other_r = TMP_REG3; - argw = 0; - } - } - - argw &= 0x3; - if (!argw && IS_3_LO_REGS(reg, arg, other_r)) { - FAIL_IF(push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r))); - return push_inst16(compiler, ADD | SET_REGS44(arg, other_r)); - } - FAIL_IF(push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | (argw << 4))); - return push_inst32(compiler, ADD_W | RD4(arg) | RN4(arg) | RM4(other_r) | (argw << 6)); - } - flags &= ~UPDATE; - - SLJIT_ASSERT(!(arg & OFFS_REG_MASK)); - - if (compiler->cache_arg == arg) { - diff = argw - compiler->cache_argw; - if (!(diff & ~0xfff)) - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | diff); - if (!((compiler->cache_argw - argw) & ~0xff)) - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(TMP_REG3) | (compiler->cache_argw - argw)); - if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, diff) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); - } - } - - next_arg = (arg & REG_MASK) && (arg == next_arg) && (argw != next_argw); - arg &= 0xf; - if (arg && compiler->cache_arg == SLJIT_MEM) { - if (compiler->cache_argw == argw) - return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); - if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, argw - compiler->cache_argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_argw = argw; - return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); - } - } - - compiler->cache_argw = argw; - if (next_arg && emit_set_delta(compiler, TMP_REG3, arg, argw) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_arg = SLJIT_MEM | arg; - arg = 0; - } - else { - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); - compiler->cache_arg = SLJIT_MEM; - - diff = argw - next_argw; - if (next_arg && diff <= 0xfff && diff >= -0xfff) { - FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, arg))); - compiler->cache_arg = SLJIT_MEM | arg; - arg = 0; - } - } - - if (arg) - return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(TMP_REG3)); - return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(TMP_REG3) | 0); -} - -static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) -{ - if (getput_arg_fast(compiler, flags, reg, arg, argw)) - return compiler->error; - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, flags, reg, arg, argw, 0, 0); -} - -static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w) -{ - if (getput_arg_fast(compiler, flags, reg, arg1, arg1w)) - return compiler->error; - return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w); + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); + if (IS_3_LO_REGS(reg, arg, tmp_reg)) + return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg)); + return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg)); } /* --------------------------------------------------------------------- */ @@ -1132,14 +1092,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_s32 size, i, tmp; - sljit_ins push; + sljit_ins push = 0; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - push = (1 << 4); - tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG; for (i = SLJIT_S0; i >= tmp; i--) push |= 1 << reg_map[i]; @@ -1152,7 +1110,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi : push_inst16(compiler, PUSH | (1 << 8) | push)); /* Stack must be aligned to 8 bytes: (LR, R4) */ - size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); local_size = ((size + local_size + 7) & ~7) - size; compiler->local_size = local_size; if (local_size > 0) { @@ -1182,7 +1140,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); + size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); compiler->local_size = ((size + local_size + 7) & ~7) - size; return SLJIT_SUCCESS; } @@ -1190,7 +1148,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { sljit_s32 i, tmp; - sljit_ins pop; + sljit_ins pop = 0; CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); @@ -1204,8 +1162,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp FAIL_IF(emit_op_imm(compiler, SLJIT_ADD | ARG2_IMM, SLJIT_SP, SLJIT_SP, compiler->local_size)); } - pop = (1 << 4); - tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG; for (i = SLJIT_S0; i >= tmp; i--) pop |= 1 << reg_map[i]; @@ -1263,7 +1219,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_DIV_UW: case SLJIT_DIV_SW: SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments); - SLJIT_COMPILE_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12, bad_register_mapping); + SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 12); saved_reg_count = 0; if (compiler->scratches >= 4) @@ -1323,8 +1279,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); - compiler->cache_arg = 0; - compiler->cache_argw = 0; + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) { + /* Since TMP_PC has index 15, IS_2_LO_REGS and IS_3_LO_REGS checks always fail. */ + if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) + return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1); + return SLJIT_SUCCESS; + } dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; @@ -1384,31 +1344,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile srcw = (sljit_s16)srcw; break; default: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); flags = 0; break; } if (src & SLJIT_IMM) - FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); + FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, srcw)); else if (src & SLJIT_MEM) { - if (getput_arg_fast(compiler, flags, dst_r, src, srcw)) - FAIL_IF(compiler->error); - else - FAIL_IF(getput_arg(compiler, flags, dst_r, src, srcw, dst, dstw)); + FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, ((flags & UPDATE) && dst_r == TMP_REG1) ? TMP_REG2 : TMP_REG1)); } else { if (dst_r != TMP_REG1) - return emit_op_imm(compiler, op, dst_r, TMP_REG1, src); + return emit_op_imm(compiler, op, dst_r, TMP_REG2, src); dst_r = src; } - if (dst & SLJIT_MEM) { - if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) - return compiler->error; - else - return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); - } - return SLJIT_SUCCESS; + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, (dst_r == TMP_REG1) ? TMP_REG2 : TMP_REG1); } if (op == SLJIT_NEG) { @@ -1419,29 +1373,22 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return sljit_emit_op2(compiler, SLJIT_SUB | op_flags, dst, dstw, SLJIT_IMM, 0, src, srcw); } - flags = (GET_FLAGS(op_flags) ? SET_FLAGS : 0) | ((op_flags & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0); - if (src & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src, srcw)) - FAIL_IF(compiler->error); - else - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw)); - src = TMP_REG2; - } + flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; if (src & SLJIT_IMM) flags |= ARG2_IMM; + else if (src & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + srcw = TMP_REG1; + } else srcw = src; - emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, srcw); + emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, srcw); - if (dst & SLJIT_MEM) { - if (getput_arg_fast(compiler, flags | STORE, dst_r, dst, dstw)) - return compiler->error; - else - return getput_arg(compiler, flags | STORE, dst_r, dst, dstw, 0, 0); - } - return SLJIT_SUCCESS; + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op, @@ -1449,7 +1396,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 dst_r, flags; + sljit_s32 dst_reg, flags, src2_reg; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1457,70 +1404,39 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; - flags = (GET_FLAGS(op) ? SET_FLAGS : 0) | ((op & SLJIT_KEEP_FLAGS) ? KEEP_FLAGS : 0); - - if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, WORD_SIZE | STORE | ARG_TEST, TMP_REG1, dst, dstw)) - flags |= SLOW_DEST; - - if (src1 & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG1, src1, src1w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC1; - } - if (src2 & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG2, src2, src2w)) - FAIL_IF(compiler->error); - else - flags |= SLOW_SRC2; - } - - if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) { - if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) { - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, src1, src1w)); - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw)); - } - else { - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, src2, src2w)); - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw)); - } - } - else if (flags & SLOW_SRC1) - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG1, src1, src1w, dst, dstw)); - else if (flags & SLOW_SRC2) - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src2, src2w, dst, dstw)); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; - if (src1 & SLJIT_MEM) - src1 = TMP_REG1; - if (src2 & SLJIT_MEM) - src2 = TMP_REG2; + dst_reg = SLOW_IS_REG(dst) ? dst : TMP_REG1; + flags = HAS_FLAGS(op) ? SET_FLAGS : 0; if (src1 & SLJIT_IMM) flags |= ARG1_IMM; + else if (src1 & SLJIT_MEM) { + emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1); + src1w = TMP_REG1; + } else src1w = src1; + if (src2 & SLJIT_IMM) flags |= ARG2_IMM; + else if (src2 & SLJIT_MEM) { + src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1; + emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg); + src2w = src2_reg; + } else src2w = src2; if (dst == SLJIT_UNUSED) flags |= UNUSED_RETURN; - emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w); + emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, src1w, src2w); - if (dst & SLJIT_MEM) { - if (!(flags & SLOW_DEST)) { - getput_arg_fast(compiler, WORD_SIZE | STORE, dst_r, dst, dstw); - return compiler->error; - } - return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, 0, 0); - } - return SLJIT_SUCCESS; + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) @@ -1550,21 +1466,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ -#ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; -#else - /* Available by default. */ - return 1; -#endif -} - #define FPU_LOAD (1 << 20) static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw) { - sljit_sw tmp; sljit_uw imm; sljit_sw inst = VSTR_F32 | (flags & (SLJIT_F32_OP | FPU_LOAD)); @@ -1572,8 +1477,8 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, /* Fast loads and stores. */ if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG2) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6))); - arg = SLJIT_MEM | TMP_REG2; + FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((argw & 0x3) << 6))); + arg = SLJIT_MEM | TMP_REG1; argw = 0; } @@ -1584,21 +1489,6 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, return push_inst32(compiler, inst | RN4(arg & REG_MASK) | DD4(reg) | (-argw >> 2)); } - /* Slow cases */ - SLJIT_ASSERT(!(arg & OFFS_REG_MASK)); - if (compiler->cache_arg == arg) { - tmp = argw - compiler->cache_argw; - if (!(tmp & ~0x3fc)) - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg) | (tmp >> 2)); - if (!(-tmp & ~0x3fc)) - return push_inst32(compiler, inst | RN4(TMP_REG3) | DD4(reg) | (-tmp >> 2)); - if (emit_set_delta(compiler, TMP_REG3, TMP_REG3, tmp) != SLJIT_ERR_UNSUPPORTED) { - FAIL_IF(compiler->error); - compiler->cache_argw = argw; - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); - } - } - if (arg & REG_MASK) { if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) { FAIL_IF(compiler->error); @@ -1617,19 +1507,18 @@ static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, } } - compiler->cache_arg = arg; - compiler->cache_argw = argw; - - FAIL_IF(load_immediate(compiler, TMP_REG3, argw)); + FAIL_IF(load_immediate(compiler, TMP_REG1, argw)); if (arg & REG_MASK) - FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG3, (arg & REG_MASK)))); - return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG3) | DD4(reg)); + FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK)))); + return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | DD4(reg)); } static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { + op ^= SLJIT_F32_OP; + if (src & SLJIT_MEM) { FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src, srcw)); src = TMP_FREG1; @@ -1637,9 +1526,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_F32_OP) | DD4(TMP_FREG1) | DM4(src))); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (FAST_IS_REG(dst)) return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | DN4(TMP_FREG1)); @@ -1653,6 +1539,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_comp { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + op ^= SLJIT_F32_OP; + if (FAST_IS_REG(src)) FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | DN4(TMP_FREG1))); else if (src & SLJIT_MEM) { @@ -1675,6 +1563,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + op ^= SLJIT_F32_OP; + if (src1 & SLJIT_MEM) { emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, TMP_FREG1, src1, src1w); src1 = TMP_FREG1; @@ -1696,16 +1586,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil sljit_s32 dst_r; CHECK_ERROR(); - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) - op ^= SLJIT_F32_OP; SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100), float_transfer_bit_error); SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw); dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; + if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32) + op ^= SLJIT_F32_OP; + if (src & SLJIT_MEM) { emit_fop_mem(compiler, (op & SLJIT_F32_OP) | FPU_LOAD, dst_r, src, srcw); src = dst_r; @@ -1750,8 +1639,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); - compiler->cache_arg = 0; - compiler->cache_argw = 0; op ^= SLJIT_F32_OP; dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; @@ -1796,21 +1683,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); if (FAST_IS_REG(dst)) - return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG3)); + return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2)); /* Memory. */ - if (getput_arg_fast(compiler, WORD_SIZE | STORE, TMP_REG3, dst, dstw)) - return compiler->error; - /* TMP_REG3 is used for caching. */ - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, TMP_REG3))); - compiler->cache_arg = 0; - compiler->cache_argw = 0; - return getput_arg(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0); + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw) @@ -1819,21 +1698,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler CHECK(check_sljit_emit_fast_return(compiler, src, srcw)); ADJUST_LOCAL_OFFSET(src, srcw); + SLJIT_ASSERT(reg_map[TMP_REG2] == 14); + if (FAST_IS_REG(src)) - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, src))); + FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src))); else if (src & SLJIT_MEM) { - if (getput_arg_fast(compiler, WORD_SIZE, TMP_REG3, src, srcw)) - FAIL_IF(compiler->error); - else { - compiler->cache_arg = 0; - compiler->cache_argw = 0; - FAIL_IF(getput_arg(compiler, WORD_SIZE, TMP_REG2, src, srcw, 0, 0)); - FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG3, TMP_REG2))); - } + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2)); } else if (src & SLJIT_IMM) - FAIL_IF(load_immediate(compiler, TMP_REG3, srcw)); - return push_inst16(compiler, BLX | RN3(TMP_REG3)); + FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); + return push_inst16(compiler, BX | RN3(TMP_REG2)); } /* --------------------------------------------------------------------- */ @@ -1890,7 +1764,7 @@ static sljit_uw get_cc(sljit_s32 type) return 0x7; default: /* SLJIT_JUMP */ - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return 0xe; } } @@ -1957,7 +1831,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (FAST_IS_REG(src)) return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src)); - FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw)); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1)); if (type >= SLJIT_FAST_CALL) return push_inst16(compiler, BLX | RN3(TMP_REG1)); } @@ -1974,23 +1848,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { sljit_s32 dst_r, flags = GET_ALL_FLAGS(op); - sljit_ins cc, ins; + sljit_ins cc; CHECK_ERROR(); - CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - ADJUST_LOCAL_OFFSET(src, srcw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; op = GET_OPCODE(op); cc = get_cc(type & 0xff); - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (op < SLJIT_ADD) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); @@ -1998,60 +1867,86 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1)); FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0)); } else { + /* The movsi (immediate) instruction does not set flags in IT block. */ FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1)); FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0)); } - if (dst_r != TMP_REG2) + if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw); + return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2); } - ins = (op == SLJIT_AND ? ANDI : (op == SLJIT_OR ? ORRI : EORI)); - if ((op == SLJIT_OR || op == SLJIT_XOR) && FAST_IS_REG(dst) && dst == src) { - /* Does not change the other bits. */ + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (op == SLJIT_AND) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); + FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1)); + FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0)); + } + else { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); - FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst) | 1)); - if (flags & SLJIT_SET_E) { - /* The condition must always be set, even if the ORRI/EORI is not executed above. */ - if (reg_map[dst] <= 7) - return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst)); - return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst)); - } + FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1)); + } + + if (dst & SLJIT_MEM) + FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2)); + + if (!(flags & SLJIT_SET_Z)) return SLJIT_SUCCESS; + + /* The condition must always be set, even if the ORR/EORI is not executed above. */ + if (reg_map[dst_r] <= 7) + return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r)); + return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_uw cc, tmp; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + dst_reg &= ~SLJIT_I32_OP; + + cc = get_cc(type & 0xff); + + if (!(src & SLJIT_IMM)) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src)); } - compiler->cache_arg = 0; - compiler->cache_argw = 0; - if (src & SLJIT_MEM) { - FAIL_IF(emit_op_mem2(compiler, WORD_SIZE, TMP_REG2, src, srcw, dst, dstw)); - src = TMP_REG2; - srcw = 0; - } else if (src & SLJIT_IMM) { - FAIL_IF(load_immediate(compiler, TMP_REG2, srcw)); - src = TMP_REG2; - srcw = 0; + tmp = (sljit_uw) srcw; + + if (tmp < 0x10000) { + /* set low 16 bits, set hi 16 bits to 0. */ + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MOVW | RD4(dst_reg) | + COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)); } - if (op == SLJIT_AND || src != dst_r) { - FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4)); - FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1)); - FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 0)); + tmp = get_imm(srcw); + if (tmp != INVALID_IMM) { + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); + return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp); } - else { + + tmp = get_imm(~srcw); + if (tmp != INVALID_IMM) { FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8)); - FAIL_IF(push_inst32(compiler, ins | RN4(src) | RD4(dst_r) | 1)); + return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp); } - if (dst_r == TMP_REG2) - FAIL_IF(emit_op_mem2(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, 0, 0)); + FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4)); - if (flags & SLJIT_SET_E) { - /* The condition must always be set, even if the ORR/EORI is not executed above. */ - if (reg_map[dst_r] <= 7) - return push_inst16(compiler, MOVS | RD3(TMP_REG1) | RN3(dst_r)); - return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r)); - } - return SLJIT_SUCCESS; + tmp = (sljit_uw) srcw; + FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg) | + COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff))); + return push_inst32(compiler, MOVT | RD4(dst_reg) | + COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16)); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2067,24 +1962,26 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi PTR_FAIL_IF(!const_); set_const(const_, compiler); - dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, init_value)); if (dst & SLJIT_MEM) - PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw)); + PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2)); return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { sljit_u16 *inst = (sljit_u16*)addr; - modify_imm32_const(inst, new_addr); + modify_imm32_const(inst, new_target); + inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 4); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { sljit_u16 *inst = (sljit_u16*)addr; modify_imm32_const(inst, new_constant); + inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 4); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c index 5096e4f55e..62e16106b1 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_32.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -40,35 +40,37 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a #define EMIT_LOGICAL(op_imm, op_norm) \ if (flags & SRC2_IMM) { \ - if (op & SLJIT_SET_E) \ + if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ + if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ } \ else { \ - if (op & SLJIT_SET_E) \ + if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ + if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ } #define EMIT_SHIFT(op_imm, op_v) \ if (flags & SRC2_IMM) { \ - if (op & SLJIT_SET_E) \ + if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, op_imm | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ + if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, op_imm | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ } \ else { \ - if (op & SLJIT_SET_E) \ + if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ + if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, op_v | S(src2) | T(src1) | D(dst), DR(dst))); \ } static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags, sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { + sljit_s32 is_overflow, is_carry, is_handled; + switch (GET_OPCODE(op)) { case SLJIT_MOV: case SLJIT_MOV_U32: @@ -93,8 +95,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); } - else if (dst != src2) - SLJIT_ASSERT_STOP(); + else { + SLJIT_ASSERT(dst == src2); + } return SLJIT_SUCCESS; case SLJIT_MOV_U16: @@ -111,24 +114,25 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); } - else if (dst != src2) - SLJIT_ASSERT_STOP(); + else { + SLJIT_ASSERT(dst == src2); + } return SLJIT_SUCCESS; case SLJIT_NOT: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (op & SLJIT_SET_E) + if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST)) FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); return SLJIT_SUCCESS; case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) - if (op & SLJIT_SET_E) + if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, CLZ | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST)) FAIL_IF(push_inst(compiler, CLZ | S(src2) | T(dst) | D(dst), DR(dst))); #else if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { @@ -145,130 +149,192 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, ADDIU | S(dst) | T(dst) | IMM(1), DR(dst))); FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, SLL | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); - if (op & SLJIT_SET_E) - return push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG); #endif return SLJIT_SUCCESS; case SLJIT_ADD: + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + if (flags & SRC2_IMM) { - if (op & SLJIT_SET_O) { + if (is_overflow) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); } - if (op & SLJIT_SET_E) + else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - if (op & (SLJIT_SET_C | SLJIT_SET_O)) { + + if (is_overflow || is_carry) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); } } /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { - if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - if (op & SLJIT_SET_E) + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (op & (SLJIT_SET_C | SLJIT_SET_O)) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); } /* a + b >= a | b (otherwise, the carry should be set to 1). */ - if (op & (SLJIT_SET_C | SLJIT_SET_O)) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); - if (!(op & SLJIT_SET_O)) + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (!is_overflow) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - return push_inst(compiler, SLL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); case SLJIT_ADDC: + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + if (flags & SRC2_IMM) { - if (op & SLJIT_SET_C) { + if (is_carry) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); else { - FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); } } FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, ADDU | S(src1) | T(src2) | D(dst), DR(dst))); } - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - if (!(op & SLJIT_SET_C)) + FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + if (!is_carry) return SLJIT_SUCCESS; - /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); /* Set carry flag. */ - return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); + return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); case SLJIT_SUB: - if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); src2 = TMP_REG2; flags &= ~SRC2_IMM; } + is_handled = 0; + + if (flags & SRC2_IMM) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL) + { + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL) + { + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + if (flags & SRC2_IMM) { - if (op & SLJIT_SET_O) { + if (is_overflow) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); } - if (op & SLJIT_SET_E) + else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); - if (op & (SLJIT_SET_C | SLJIT_SET_O)) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { - if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - if (op & SLJIT_SET_E) + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); - if (op & SLJIT_SET_U) - FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG)); - if (op & SLJIT_SET_S) { - FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG)); - FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG)); - } + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C)) + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); } - if (!(op & SLJIT_SET_O)) + if (!is_overflow) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SLL | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - return push_inst(compiler, SRL | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SLL | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, ADDU | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + return push_inst(compiler, SRL | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); case SLJIT_SUBC: if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { @@ -277,28 +343,31 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl flags &= ~SRC2_IMM; } + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + if (flags & SRC2_IMM) { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, ADDIU | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SUBU | S(src1) | T(src2) | D(dst), DR(dst))); } - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SUBU | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS; case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); - if (!(op & SLJIT_SET_O)) { + + if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) { #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); #else @@ -307,10 +376,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #endif } FAIL_IF(push_inst(compiler, MULT | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); - return push_inst(compiler, SUBU | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SRA | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); + return push_inst(compiler, SUBU | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); case SLJIT_AND: EMIT_LOGICAL(ANDI, AND); @@ -337,7 +406,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; } - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; } @@ -347,20 +416,22 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c index c7ee8c9c2e..dd114bb27a 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_64.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -123,15 +123,15 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a #define EMIT_LOGICAL(op_imm, op_norm) \ if (flags & SRC2_IMM) { \ - if (op & SLJIT_SET_E) \ + if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, op_imm | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ + if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, op_imm | S(src1) | T(dst) | IMM(src2), DR(dst))); \ } \ else { \ - if (op & SLJIT_SET_E) \ + if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ + if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, op_norm | S(src1) | T(src2) | D(dst), DR(dst))); \ } @@ -144,16 +144,16 @@ static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_a } \ else \ ins = (op & SLJIT_I32_OP) ? op_imm : op_dimm; \ - if (op & SLJIT_SET_E) \ + if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, ins | T(src1) | DA(EQUAL_FLAG) | SH_IMM(src2), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ + if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, ins | T(src1) | D(dst) | SH_IMM(src2), DR(dst))); \ } \ else { \ ins = (op & SLJIT_I32_OP) ? op_v : op_dv; \ - if (op & SLJIT_SET_E) \ + if (op & SLJIT_SET_Z) \ FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); \ - if (CHECK_FLAGS(SLJIT_SET_E)) \ + if (!(flags & UNUSED_DEST)) \ FAIL_IF(push_inst(compiler, ins | S(src2) | T(src1) | D(dst), DR(dst))); \ } @@ -161,6 +161,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl sljit_s32 dst, sljit_s32 src1, sljit_sw src2) { sljit_ins ins; + sljit_s32 is_overflow, is_carry, is_handled; switch (GET_OPCODE(op)) { case SLJIT_MOV: @@ -180,8 +181,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xff), DR(dst)); } - else if (dst != src2) - SLJIT_ASSERT_STOP(); + else { + SLJIT_ASSERT(dst == src2); + } return SLJIT_SUCCESS; case SLJIT_MOV_U16: @@ -194,8 +196,9 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } return push_inst(compiler, ANDI | S(src2) | T(dst) | IMM(0xffff), DR(dst)); } - else if (dst != src2) - SLJIT_ASSERT_STOP(); + else { + SLJIT_ASSERT(dst == src2); + } return SLJIT_SUCCESS; case SLJIT_MOV_U32: @@ -209,18 +212,18 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_NOT: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - if (op & SLJIT_SET_E) + if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST)) FAIL_IF(push_inst(compiler, NOR | S(src2) | T(src2) | D(dst), DR(dst))); return SLJIT_SUCCESS; case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) - if (op & SLJIT_SET_E) + if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST)) FAIL_IF(push_inst(compiler, SELECT_OP(DCLZ, CLZ) | S(src2) | T(dst) | D(dst), DR(dst))); #else if (SLJIT_UNLIKELY(flags & UNUSED_DEST)) { @@ -237,130 +240,192 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(dst) | T(dst) | IMM(1), DR(dst))); FAIL_IF(push_inst(compiler, BGEZ | S(TMP_REG1) | IMM(-2), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, SELECT_OP(DSLL, SLL) | T(TMP_REG1) | D(TMP_REG1) | SH_IMM(1), UNMOVABLE_INS)); - if (op & SLJIT_SET_E) - return push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG); #endif return SLJIT_SUCCESS; case SLJIT_ADD: + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + if (flags & SRC2_IMM) { - if (op & SLJIT_SET_O) { + if (is_overflow) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); } - if (op & SLJIT_SET_E) + else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); - if (op & (SLJIT_SET_C | SLJIT_SET_O)) { + + if (is_overflow || is_carry) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); else { - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); } } /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { - if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - if (op & SLJIT_SET_E) + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (op & (SLJIT_SET_C | SLJIT_SET_O)) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); } /* a + b >= a | b (otherwise, the carry should be set to 1). */ - if (op & (SLJIT_SET_C | SLJIT_SET_O)) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); - if (!(op & SLJIT_SET_O)) + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (!is_overflow) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - return push_inst(compiler, SELECT_OP(DSRL32, SLL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); case SLJIT_ADDC: + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + if (flags & SRC2_IMM) { - if (op & SLJIT_SET_C) { + if (is_carry) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, ORI | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); else { - FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, OR | S(src1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | SA(0) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); } } FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(src2), DR(dst))); } else { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(src1) | T(src2) | D(dst), DR(dst))); } - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - if (!(op & SLJIT_SET_C)) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + if (!is_carry) return SLJIT_SUCCESS; - /* Set ULESS_FLAG (dst == 0) && (ULESS_FLAG == 1). */ - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG)); + /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */ + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); /* Set carry flag. */ - return push_inst(compiler, OR | SA(ULESS_FLAG) | TA(OVERFLOW_FLAG) | DA(ULESS_FLAG), ULESS_FLAG); + return push_inst(compiler, OR | SA(OTHER_FLAG) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); case SLJIT_SUB: - if ((flags & SRC2_IMM) && ((op & (SLJIT_SET_U | SLJIT_SET_S)) || src2 == SIMM_MIN)) { + if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); src2 = TMP_REG2; flags &= ~SRC2_IMM; } + is_handled = 0; + if (flags & SRC2_IMM) { - if (op & SLJIT_SET_O) { + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTI | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); + is_handled = 1; + } + } + + if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) { + is_handled = 1; + + if (flags & SRC2_IMM) { + FAIL_IF(push_inst(compiler, ADDIU | SA(0) | T(TMP_REG2) | IMM(src2), DR(TMP_REG2))); + src2 = TMP_REG2; + flags &= ~SRC2_IMM; + } + + if (GET_FLAG_TYPE(op) == SLJIT_LESS || GET_FLAG_TYPE(op) == SLJIT_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_GREATER || GET_FLAG_TYPE(op) == SLJIT_LESS_EQUAL) + { + FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS || GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER_EQUAL) { + FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); + } + else if (GET_FLAG_TYPE(op) == SLJIT_SIG_GREATER || GET_FLAG_TYPE(op) == SLJIT_SIG_LESS_EQUAL) + { + FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(OTHER_FLAG), OTHER_FLAG)); + } + } + + if (is_handled) { + if (flags & SRC2_IMM) { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst)); + } + else { + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + if (!(flags & UNUSED_DEST)) + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst)); + } + return SLJIT_SUCCESS; + } + + is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW; + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + + if (flags & SRC2_IMM) { + if (is_overflow) { if (src2 >= 0) - FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, OR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); else - FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + FAIL_IF(push_inst(compiler, NOR | S(src1) | T(src1) | DA(EQUAL_FLAG), EQUAL_FLAG)); } - if (op & SLJIT_SET_E) + else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | TA(EQUAL_FLAG) | IMM(-src2), EQUAL_FLAG)); - if (op & (SLJIT_SET_C | SLJIT_SET_O)) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(ULESS_FLAG) | IMM(src2), ULESS_FLAG)); + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OTHER_FLAG) | IMM(src2), OTHER_FLAG)); /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E)) + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { - if (op & SLJIT_SET_O) - FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - if (op & SLJIT_SET_E) + if (is_overflow) + FAIL_IF(push_inst(compiler, XOR | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); + else if (op & SLJIT_SET_Z) FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); - if (op & (SLJIT_SET_U | SLJIT_SET_C | SLJIT_SET_O)) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(ULESS_FLAG), ULESS_FLAG)); - if (op & SLJIT_SET_U) - FAIL_IF(push_inst(compiler, SLTU | S(src2) | T(src1) | DA(UGREATER_FLAG), UGREATER_FLAG)); - if (op & SLJIT_SET_S) { - FAIL_IF(push_inst(compiler, SLT | S(src1) | T(src2) | DA(LESS_FLAG), LESS_FLAG)); - FAIL_IF(push_inst(compiler, SLT | S(src2) | T(src1) | DA(GREATER_FLAG), GREATER_FLAG)); - } + + if (is_overflow || is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OTHER_FLAG), OTHER_FLAG)); /* dst may be the same as src1 or src2. */ - if (CHECK_FLAGS(SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_C)) + if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK)) FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); } - if (!(op & SLJIT_SET_O)) + if (!is_overflow) return SLJIT_SUCCESS; - FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(ULESS_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); - return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OVERFLOW_FLAG) | DA(OVERFLOW_FLAG) | SH_IMM(31), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SELECT_OP(DSLL32, SLL) | TA(OTHER_FLAG) | D(TMP_REG1) | SH_IMM(31), DR(TMP_REG1))); + FAIL_IF(push_inst(compiler, XOR | S(TMP_REG1) | TA(EQUAL_FLAG) | DA(EQUAL_FLAG), EQUAL_FLAG)); + FAIL_IF(push_inst(compiler, XOR | S(dst) | TA(EQUAL_FLAG) | DA(OTHER_FLAG), OTHER_FLAG)); + if (op & SLJIT_SET_Z) + FAIL_IF(push_inst(compiler, SELECT_OP(DADDU, ADDU) | S(dst) | TA(0) | DA(EQUAL_FLAG), EQUAL_FLAG)); + return push_inst(compiler, SELECT_OP(DSRL32, SRL) | TA(OTHER_FLAG) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG); case SLJIT_SUBC: if ((flags & SRC2_IMM) && src2 == SIMM_MIN) { @@ -369,28 +434,31 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl flags &= ~SRC2_IMM; } + is_carry = GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY); + if (flags & SRC2_IMM) { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(OVERFLOW_FLAG) | IMM(src2), OVERFLOW_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, SLTIU | S(src1) | TA(EQUAL_FLAG) | IMM(src2), EQUAL_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DADDIU, ADDIU) | S(src1) | T(dst) | IMM(-src2), DR(dst))); } else { - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(src1) | T(src2) | DA(EQUAL_FLAG), EQUAL_FLAG)); /* dst may be the same as src1 or src2. */ FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(src1) | T(src2) | D(dst), DR(dst))); } - if (op & SLJIT_SET_C) - FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(ULESS_FLAG) | DA(LESS_FLAG), LESS_FLAG)); + if (is_carry) + FAIL_IF(push_inst(compiler, SLTU | S(dst) | TA(OTHER_FLAG) | D(TMP_REG1), DR(TMP_REG1))); - FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(ULESS_FLAG) | D(dst), DR(dst))); - return (op & SLJIT_SET_C) ? push_inst(compiler, OR | SA(OVERFLOW_FLAG) | TA(LESS_FLAG) | DA(ULESS_FLAG), ULESS_FLAG) : SLJIT_SUCCESS; + FAIL_IF(push_inst(compiler, SELECT_OP(DSUBU, SUBU) | S(dst) | TA(OTHER_FLAG) | D(dst), DR(dst))); + return (is_carry) ? push_inst(compiler, OR | SA(EQUAL_FLAG) | T(TMP_REG1) | DA(OTHER_FLAG), OTHER_FLAG) : SLJIT_SUCCESS; case SLJIT_MUL: SLJIT_ASSERT(!(flags & SRC2_IMM)); - if (!(op & SLJIT_SET_O)) { + + if (GET_FLAG_TYPE(op) != SLJIT_MUL_OVERFLOW) { #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) if (op & SLJIT_I32_OP) return push_inst(compiler, MUL | S(src1) | T(src2) | D(dst), DR(dst)); @@ -402,10 +470,10 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl #endif } FAIL_IF(push_inst(compiler, SELECT_OP(DMULT, MULT) | S(src1) | T(src2), MOVABLE_INS)); - FAIL_IF(push_inst(compiler, MFHI | DA(ULESS_FLAG), ULESS_FLAG)); + FAIL_IF(push_inst(compiler, MFHI | DA(EQUAL_FLAG), EQUAL_FLAG)); FAIL_IF(push_inst(compiler, MFLO | D(dst), DR(dst))); - FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(UGREATER_FLAG) | SH_IMM(31), UGREATER_FLAG)); - return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(ULESS_FLAG) | TA(UGREATER_FLAG) | DA(OVERFLOW_FLAG), OVERFLOW_FLAG); + FAIL_IF(push_inst(compiler, SELECT_OP(DSRA32, SRA) | T(dst) | DA(OTHER_FLAG) | SH_IMM(31), OTHER_FLAG)); + return push_inst(compiler, SELECT_OP(DSUBU, SUBU) | SA(EQUAL_FLAG) | TA(OTHER_FLAG) | DA(OTHER_FLAG), OTHER_FLAG); case SLJIT_AND: EMIT_LOGICAL(ANDI, AND); @@ -432,7 +500,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; } - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; } @@ -446,24 +514,26 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, ORI | S(dst) | T(dst) | IMM(init_value), DR(dst)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[5] = (inst[5] & 0xffff0000) | (new_addr & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[5] = (inst[5] & 0xffff0000) | (new_target & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 6); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 48) & 0xffff); inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff); inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff); inst[5] = (inst[5] & 0xffff0000) | (new_constant & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 6); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c index c2c251b1ff..00e8303090 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeMIPS_common.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -57,19 +57,14 @@ typedef sljit_u32 sljit_ins; #define RETURN_ADDR_REG 31 /* Flags are kept in volatile registers. */ -#define EQUAL_FLAG 12 -/* And carry flag as well. */ -#define ULESS_FLAG 13 -#define UGREATER_FLAG 14 -#define LESS_FLAG 15 -#define GREATER_FLAG 31 -#define OVERFLOW_FLAG 1 +#define EQUAL_FLAG 31 +#define OTHER_FLAG 1 #define TMP_FREG1 (0) #define TMP_FREG2 ((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1) << 1) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 2, 5, 6, 7, 8, 9, 10, 11, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 + 0, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 24, 23, 22, 21, 20, 19, 18, 17, 16, 29, 3, 25, 4 }; /* --------------------------------------------------------------------- */ @@ -178,7 +173,13 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { #if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) #define CLZ (HI(28) | LO(32)) #define DCLZ (HI(28) | LO(36)) +#define MOVF (HI(0) | (0 << 16) | LO(1)) +#define MOVN (HI(0) | LO(11)) +#define MOVT (HI(0) | (1 << 16) | LO(1)) +#define MOVZ (HI(0) | LO(10)) #define MUL (HI(28) | LO(2)) +#define PREF (HI(51)) +#define PREFX (HI(19) | LO(15)) #define SEB (HI(31) | (16 << 6) | LO(32)) #define SEH (HI(31) | (24 << 6) | LO(32)) #endif @@ -218,7 +219,7 @@ static SLJIT_INLINE sljit_ins invert_branch(sljit_s32 flags) return (flags & IS_BIT26_COND) ? (1 << 26) : (1 << 16); } -static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; @@ -237,9 +238,10 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i target_addr = jump->u.target; else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } - inst = (sljit_ins*)jump->addr; + + inst = (sljit_ins *)jump->addr; if (jump->flags & IS_COND) inst--; @@ -250,7 +252,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i /* B instructions. */ if (jump->flags & IS_MOVABLE) { - diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2; + diff = ((sljit_sw)target_addr - (sljit_sw)inst - executable_offset) >> 2; if (diff <= SIMM_MAX && diff >= SIMM_MIN) { jump->flags |= PATCH_B; @@ -268,7 +270,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } } else { - diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1)) >> 2; + diff = ((sljit_sw)target_addr - (sljit_sw)(inst + 1) - executable_offset) >> 2; if (diff <= SIMM_MAX && diff >= SIMM_MIN) { jump->flags |= PATCH_B; @@ -364,6 +366,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; + sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; @@ -380,9 +383,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; + do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); @@ -393,8 +399,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil SLJIT_ASSERT(!const_ || const_->addr >= word_count); /* These structures are ordered by their address. */ if (label && label->size == word_count) { - /* Just recording the address. */ - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -404,7 +409,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #else jump->addr = (sljit_uw)(code_ptr - 7); #endif - code_ptr = detect_jump_type(jump, code_ptr, code); + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == word_count) { @@ -434,16 +439,16 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil while (jump) { do { addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; + buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - (jump->addr + sizeof(sljit_ins))) >> 2; + addr = (sljit_sw)(addr - ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins))) >> 2; SLJIT_ASSERT((sljit_sw)addr <= SIMM_MAX && (sljit_sw)addr >= SIMM_MIN); buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | (addr & 0xffff); break; } if (jump->flags & PATCH_J) { - SLJIT_ASSERT((addr & ~0xfffffff) == ((jump->addr + sizeof(sljit_ins)) & ~0xfffffff)); + SLJIT_ASSERT((addr & ~0xfffffff) == (((sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) + sizeof(sljit_ins)) & ~0xfffffff)); buf_ptr[0] |= (addr >> 2) & 0x03ffffff; break; } @@ -476,7 +481,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + #ifndef __GNUC__ SLJIT_CACHE_FLUSH(code, code_ptr); #else @@ -486,6 +496,32 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil return code; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + sljit_sw fir = 0; + + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#elif defined(__GNUC__) + asm ("cfc1 %0, $0" : "=r"(fir)); + return (fir >> 22) & 0x1; +#else +#error "FIR check is not implemented for this architecture" +#endif + +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + return 1; +#endif + + default: + return fir; + } +} + /* --------------------------------------------------------------------- */ /* Entry, exit */ /* --------------------------------------------------------------------- */ @@ -520,10 +556,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #define SLOW_SRC2 0x20000 #define SLOW_DEST 0x40000 -/* Only these flags are set. UNUSED_DEST is not set when no flags should be set. */ -#define CHECK_FLAGS(list) \ - (!(flags & UNUSED_DEST) || (op & GET_FLAGS(~(list)))) - #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) #define STACK_STORE SW #define STACK_LOAD LW @@ -759,34 +791,28 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl base = arg & REG_MASK; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { - argw &= 0x3; - if ((flags & WRITE_BACK) && reg_ar == DR(base)) { - SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar); - FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); - reg_ar = DR(TMP_REG1); + if (SLJIT_UNLIKELY(flags & WRITE_BACK)) { + SLJIT_ASSERT(argw == 0); + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(OFFS_REG(arg)) | D(base), DR(base))); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); } + argw &= 0x3; + /* Using the cache. */ if (argw == compiler->cache_argw) { - if (!(flags & WRITE_BACK)) { - if (arg == compiler->cache_arg) + if (arg == compiler->cache_arg) + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); + + if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); - if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { - if (arg == next_arg && argw == (next_argw & 0x3)) { - compiler->cache_arg = arg; - compiler->cache_argw = argw; - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(TMP_REG3) | TA(reg_ar), delay_slot); - } - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); - } - } - else { - if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) { - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | D(base), DR(base))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); } + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } } @@ -796,35 +822,18 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(arg)) | D(TMP_REG3) | SH_IMM(argw), DR(TMP_REG3))); } - if (!(flags & WRITE_BACK)) { - if (arg == next_arg && argw == (next_argw & 0x3)) { - compiler->cache_arg = arg; - compiler->cache_argw = argw; - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); - tmp_ar = DR(TMP_REG3); - } - else - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar)); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); + if (arg == next_arg && argw == (next_argw & 0x3)) { + compiler->cache_arg = arg; + compiler->cache_argw = argw; + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(TMP_REG3), DR(TMP_REG3))); + tmp_ar = DR(TMP_REG3); } - FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | D(base), DR(base))); - return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar), delay_slot); + else + FAIL_IF(push_inst(compiler, ADDU_W | S(base) | T(!argw ? OFFS_REG(arg) : TMP_REG3) | DA(tmp_ar), tmp_ar)); + return push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | SA(tmp_ar) | TA(reg_ar), delay_slot); } if (SLJIT_UNLIKELY(flags & WRITE_BACK) && base) { - /* Update only applies if a base register exists. */ - if (reg_ar == DR(base)) { - SLJIT_ASSERT(!(flags & LOAD_DATA) && DR(TMP_REG1) != reg_ar); - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { - FAIL_IF(push_inst(compiler, data_transfer_insts[flags & MEM_MASK] | S(base) | TA(reg_ar) | IMM(argw), MOVABLE_INS)); - if (argw) - return push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base)); - return SLJIT_SUCCESS; - } - FAIL_IF(push_inst(compiler, ADDU_W | SA(reg_ar) | TA(0) | D(TMP_REG1), DR(TMP_REG1))); - reg_ar = DR(TMP_REG1); - } - if (argw <= SIMM_MAX && argw >= SIMM_MIN) { if (argw) FAIL_IF(push_inst(compiler, ADDIU_W | S(base) | T(base) | IMM(argw), DR(base))); @@ -914,10 +923,8 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 } if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) - return SLJIT_SUCCESS; - if (GET_FLAGS(op)) - flags |= UNUSED_DEST; + SLJIT_ASSERT(HAS_FLAGS(op)); + flags |= UNUSED_DEST; } else if (FAST_IS_REG(dst)) { dst_r = dst; @@ -1079,6 +1086,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return SLJIT_SUCCESS; } +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + if (!(src & OFFS_REG_MASK)) { + if (srcw <= SIMM_MAX && srcw >= SIMM_MIN) + return push_inst(compiler, PREF | S(src & REG_MASK) | IMM(srcw), MOVABLE_INS); + + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS); + } + + srcw &= 0x3; + + if (SLJIT_UNLIKELY(srcw != 0)) { + FAIL_IF(push_inst(compiler, SLL_W | T(OFFS_REG(src)) | D(TMP_REG1) | SH_IMM(srcw), DR(TMP_REG1))); + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(TMP_REG1), MOVABLE_INS); + } + + return push_inst(compiler, PREFX | S(src & REG_MASK) | T(OFFS_REG(src)), MOVABLE_INS); +} +#endif + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) @@ -1094,6 +1124,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) { +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) + return emit_prefetch(compiler, src, srcw); +#endif + return SLJIT_SUCCESS; + } + #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if ((op & SLJIT_I32_OP) && GET_OPCODE(op) >= SLJIT_NOT) { flags |= INT_DATA | SIGNED_DATA; @@ -1197,6 +1235,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) if (op & SLJIT_I32_OP) { flags |= INT_DATA | SIGNED_DATA; @@ -1273,19 +1314,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ -#ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; -#elif defined(__GNUC__) - sljit_sw fir; - asm ("cfc1 %0, $0" : "=r"(fir)); - return (fir >> 22) & 0x1; -#else -#error "FIR check is not implemented for this architecture" -#endif -} - #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) #define FMT(op) (((op & SLJIT_F32_OP) ^ SLJIT_F32_OP) << (21 - 8)) @@ -1308,9 +1336,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp FAIL_IF(push_inst(compiler, (TRUNC_W_S ^ (flags >> 19)) | FMT(op) | FS(src) | FD(TMP_FREG1), MOVABLE_INS)); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (FAST_IS_REG(dst)) return push_inst(compiler, MFC1 | flags | T(dst) | FS(TMP_FREG1), MOVABLE_INS); @@ -1364,6 +1389,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { + sljit_ins inst; + if (src1 & SLJIT_MEM) { FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); src1 = TMP_FREG1; @@ -1378,25 +1405,26 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile else src2 <<= 1; - /* src2 and src1 are swapped. */ - if (op & SLJIT_SET_E) { - FAIL_IF(push_inst(compiler, C_UEQ_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, CFC1 | TA(EQUAL_FLAG) | DA(FCSR_REG), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(EQUAL_FLAG) | DA(EQUAL_FLAG) | SH_IMM(23), EQUAL_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(EQUAL_FLAG) | TA(EQUAL_FLAG) | IMM(1), EQUAL_FLAG)); - } - if (op & SLJIT_SET_S) { - /* Mixing the instructions for the two checks. */ - FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, CFC1 | TA(ULESS_FLAG) | DA(FCSR_REG), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, C_ULT_S | FMT(op) | FT(src1) | FS(src2), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, SRL | TA(ULESS_FLAG) | DA(ULESS_FLAG) | SH_IMM(23), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(ULESS_FLAG) | TA(ULESS_FLAG) | IMM(1), ULESS_FLAG)); - FAIL_IF(push_inst(compiler, CFC1 | TA(UGREATER_FLAG) | DA(FCSR_REG), UGREATER_FLAG)); - FAIL_IF(push_inst(compiler, SRL | TA(UGREATER_FLAG) | DA(UGREATER_FLAG) | SH_IMM(23), UGREATER_FLAG)); - FAIL_IF(push_inst(compiler, ANDI | SA(UGREATER_FLAG) | TA(UGREATER_FLAG) | IMM(1), UGREATER_FLAG)); - } - return push_inst(compiler, C_UN_S | FMT(op) | FT(src2) | FS(src1), FCSR_FCC); + switch (GET_FLAG_TYPE(op)) { + case SLJIT_EQUAL_F64: + case SLJIT_NOT_EQUAL_F64: + inst = C_UEQ_S; + break; + case SLJIT_LESS_F64: + case SLJIT_GREATER_EQUAL_F64: + inst = C_ULT_S; + break; + case SLJIT_GREATER_F64: + case SLJIT_LESS_EQUAL_F64: + inst = C_ULE_S; + break; + default: + SLJIT_ASSERT(GET_FLAG_TYPE(op) == SLJIT_UNORDERED_F64 || GET_FLAG_TYPE(op) == SLJIT_ORDERED_F64); + inst = C_UN_S; + break; + } + + return push_inst(compiler, inst | FMT(op) | FT(src2) | FS(src1), UNMOVABLE_INS); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -1542,10 +1570,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (FAST_IS_REG(dst)) return push_inst(compiler, ADDU_W | SA(RETURN_ADDR_REG) | TA(0) | D(dst), DR(dst)); @@ -1634,55 +1658,39 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile switch (type) { case SLJIT_EQUAL: - case SLJIT_NOT_EQUAL_F64: BR_NZ(EQUAL_FLAG); break; case SLJIT_NOT_EQUAL: - case SLJIT_EQUAL_F64: BR_Z(EQUAL_FLAG); break; case SLJIT_LESS: - case SLJIT_LESS_F64: - BR_Z(ULESS_FLAG); - break; - case SLJIT_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: - BR_NZ(ULESS_FLAG); - break; case SLJIT_GREATER: - case SLJIT_GREATER_F64: - BR_Z(UGREATER_FLAG); - break; - case SLJIT_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: - BR_NZ(UGREATER_FLAG); - break; case SLJIT_SIG_LESS: - BR_Z(LESS_FLAG); - break; - case SLJIT_SIG_GREATER_EQUAL: - BR_NZ(LESS_FLAG); - break; case SLJIT_SIG_GREATER: - BR_Z(GREATER_FLAG); - break; - case SLJIT_SIG_LESS_EQUAL: - BR_NZ(GREATER_FLAG); - break; case SLJIT_OVERFLOW: case SLJIT_MUL_OVERFLOW: - BR_Z(OVERFLOW_FLAG); + BR_Z(OTHER_FLAG); break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: case SLJIT_NOT_OVERFLOW: case SLJIT_MUL_NOT_OVERFLOW: - BR_NZ(OVERFLOW_FLAG); - break; - case SLJIT_UNORDERED_F64: - BR_F(); + BR_NZ(OTHER_FLAG); break; + case SLJIT_NOT_EQUAL_F64: + case SLJIT_GREATER_EQUAL_F64: + case SLJIT_GREATER_F64: case SLJIT_ORDERED_F64: BR_T(); break; + case SLJIT_EQUAL_F64: + case SLJIT_LESS_F64: + case SLJIT_LESS_EQUAL_F64: + case SLJIT_UNORDERED_F64: + BR_F(); + break; default: /* Not conditional branch. */ inst = 0; @@ -1854,86 +1862,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler #undef RESOLVE_IMM1 #undef RESOLVE_IMM2 -SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_fcmp(struct sljit_compiler *compiler, sljit_s32 type, - sljit_s32 src1, sljit_sw src1w, - sljit_s32 src2, sljit_sw src2w) -{ - struct sljit_jump *jump; - sljit_ins inst; - sljit_s32 if_true; - - CHECK_ERROR_PTR(); - CHECK_PTR(check_sljit_emit_fcmp(compiler, type, src1, src1w, src2, src2w)); - - compiler->cache_arg = 0; - compiler->cache_argw = 0; - - if (src1 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w)); - src1 = TMP_FREG1; - } - else - src1 <<= 1; - - if (src2 & SLJIT_MEM) { - PTR_FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0)); - src2 = TMP_FREG2; - } - else - src2 <<= 1; - - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); - PTR_FAIL_IF(!jump); - set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); - jump->flags |= IS_BIT16_COND; - - switch (type & 0xff) { - case SLJIT_EQUAL_F64: - inst = C_UEQ_S; - if_true = 1; - break; - case SLJIT_NOT_EQUAL_F64: - inst = C_UEQ_S; - if_true = 0; - break; - case SLJIT_LESS_F64: - inst = C_ULT_S; - if_true = 1; - break; - case SLJIT_GREATER_EQUAL_F64: - inst = C_ULT_S; - if_true = 0; - break; - case SLJIT_GREATER_F64: - inst = C_ULE_S; - if_true = 0; - break; - case SLJIT_LESS_EQUAL_F64: - inst = C_ULE_S; - if_true = 1; - break; - case SLJIT_UNORDERED_F64: - inst = C_UN_S; - if_true = 1; - break; - default: /* Make compilers happy. */ - SLJIT_ASSERT_STOP(); - case SLJIT_ORDERED_F64: - inst = C_UN_S; - if_true = 0; - break; - } - - PTR_FAIL_IF(push_inst(compiler, inst | FMT(type) | FT(src2) | FS(src1), UNMOVABLE_INS)); - /* Intentionally the other opcode. */ - PTR_FAIL_IF(push_inst(compiler, (if_true ? BC1F : BC1T) | JUMP_LENGTH, UNMOVABLE_INS)); - PTR_FAIL_IF(emit_const(compiler, TMP_REG2, 0)); - PTR_FAIL_IF(push_inst(compiler, JR | S(TMP_REG2), UNMOVABLE_INS)); - jump->addr = compiler->size; - PTR_FAIL_IF(push_inst(compiler, NOP, UNMOVABLE_INS)); - return jump; -} - #undef JUMP_LENGTH #undef BR_Z #undef BR_NZ @@ -2003,115 +1931,160 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { - sljit_s32 sugg_dst_ar, dst_ar; - sljit_s32 flags = GET_ALL_FLAGS(op); + sljit_s32 src_ar, dst_ar; + sljit_s32 saved_op = op; #if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# define mem_type WORD_DATA + sljit_s32 mem_type = WORD_DATA; #else sljit_s32 mem_type = (op & SLJIT_I32_OP) ? (INT_DATA | SIGNED_DATA) : WORD_DATA; #endif CHECK_ERROR(); - CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - op = GET_OPCODE(op); #if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) - if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32) + if (op == SLJIT_MOV_S32) mem_type = INT_DATA | SIGNED_DATA; #endif - sugg_dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); + dst_ar = DR((op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2); compiler->cache_arg = 0; compiler->cache_argw = 0; - if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { - ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, DR(TMP_REG1), dst, dstw, dst, dstw)); switch (type & 0xff) { case SLJIT_EQUAL: case SLJIT_NOT_EQUAL: - FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); - dst_ar = sugg_dst_ar; - break; - case SLJIT_LESS: - case SLJIT_GREATER_EQUAL: - case SLJIT_LESS_F64: - case SLJIT_GREATER_EQUAL_F64: - dst_ar = ULESS_FLAG; - break; - case SLJIT_GREATER: - case SLJIT_LESS_EQUAL: - case SLJIT_GREATER_F64: - case SLJIT_LESS_EQUAL_F64: - dst_ar = UGREATER_FLAG; - break; - case SLJIT_SIG_LESS: - case SLJIT_SIG_GREATER_EQUAL: - dst_ar = LESS_FLAG; - break; - case SLJIT_SIG_GREATER: - case SLJIT_SIG_LESS_EQUAL: - dst_ar = GREATER_FLAG; - break; - case SLJIT_OVERFLOW: - case SLJIT_NOT_OVERFLOW: - dst_ar = OVERFLOW_FLAG; + FAIL_IF(push_inst(compiler, SLTIU | SA(EQUAL_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; break; case SLJIT_MUL_OVERFLOW: case SLJIT_MUL_NOT_OVERFLOW: - FAIL_IF(push_inst(compiler, SLTIU | SA(OVERFLOW_FLAG) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); - dst_ar = sugg_dst_ar; + FAIL_IF(push_inst(compiler, SLTIU | SA(OTHER_FLAG) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; type ^= 0x1; /* Flip type bit for the XORI below. */ break; + case SLJIT_GREATER_F64: + case SLJIT_LESS_EQUAL_F64: + type ^= 0x1; /* Flip type bit for the XORI below. */ case SLJIT_EQUAL_F64: case SLJIT_NOT_EQUAL_F64: - dst_ar = EQUAL_FLAG; - break; - + case SLJIT_LESS_F64: + case SLJIT_GREATER_EQUAL_F64: case SLJIT_UNORDERED_F64: case SLJIT_ORDERED_F64: - FAIL_IF(push_inst(compiler, CFC1 | TA(sugg_dst_ar) | DA(FCSR_REG), sugg_dst_ar)); - FAIL_IF(push_inst(compiler, SRL | TA(sugg_dst_ar) | DA(sugg_dst_ar) | SH_IMM(23), sugg_dst_ar)); - FAIL_IF(push_inst(compiler, ANDI | SA(sugg_dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); - dst_ar = sugg_dst_ar; + FAIL_IF(push_inst(compiler, CFC1 | TA(dst_ar) | DA(FCSR_REG), dst_ar)); + FAIL_IF(push_inst(compiler, SRL | TA(dst_ar) | DA(dst_ar) | SH_IMM(23), dst_ar)); + FAIL_IF(push_inst(compiler, ANDI | SA(dst_ar) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; break; default: - SLJIT_ASSERT_STOP(); - dst_ar = sugg_dst_ar; + src_ar = OTHER_FLAG; break; } if (type & 0x1) { - FAIL_IF(push_inst(compiler, XORI | SA(dst_ar) | TA(sugg_dst_ar) | IMM(1), sugg_dst_ar)); - dst_ar = sugg_dst_ar; + FAIL_IF(push_inst(compiler, XORI | SA(src_ar) | TA(dst_ar) | IMM(1), dst_ar)); + src_ar = dst_ar; } - if (op >= SLJIT_ADD) { - if (DR(TMP_REG2) != dst_ar) - FAIL_IF(push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); - return emit_op(compiler, op | flags, mem_type | CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); + if (op < SLJIT_ADD) { + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, mem_type, src_ar, dst, dstw); + + if (src_ar != dst_ar) + return push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | DA(dst_ar), dst_ar); + return SLJIT_SUCCESS; } + /* OTHER_FLAG cannot be specified as src2 argument at the moment. */ + if (DR(TMP_REG2) != src_ar) + FAIL_IF(push_inst(compiler, ADDU_W | SA(src_ar) | TA(0) | D(TMP_REG2), DR(TMP_REG2))); + + mem_type |= CUMULATIVE_OP | LOGICAL_OP | IMM_OP | ALT_KEEP_CACHE; + if (dst & SLJIT_MEM) - return emit_op_mem(compiler, mem_type, dst_ar, dst, dstw); + return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, TMP_REG2, 0); +} - if (sugg_dst_ar != dst_ar) - return push_inst(compiler, ADDU_W | SA(dst_ar) | TA(0) | DA(sugg_dst_ar), sugg_dst_ar); - return SLJIT_SUCCESS; +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + sljit_ins ins; +#endif -#if (defined SLJIT_CONFIG_MIPS_32 && SLJIT_CONFIG_MIPS_32) -# undef mem_type + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + +#if (defined SLJIT_MIPS_R1 && SLJIT_MIPS_R1) + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { +#if (defined SLJIT_CONFIG_MIPS_64 && SLJIT_CONFIG_MIPS_64) + if (dst_reg & SLJIT_I32_OP) + srcw = (sljit_s32)srcw; +#endif + FAIL_IF(load_immediate(compiler, DR(TMP_REG1), srcw)); + src = TMP_REG1; + srcw = 0; + } + + dst_reg &= ~SLJIT_I32_OP; + + switch (type & 0xff) { + case SLJIT_EQUAL: + ins = MOVZ | TA(EQUAL_FLAG); + break; + case SLJIT_NOT_EQUAL: + ins = MOVN | TA(EQUAL_FLAG); + break; + case SLJIT_LESS: + case SLJIT_GREATER: + case SLJIT_SIG_LESS: + case SLJIT_SIG_GREATER: + case SLJIT_OVERFLOW: + case SLJIT_MUL_OVERFLOW: + ins = MOVN | TA(OTHER_FLAG); + break; + case SLJIT_GREATER_EQUAL: + case SLJIT_LESS_EQUAL: + case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_SIG_LESS_EQUAL: + case SLJIT_NOT_OVERFLOW: + case SLJIT_MUL_NOT_OVERFLOW: + ins = MOVZ | TA(OTHER_FLAG); + break; + case SLJIT_EQUAL_F64: + case SLJIT_LESS_F64: + case SLJIT_LESS_EQUAL_F64: + case SLJIT_UNORDERED_F64: + ins = MOVT; + break; + case SLJIT_NOT_EQUAL_F64: + case SLJIT_GREATER_EQUAL_F64: + case SLJIT_GREATER_F64: + case SLJIT_ORDERED_F64: + ins = MOVF; + break; + default: + ins = MOVZ | TA(OTHER_FLAG); + SLJIT_UNREACHABLE(); + break; + } + + return push_inst(compiler, ins | S(src) | D(dst_reg), DR(dst_reg)); + +#else + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); #endif } @@ -2128,7 +2101,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi PTR_FAIL_IF(!const_); set_const(const_, compiler); - reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + reg = FAST_IS_REG(dst) ? dst : TMP_REG2; PTR_FAIL_IF(emit_const(compiler, reg, init_value)); diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c index 0f23cf86dd..fc185f7847 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_32.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -88,77 +88,86 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_NEG: SLJIT_ASSERT(src1 == TMP_REG1); - return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); - return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); + return push_inst(compiler, CNTLZW | S(src2) | A(dst)); case SLJIT_ADD: if (flags & ALT_FORM1) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); } + if (flags & ALT_FORM2) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + if (flags & ALT_FORM3) + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + if (flags & ALT_FORM4) { + FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)))); + src1 = dst; + } + + return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)); } if (flags & ALT_FORM3) { SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); } - if (flags & ALT_FORM4) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); - return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); - } if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); - return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + if (flags & ALT_FORM4) + return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); case SLJIT_ADDC: - if (flags & ALT_FORM1) { - FAIL_IF(push_inst(compiler, MFXER | D(0))); - FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); - return push_inst(compiler, MTXER | S(0)); - } return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); case SLJIT_SUB: if (flags & ALT_FORM1) { + if (flags & ALT_FORM2) { + FAIL_IF(push_inst(compiler, CMPLI | CRD(0) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMPL | CRD(0) | A(src1) | B(src2))); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM2) { + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM3) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (flags & (ALT_FORM2 | ALT_FORM3)) { - SLJIT_ASSERT(src2 == TMP_REG2); - if (flags & ALT_FORM2) - FAIL_IF(push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm)); - if (flags & ALT_FORM3) - return push_inst(compiler, CMPLI | CRD(4) | A(src1) | compiler->imm); - return SLJIT_SUCCESS; - } - if (flags & (ALT_FORM4 | ALT_FORM5)) { - if (flags & ALT_FORM4) - FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); - if (flags & ALT_FORM5) - FAIL_IF(push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2))); - return SLJIT_SUCCESS; + + if (flags & ALT_FORM4) { + if (flags & ALT_FORM5) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, CMPI | CRD(0) | A(src1) | compiler->imm); + } + return push_inst(compiler, CMP | CRD(0) | A(src1) | B(src2)); } + if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); - if (flags & ALT_FORM6) - FAIL_IF(push_inst(compiler, CMPL | CRD(4) | A(src1) | B(src2))); - return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + if (flags & ALT_FORM5) + return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); case SLJIT_SUBC: - if (flags & ALT_FORM1) { - FAIL_IF(push_inst(compiler, MFXER | D(0))); - FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); - return push_inst(compiler, MTXER | S(0)); - } return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); case SLJIT_MUL: @@ -166,7 +175,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, MULLI | D(dst) | A(src1) | compiler->imm); } - return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); case SLJIT_AND: if (flags & ALT_FORM1) { @@ -228,19 +237,15 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SRW | RC(flags) | S(src1) | A(dst) | B(src2)); case SLJIT_ASHR: - if (flags & ALT_FORM3) - FAIL_IF(push_inst(compiler, MFXER | D(0))); if (flags & ALT_FORM1) { SLJIT_ASSERT(src2 == TMP_REG2); compiler->imm &= 0x1f; - FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11))); + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)); } - else - FAIL_IF(push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2))); - return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS; + return push_inst(compiler, SRAW | RC(flags) | S(src1) | A(dst) | B(src2)); } - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; } @@ -250,20 +255,22 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | (new_addr & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | (new_target & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; inst[0] = (inst[0] & 0xffff0000) | ((new_constant >> 16) & 0xffff); inst[1] = (inst[1] & 0xffff0000) | (new_constant & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c index 8e3223f725..5366c30d90 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_64.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -204,84 +204,118 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_NEG: SLJIT_ASSERT(src1 == TMP_REG1); + + if ((flags & (ALT_FORM1 | ALT_SIGN_EXT)) == (ALT_FORM1 | ALT_SIGN_EXT)) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + FAIL_IF(push_inst(compiler, NEG | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(TMP_REG2))); + return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); + } + UN_EXTS(); - return push_inst(compiler, NEG | OERC(flags) | D(dst) | A(src2)); + /* Setting XER SO is not enough, CR SO is also needed. */ + return push_inst(compiler, NEG | OE((flags & ALT_FORM1) ? ALT_SET_FLAGS : 0) | RC(flags) | D(dst) | A(src2)); case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1); if (flags & ALT_FORM1) - return push_inst(compiler, CNTLZW | RC(flags) | S(src2) | A(dst)); - return push_inst(compiler, CNTLZD | RC(flags) | S(src2) | A(dst)); + return push_inst(compiler, CNTLZW | S(src2) | A(dst)); + return push_inst(compiler, CNTLZD | S(src2) | A(dst)); case SLJIT_ADD: if (flags & ALT_FORM1) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDI | D(dst) | A(src1) | compiler->imm); + if (flags & ALT_SIGN_EXT) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); + src1 = TMP_REG1; + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + src2 = TMP_REG2; + } + /* Setting XER SO is not enough, CR SO is also needed. */ + FAIL_IF(push_inst(compiler, ADD | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2))); + if (flags & ALT_SIGN_EXT) + return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); + return SLJIT_SUCCESS; } + if (flags & ALT_FORM2) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); - return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + if (flags & ALT_FORM3) + return push_inst(compiler, ADDIS | D(dst) | A(src1) | compiler->imm); + + if (flags & ALT_FORM4) { + FAIL_IF(push_inst(compiler, ADDIS | D(dst) | A(src1) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1)))); + src1 = dst; + } + + return push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff)); } if (flags & ALT_FORM3) { SLJIT_ASSERT(src2 == TMP_REG2); BIN_IMM_EXTS(); return push_inst(compiler, ADDIC | D(dst) | A(src1) | compiler->imm); } - if (flags & ALT_FORM4) { - /* Flags does not set: BIN_IMM_EXTS unnecessary. */ - FAIL_IF(push_inst(compiler, ADDI | D(dst) | A(src1) | (compiler->imm & 0xffff))); - return push_inst(compiler, ADDIS | D(dst) | A(dst) | (((compiler->imm >> 16) & 0xffff) + ((compiler->imm >> 15) & 0x1))); - } if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, ADD | D(dst) | A(src1) | B(src2)); BIN_EXTS(); - return push_inst(compiler, ADDC | OERC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + if (flags & ALT_FORM4) + return push_inst(compiler, ADDC | RC(ALT_SET_FLAGS) | D(dst) | A(src1) | B(src2)); + return push_inst(compiler, ADD | RC(flags) | D(dst) | A(src1) | B(src2)); case SLJIT_ADDC: - if (flags & ALT_FORM1) { - FAIL_IF(push_inst(compiler, MFXER | D(0))); - FAIL_IF(push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2))); - return push_inst(compiler, MTXER | S(0)); - } BIN_EXTS(); return push_inst(compiler, ADDE | D(dst) | A(src1) | B(src2)); case SLJIT_SUB: if (flags & ALT_FORM1) { + if (flags & ALT_FORM2) { + FAIL_IF(push_inst(compiler, CMPLI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, ADDI | D(dst) | A(src1) | (-compiler->imm & 0xffff)); + } + FAIL_IF(push_inst(compiler, CMPL | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); + if (!(flags & ALT_FORM3)) + return SLJIT_SUCCESS; + return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); + } + + if (flags & ALT_FORM2) { + if (flags & ALT_SIGN_EXT) { + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, src1, 32, 31, 1))); + src1 = TMP_REG1; + FAIL_IF(push_inst(compiler, RLDI(TMP_REG2, src2, 32, 31, 1))); + src2 = TMP_REG2; + } + /* Setting XER SO is not enough, CR SO is also needed. */ + FAIL_IF(push_inst(compiler, SUBF | OE(ALT_SET_FLAGS) | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1))); + if (flags & ALT_SIGN_EXT) + return push_inst(compiler, RLDI(dst, dst, 32, 32, 0)); + return SLJIT_SUCCESS; + } + + if (flags & ALT_FORM3) { /* Flags does not set: BIN_IMM_EXTS unnecessary. */ SLJIT_ASSERT(src2 == TMP_REG2); return push_inst(compiler, SUBFIC | D(dst) | A(src1) | compiler->imm); } - if (flags & (ALT_FORM2 | ALT_FORM3)) { - SLJIT_ASSERT(src2 == TMP_REG2); - if (flags & ALT_FORM2) - FAIL_IF(push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm)); - if (flags & ALT_FORM3) - return push_inst(compiler, CMPLI | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); - return SLJIT_SUCCESS; - } - if (flags & (ALT_FORM4 | ALT_FORM5)) { - if (flags & ALT_FORM4) - FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); - if (flags & ALT_FORM5) - return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)); - return SLJIT_SUCCESS; + + if (flags & ALT_FORM4) { + if (flags & ALT_FORM5) { + SLJIT_ASSERT(src2 == TMP_REG2); + return push_inst(compiler, CMPI | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | compiler->imm); + } + return push_inst(compiler, CMP | CRD(0 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2)); } + if (!(flags & ALT_SET_FLAGS)) return push_inst(compiler, SUBF | D(dst) | A(src2) | B(src1)); BIN_EXTS(); - if (flags & ALT_FORM6) - FAIL_IF(push_inst(compiler, CMPL | CRD(4 | ((flags & ALT_SIGN_EXT) ? 0 : 1)) | A(src1) | B(src2))); - return push_inst(compiler, SUBFC | OERC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + if (flags & ALT_FORM5) + return push_inst(compiler, SUBFC | RC(ALT_SET_FLAGS) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, SUBF | RC(flags) | D(dst) | A(src2) | B(src1)); case SLJIT_SUBC: - if (flags & ALT_FORM1) { - FAIL_IF(push_inst(compiler, MFXER | D(0))); - FAIL_IF(push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1))); - return push_inst(compiler, MTXER | S(0)); - } BIN_EXTS(); return push_inst(compiler, SUBFE | D(dst) | A(src2) | B(src1)); @@ -292,8 +326,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl } BIN_EXTS(); if (flags & ALT_FORM2) - return push_inst(compiler, MULLW | OERC(flags) | D(dst) | A(src2) | B(src1)); - return push_inst(compiler, MULLD | OERC(flags) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, MULLW | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); + return push_inst(compiler, MULLD | OE(flags) | RC(flags) | D(dst) | A(src2) | B(src1)); case SLJIT_AND: if (flags & ALT_FORM1) { @@ -345,10 +379,8 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl compiler->imm &= 0x1f; return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11) | ((31 - compiler->imm) << 1)); } - else { - compiler->imm &= 0x3f; - return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags)); - } + compiler->imm &= 0x3f; + return push_inst(compiler, RLDI(dst, src1, compiler->imm, 63 - compiler->imm, 1) | RC(flags)); } return push_inst(compiler, ((flags & ALT_FORM2) ? SLW : SLD) | RC(flags) | S(src1) | A(dst) | B(src2)); @@ -359,33 +391,25 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl compiler->imm &= 0x1f; return push_inst(compiler, RLWINM | RC(flags) | S(src1) | A(dst) | (((32 - compiler->imm) & 0x1f) << 11) | (compiler->imm << 6) | (31 << 1)); } - else { - compiler->imm &= 0x3f; - return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags)); - } + compiler->imm &= 0x3f; + return push_inst(compiler, RLDI(dst, src1, 64 - compiler->imm, compiler->imm, 0) | RC(flags)); } return push_inst(compiler, ((flags & ALT_FORM2) ? SRW : SRD) | RC(flags) | S(src1) | A(dst) | B(src2)); case SLJIT_ASHR: - if (flags & ALT_FORM3) - FAIL_IF(push_inst(compiler, MFXER | D(0))); if (flags & ALT_FORM1) { SLJIT_ASSERT(src2 == TMP_REG2); if (flags & ALT_FORM2) { compiler->imm &= 0x1f; - FAIL_IF(push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11))); - } - else { - compiler->imm &= 0x3f; - FAIL_IF(push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4))); + return push_inst(compiler, SRAWI | RC(flags) | S(src1) | A(dst) | (compiler->imm << 11)); } + compiler->imm &= 0x3f; + return push_inst(compiler, SRADI | RC(flags) | S(src1) | A(dst) | ((compiler->imm & 0x1f) << 11) | ((compiler->imm & 0x20) >> 4)); } - else - FAIL_IF(push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2))); - return (flags & ALT_FORM3) ? push_inst(compiler, MTXER | S(0)) : SLJIT_SUCCESS; + return push_inst(compiler, ((flags & ALT_FORM2) ? SRAW : SRAD) | RC(flags) | S(src1) | A(dst) | B(src2)); } - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; } @@ -398,18 +422,19 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, ORI | S(reg) | A(reg) | IMM(init_value)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { sljit_ins *inst = (sljit_ins*)addr; - inst[0] = (inst[0] & 0xffff0000) | ((new_addr >> 48) & 0xffff); - inst[1] = (inst[1] & 0xffff0000) | ((new_addr >> 32) & 0xffff); - inst[3] = (inst[3] & 0xffff0000) | ((new_addr >> 16) & 0xffff); - inst[4] = (inst[4] & 0xffff0000) | (new_addr & 0xffff); + inst[0] = (inst[0] & 0xffff0000) | ((new_target >> 48) & 0xffff); + inst[1] = (inst[1] & 0xffff0000) | ((new_target >> 32) & 0xffff); + inst[3] = (inst[3] & 0xffff0000) | ((new_target >> 16) & 0xffff); + inst[4] = (inst[4] & 0xffff0000) | (new_target & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 5); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { sljit_ins *inst = (sljit_ins*)addr; @@ -417,5 +442,6 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta inst[1] = (inst[1] & 0xffff0000) | ((new_constant >> 32) & 0xffff); inst[3] = (inst[3] & 0xffff0000) | ((new_constant >> 16) & 0xffff); inst[4] = (inst[4] & 0xffff0000) | (new_constant & 0xffff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 5); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c index a3647327bf..2bf855c6bc 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativePPC_common.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -127,9 +127,9 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { /* Instruction bit sections. OE and Rc flag (see ALT_SET_FLAGS). */ -#define OERC(flags) (((flags & ALT_SET_FLAGS) >> 10) | (flags & ALT_SET_FLAGS)) +#define OE(flags) ((flags) & ALT_SET_FLAGS) /* Rc flag (see ALT_SET_FLAGS). */ -#define RC(flags) ((flags & ALT_SET_FLAGS) >> 10) +#define RC(flags) (((flags) & ALT_SET_FLAGS) >> 10) #define HI(opcode) ((opcode) << 26) #define LO(opcode) ((opcode) << 1) @@ -154,6 +154,7 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = { #define CMPL (HI(31) | LO(32)) #define CMPLI (HI(10)) #define CROR (HI(19) | LO(449)) +#define DCBT (HI(31) | LO(278)) #define DIVD (HI(31) | LO(489)) #define DIVDU (HI(31) | LO(457)) #define DIVW (HI(31) | LO(491)) @@ -249,7 +250,7 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; @@ -267,7 +268,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in target_addr = jump->u.target; else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } #if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -275,7 +276,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in goto keep_address; #endif - diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr)) & ~0x3l; + diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l; extra_jump_flags = 0; if (jump->flags & IS_COND) { @@ -296,6 +297,7 @@ static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_in jump->flags |= PATCH_B | extra_jump_flags; return 1; } + if (target_addr <= 0x03ffffff) { jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags; return 1; @@ -309,6 +311,7 @@ keep_address: jump->flags |= PATCH_ABS32; return 1; } + if (target_addr <= 0x7fffffffffffl) { jump->flags |= PATCH_ABS48; return 1; @@ -326,6 +329,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; + sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; @@ -349,9 +353,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; + do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); @@ -363,7 +370,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == word_count) { /* Just recording the address. */ - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -373,7 +380,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #else jump->addr = (sljit_uw)(code_ptr - 6); #endif - if (detect_jump_type(jump, code_ptr, code)) { + if (detect_jump_type(jump, code_ptr, code, executable_offset)) { #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) code_ptr[-3] = code_ptr[0]; code_ptr -= 3; @@ -420,7 +427,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -438,11 +445,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil while (jump) { do { addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; + buf_ptr = (sljit_ins *)jump->addr; + if (jump->flags & PATCH_B) { if (jump->flags & IS_COND) { if (!(jump->flags & PATCH_ABS_B)) { - addr = addr - jump->addr; + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000); *buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001); } @@ -453,7 +461,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } else { if (!(jump->flags & PATCH_ABS_B)) { - addr = addr - jump->addr; + addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset); SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000); *buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1); } @@ -464,6 +472,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } break; } + /* Set the fields of immediate loads. */ #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff); @@ -492,22 +501,48 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); - SLJIT_CACHE_FLUSH(code, code_ptr); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (((sljit_sw)code_ptr) & 0x4) code_ptr++; +#endif sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); +#endif + + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + + SLJIT_CACHE_FLUSH(code, code_ptr); + +#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL) return code_ptr; #else - sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code); - return code_ptr; + return code; #endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; #else - return code; + /* Available by default. */ + return 1; #endif + + case SLJIT_HAS_PRE_UPDATE: + case SLJIT_HAS_CLZ: + return 1; + + default: + return 0; + } } /* --------------------------------------------------------------------- */ @@ -544,7 +579,6 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #define ALT_FORM3 0x040000 #define ALT_FORM4 0x080000 #define ALT_FORM5 0x100000 -#define ALT_FORM6 0x200000 /* Source and destination is register. */ #define REG_DEST 0x000001 @@ -559,7 +593,7 @@ ALT_SIGN_EXT 0x000200 ALT_SET_FLAGS 0x000400 ALT_FORM1 0x010000 ... -ALT_FORM6 0x200000 */ +ALT_FORM5 0x100000 */ #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) #include "sljitNativePPC_32.c" @@ -726,7 +760,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp (((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg))) #endif -static const sljit_ins data_transfer_insts[64 + 8] = { +static const sljit_ins data_transfer_insts[64 + 16] = { /* -------- Unsigned -------- */ @@ -835,11 +869,20 @@ static const sljit_ins data_transfer_insts[64 + 8] = { /* d n x s */ HI(31) | LO(727) /* stfdx */, /* d n x l */ HI(31) | LO(599) /* lfdx */, +/* d w i s */ HI(55) /* stfdu */, +/* d w i l */ HI(51) /* lfdu */, +/* d w x s */ HI(31) | LO(759) /* stfdux */, +/* d w x l */ HI(31) | LO(631) /* lfdux */, + /* s n i s */ HI(52) /* stfs */, /* s n i l */ HI(48) /* lfs */, /* s n x s */ HI(31) | LO(663) /* stfsx */, /* s n x l */ HI(31) | LO(535) /* lfsx */, +/* s w i s */ HI(53) /* stfsu */, +/* s w i l */ HI(49) /* lfsu */, +/* s w x s */ HI(31) | LO(695) /* stfsux */, +/* s w x l */ HI(31) | LO(567) /* lfsux */, }; #undef ARCH_32_64 @@ -850,7 +893,7 @@ static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_ sljit_ins inst; /* Should work when (arg & REG_MASK) == 0. */ - SLJIT_COMPILE_ASSERT(A(0) == 0, a0_must_be_0); + SLJIT_ASSERT(A(0) == 0); SLJIT_ASSERT(arg & SLJIT_MEM); if (arg & OFFS_REG_MASK) { @@ -1005,10 +1048,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags #endif if (inp_flags & WRITE_BACK) { - if (arg == reg) { - FAIL_IF(push_inst(compiler, OR | S(reg) | A(tmp_r) | B(reg))); - reg = tmp_r; - } tmp_r = arg; FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16))); } @@ -1131,7 +1170,7 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 sljit_s32 src1_r; sljit_s32 src2_r; sljit_s32 sugg_src2_r = TMP_REG2; - sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_FORM6 | ALT_SIGN_EXT | ALT_SET_FLAGS); + sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS); if (!(input_flags & ALT_KEEP_CACHE)) { compiler->cache_arg = 0; @@ -1140,8 +1179,6 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 /* Destination check. */ if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) - return SLJIT_SUCCESS; dst_r = TMP_REG2; } else if (FAST_IS_REG(dst)) { @@ -1294,6 +1331,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return SLJIT_SUCCESS; } +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + if (!(src & OFFS_REG_MASK)) { + if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED) + return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK)); + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + /* Works with SLJIT_MEM0() case as well. */ + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1)); + } + + srcw &= 0x3; + + if (srcw == 0) + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src))); + +#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) + FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) << 1))); +#else + FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1))); +#endif + return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1)); +} + #define EMIT_MOV(type, type_flags, type_cast) \ emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw) @@ -1301,7 +1363,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; + sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0; sljit_s32 op_flags = GET_ALL_FLAGS(op); CHECK_ERROR(); @@ -1309,11 +1371,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) { + if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) + return emit_prefetch(compiler, src, srcw); + + return SLJIT_SUCCESS; + } + op = GET_OPCODE(op); if ((src & SLJIT_IMM) && srcw == 0) src = TMP_ZERO; - if (op_flags & SLJIT_SET_O) + if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW) FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); if (op_flags & SLJIT_I32_OP) { @@ -1339,6 +1408,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile flags |= INT_DATA | SIGNED_DATA; if (src & SLJIT_IMM) srcw = (sljit_s32)srcw; + if (HAS_FLAGS(op_flags)) + flags |= ALT_SIGN_EXT; } #endif } @@ -1404,7 +1475,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_NEG: - return emit_op(compiler, SLJIT_NEG, flags, dst, dstw, TMP_REG1, 0, src, srcw); + return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw); case SLJIT_CLZ: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -1457,7 +1528,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 flags = GET_FLAGS(op) ? ALT_SET_FLAGS : 0; + sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -1465,6 +1536,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + if ((src1 & SLJIT_IMM) && src1w == 0) src1 = TMP_ZERO; if ((src2 & SLJIT_IMM) && src2w == 0) @@ -1478,45 +1552,48 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile src1w = (sljit_s32)(src1w); if (src2 & SLJIT_IMM) src2w = (sljit_s32)(src2w); - if (GET_FLAGS(op)) + if (HAS_FLAGS(op)) flags |= ALT_SIGN_EXT; } #endif - if (op & SLJIT_SET_O) + if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); if (src2 == TMP_REG2) flags |= ALT_KEEP_CACHE; switch (GET_OPCODE(op)) { case SLJIT_ADD: - if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { + if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); + + if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_SH_IMM(src2, src2w)) { compiler->imm = (src2w >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SH_IMM(src1, src1w)) { compiler->imm = (src1w >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } /* Range between -1 and -32768 is covered above. */ if (TEST_ADD_IMM(src2, src2w)) { compiler->imm = src2w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_ADD_IMM(src1, src1w)) { compiler->imm = src1w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0); } } - if (!(GET_FLAGS(op) & (SLJIT_SET_E | SLJIT_SET_O))) { + if (HAS_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1526,75 +1603,75 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } } - return emit_op(compiler, SLJIT_ADD, flags, dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM4 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: - return emit_op(compiler, SLJIT_ADDC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: - if (!GET_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { + if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) { + if (dst == SLJIT_UNUSED) { + if (TEST_UL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w); + } + + if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) { + compiler->imm = src2w; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + } + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w); + } + + if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW) + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w); + + if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) { if (TEST_SL_IMM(src2, -src2w)) { compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); } if (TEST_SL_IMM(src1, src1w)) { compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0); } if (TEST_SH_IMM(src2, -src2w)) { compiler->imm = ((-src2w) >> 16) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } /* Range between -1 and -32768 is covered above. */ if (TEST_ADD_IMM(src2, -src2w)) { compiler->imm = -src2w & 0xffffffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0); } } - if (dst == SLJIT_UNUSED && (op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S)) && !(op & (SLJIT_SET_O | SLJIT_SET_C))) { - if (!(op & SLJIT_SET_U)) { - /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ - if (TEST_SL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0); - } - if (GET_FLAGS(op) == SLJIT_SET_E && TEST_SL_IMM(src1, src1w)) { - compiler->imm = src1w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); - } - } - if (!(op & (SLJIT_SET_E | SLJIT_SET_S))) { - /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ - if (TEST_UL_IMM(src2, src2w)) { - compiler->imm = src2w & 0xffff; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); - } - if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= 0x7fff) { - compiler->imm = src2w; - return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); + + if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)) { + if (TEST_SL_IMM(src2, src2w)) { + compiler->imm = src2w & 0xffff; + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0); } - return emit_op(compiler, SLJIT_SUB, flags | ((op & SLJIT_SET_U) ? ALT_FORM4 : 0) | ((op & (SLJIT_SET_E | SLJIT_SET_S)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w); } - if (!(op & (SLJIT_SET_E | SLJIT_SET_U | SLJIT_SET_S | SLJIT_SET_O))) { - if (TEST_SL_IMM(src2, -src2w)) { - compiler->imm = (-src2w) & 0xffff; - return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); - } + + if (TEST_SL_IMM(src2, -src2w)) { + compiler->imm = (-src2w) & 0xffff; + return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); } /* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */ - return emit_op(compiler, SLJIT_SUB, flags | (!(op & SLJIT_SET_U) ? 0 : ALT_FORM6), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: - return emit_op(compiler, SLJIT_SUBC, flags | (!(op & SLJIT_KEEP_FLAGS) ? 0 : ALT_FORM1), dst, dstw, src1, src1w, src2, src2w); + return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (op & SLJIT_I32_OP) flags |= ALT_FORM2; #endif - if (!GET_FLAGS(op)) { + if (!HAS_FLAGS(op)) { if (TEST_SL_IMM(src2, src2w)) { compiler->imm = src2w & 0xffff; return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1604,13 +1681,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0); } } + else + FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO))); return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w); case SLJIT_AND: case SLJIT_OR: case SLJIT_XOR: /* Commutative unsigned operations. */ - if (!GET_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { + if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) { if (TEST_UL_IMM(src2, src2w)) { compiler->imm = src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1628,7 +1707,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0); } } - if (!GET_FLAGS(op) && GET_OPCODE(op) != SLJIT_AND) { + if (GET_OPCODE(op) != SLJIT_AND && GET_OPCODE(op) != SLJIT_AND) { + /* Unlike or and xor, and resets unwanted bits as well. */ if (TEST_UI_IMM(src2, src2w)) { compiler->imm = src2w; return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0); @@ -1640,12 +1720,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile } return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w); - case SLJIT_ASHR: - if (op & SLJIT_KEEP_FLAGS) - flags |= ALT_FORM3; - /* Fall through. */ case SLJIT_SHL: case SLJIT_LSHR: + case SLJIT_ASHR: #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) if (op & SLJIT_I32_OP) flags |= ALT_FORM2; @@ -1685,17 +1762,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ -#ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; -#else - /* Available by default. */ - return 1; -#endif -} - -#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6)) +#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 5)) #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double) #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) @@ -1727,9 +1794,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp op = GET_OPCODE(op); FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src))); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (op == SLJIT_CONV_SW_FROM_F64) { if (FAST_IS_REG(dst)) { FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0)); @@ -1737,12 +1801,8 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp } return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0); } - #else FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src))); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; #endif if (FAST_IS_REG(dst)) { @@ -2019,10 +2079,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (FAST_IS_REG(dst)) return push_inst(compiler, MFLR | D(dst)); @@ -2079,33 +2135,33 @@ static sljit_ins get_bo_bi_flags(sljit_s32 type) return (4 << 21) | (2 << 16); case SLJIT_LESS: - case SLJIT_LESS_F64: - return (12 << 21) | ((4 + 0) << 16); - - case SLJIT_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: - return (4 << 21) | ((4 + 0) << 16); - - case SLJIT_GREATER: - case SLJIT_GREATER_F64: - return (12 << 21) | ((4 + 1) << 16); - - case SLJIT_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: - return (4 << 21) | ((4 + 1) << 16); - case SLJIT_SIG_LESS: return (12 << 21) | (0 << 16); + case SLJIT_GREATER_EQUAL: case SLJIT_SIG_GREATER_EQUAL: return (4 << 21) | (0 << 16); + case SLJIT_GREATER: case SLJIT_SIG_GREATER: return (12 << 21) | (1 << 16); + case SLJIT_LESS_EQUAL: case SLJIT_SIG_LESS_EQUAL: return (4 << 21) | (1 << 16); + case SLJIT_LESS_F64: + return (12 << 21) | ((4 + 0) << 16); + + case SLJIT_GREATER_EQUAL_F64: + return (4 << 21) | ((4 + 0) << 16); + + case SLJIT_GREATER_F64: + return (12 << 21) | ((4 + 1) << 16); + + case SLJIT_LESS_EQUAL_F64: + return (4 << 21) | ((4 + 1) << 16); + case SLJIT_OVERFLOW: case SLJIT_MUL_OVERFLOW: return (12 << 21) | (3 << 16); @@ -2207,153 +2263,147 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0)); } -/* Get a bit from CR, all other bits are zeroed. */ -#define GET_CR_BIT(bit, dst) \ - FAIL_IF(push_inst(compiler, MFCR | D(dst))); \ - FAIL_IF(push_inst(compiler, RLWINM | S(dst) | A(dst) | ((1 + (bit)) << 11) | (31 << 6) | (31 << 1))); - -#define INVERT_BIT(dst) \ - FAIL_IF(push_inst(compiler, XORI | S(dst) | A(dst) | 0x1)); - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { - sljit_s32 reg, input_flags; - sljit_s32 flags = GET_ALL_FLAGS(op); - sljit_sw original_dstw = dstw; + sljit_s32 reg, input_flags, cr_bit, invert; + sljit_s32 saved_op = op; + sljit_sw saved_dstw = dstw; CHECK_ERROR(); - CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; +#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) + input_flags = (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA; +#else + input_flags = WORD_DATA; +#endif op = GET_OPCODE(op); reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; compiler->cache_arg = 0; compiler->cache_argw = 0; - if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { - ADJUST_LOCAL_OFFSET(src, srcw); -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - input_flags = (flags & SLJIT_I32_OP) ? INT_DATA : WORD_DATA; -#else - input_flags = WORD_DATA; -#endif - FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } - switch (type & 0xff) { - case SLJIT_EQUAL: - GET_CR_BIT(2, reg); - break; + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); - case SLJIT_NOT_EQUAL: - GET_CR_BIT(2, reg); - INVERT_BIT(reg); - break; + invert = 0; + cr_bit = 0; + switch (type & 0xff) { case SLJIT_LESS: - case SLJIT_LESS_F64: - GET_CR_BIT(4 + 0, reg); + case SLJIT_SIG_LESS: break; case SLJIT_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: - GET_CR_BIT(4 + 0, reg); - INVERT_BIT(reg); + case SLJIT_SIG_GREATER_EQUAL: + invert = 1; break; case SLJIT_GREATER: - case SLJIT_GREATER_F64: - GET_CR_BIT(4 + 1, reg); + case SLJIT_SIG_GREATER: + cr_bit = 1; break; case SLJIT_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: - GET_CR_BIT(4 + 1, reg); - INVERT_BIT(reg); - break; - - case SLJIT_SIG_LESS: - GET_CR_BIT(0, reg); - break; - - case SLJIT_SIG_GREATER_EQUAL: - GET_CR_BIT(0, reg); - INVERT_BIT(reg); + case SLJIT_SIG_LESS_EQUAL: + cr_bit = 1; + invert = 1; break; - case SLJIT_SIG_GREATER: - GET_CR_BIT(1, reg); + case SLJIT_EQUAL: + cr_bit = 2; break; - case SLJIT_SIG_LESS_EQUAL: - GET_CR_BIT(1, reg); - INVERT_BIT(reg); + case SLJIT_NOT_EQUAL: + cr_bit = 2; + invert = 1; break; case SLJIT_OVERFLOW: case SLJIT_MUL_OVERFLOW: - GET_CR_BIT(3, reg); + cr_bit = 3; break; case SLJIT_NOT_OVERFLOW: case SLJIT_MUL_NOT_OVERFLOW: - GET_CR_BIT(3, reg); - INVERT_BIT(reg); + cr_bit = 3; + invert = 1; + break; + + case SLJIT_LESS_F64: + cr_bit = 4 + 0; + break; + + case SLJIT_GREATER_EQUAL_F64: + cr_bit = 4 + 0; + invert = 1; + break; + + case SLJIT_GREATER_F64: + cr_bit = 4 + 1; + break; + + case SLJIT_LESS_EQUAL_F64: + cr_bit = 4 + 1; + invert = 1; break; case SLJIT_EQUAL_F64: - GET_CR_BIT(4 + 2, reg); + cr_bit = 4 + 2; break; case SLJIT_NOT_EQUAL_F64: - GET_CR_BIT(4 + 2, reg); - INVERT_BIT(reg); + cr_bit = 4 + 2; + invert = 1; break; case SLJIT_UNORDERED_F64: - GET_CR_BIT(4 + 3, reg); + cr_bit = 4 + 3; break; case SLJIT_ORDERED_F64: - GET_CR_BIT(4 + 3, reg); - INVERT_BIT(reg); + cr_bit = 4 + 3; + invert = 1; break; default: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); break; } + FAIL_IF(push_inst(compiler, MFCR | D(reg))); + FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1))); + + if (invert) + FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1)); + if (op < SLJIT_ADD) { -#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64) - if (op == SLJIT_MOV) - input_flags = WORD_DATA; - else { - op = SLJIT_MOV_U32; - input_flags = INT_DATA; - } -#else - op = SLJIT_MOV; - input_flags = WORD_DATA; -#endif - if (reg != TMP_REG2) + if (!(dst & SLJIT_MEM)) return SLJIT_SUCCESS; - return emit_op(compiler, op, input_flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return emit_op_mem2(compiler, input_flags, reg, dst, dstw, reg, 0); } #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - return sljit_emit_op2(compiler, op | flags, dst, original_dstw, src, srcw, TMP_REG2, 0); + if (dst & SLJIT_MEM) + return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0); + return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);; } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2369,7 +2419,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi PTR_FAIL_IF(!const_); set_const(const_, compiler); - reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + reg = FAST_IS_REG(dst) ? dst : TMP_REG2; PTR_FAIL_IF(emit_const(compiler, reg, init_value)); diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c index 7e589a17c2..ee42130e87 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_32.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -60,7 +60,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, SRA | D(dst) | S1(dst) | IMM(24), DR(dst)); } else if (dst != src2) - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; case SLJIT_MOV_U16: @@ -71,7 +71,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return push_inst(compiler, (op == SLJIT_MOV_S16 ? SRA : SRL) | D(dst) | S1(dst) | IMM(16), DR(dst)); } else if (dst != src2) - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; case SLJIT_NOT: @@ -80,18 +80,17 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl case SLJIT_CLZ: SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM)); - /* sparc 32 does not support SLJIT_KEEP_FLAGS. Not sure I can fix this. */ FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(src2) | S2(0), SET_FLAGS)); FAIL_IF(push_inst(compiler, OR | D(TMP_REG1) | S1(0) | S2(src2), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, BICC | DA(0x1) | (7 & DISP_MASK), UNMOVABLE_INS)); - FAIL_IF(push_inst(compiler, OR | (flags & SET_FLAGS) | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS | (flags & SET_FLAGS))); + FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(32), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, OR | D(dst) | S1(0) | IMM(-1), DR(dst))); /* Loop. */ FAIL_IF(push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(TMP_REG1) | S2(0), SET_FLAGS)); FAIL_IF(push_inst(compiler, SLL | D(TMP_REG1) | S1(TMP_REG1) | IMM(1), DR(TMP_REG1))); FAIL_IF(push_inst(compiler, BICC | DA(0xe) | (-2 & DISP_MASK), UNMOVABLE_INS)); - return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS | (flags & SET_FLAGS)); + return push_inst(compiler, ADD | D(dst) | S1(dst) | IMM(1), UNMOVABLE_INS); case SLJIT_ADD: return push_inst(compiler, ADD | (flags & SET_FLAGS) | D(dst) | S1(src1) | ARG2(flags, src2), DR(dst) | (flags & SET_FLAGS)); @@ -135,7 +134,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return !(flags & SET_FLAGS) ? SLJIT_SUCCESS : push_inst(compiler, SUB | SET_FLAGS | D(0) | S1(dst) | S2(0), SET_FLAGS); } - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; } @@ -145,20 +144,22 @@ static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_ return push_inst(compiler, OR | D(dst) | S1(dst) | IMM_ARG | (init_value & 0x3ff), DR(dst)); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & 0xffc00000) | ((new_addr >> 10) & 0x3fffff); - inst[1] = (inst[1] & 0xfffffc00) | (new_addr & 0x3ff); + inst[0] = (inst[0] & 0xffc00000) | ((new_target >> 10) & 0x3fffff); + inst[1] = (inst[1] & 0xfffffc00) | (new_target & 0x3ff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { - sljit_ins *inst = (sljit_ins*)addr; + sljit_ins *inst = (sljit_ins *)addr; inst[0] = (inst[0] & 0xffc00000) | ((new_constant >> 10) & 0x3fffff); inst[1] = (inst[1] & 0xfffffc00) | (new_constant & 0x3ff); + inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset); SLJIT_CACHE_FLUSH(inst, inst + 2); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c index f3a33a1097..9831bd83d7 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeSPARC_common.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -199,7 +199,7 @@ static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins, sljit return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code) +static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset) { sljit_sw diff; sljit_uw target_addr; @@ -213,7 +213,7 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i target_addr = jump->u.target; else { SLJIT_ASSERT(jump->flags & JUMP_LABEL); - target_addr = (sljit_uw)(code + jump->u.label->size); + target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } inst = (sljit_ins*)jump->addr; @@ -239,8 +239,9 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i if (jump->flags & IS_COND) inst--; + diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1) - executable_offset) >> 2; + if (jump->flags & IS_MOVABLE) { - diff = ((sljit_sw)target_addr - (sljit_sw)(inst - 1)) >> 2; if (diff <= MAX_DISP && diff >= MIN_DISP) { jump->flags |= PATCH_B; inst--; @@ -257,7 +258,8 @@ static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_i } } - diff = ((sljit_sw)target_addr - (sljit_sw)(inst)) >> 2; + diff += sizeof(sljit_ins); + if (diff <= MAX_DISP && diff >= MIN_DISP) { jump->flags |= PATCH_B; if (jump->flags & IS_COND) @@ -280,6 +282,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_ins *buf_ptr; sljit_ins *buf_end; sljit_uw word_count; + sljit_sw executable_offset; sljit_uw addr; struct sljit_label *label; @@ -296,9 +299,12 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil code_ptr = code; word_count = 0; + executable_offset = SLJIT_EXEC_OFFSET(code); + label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; + do { buf_ptr = (sljit_ins*)buf->memory; buf_end = buf_ptr + (buf->used_size >> 2); @@ -310,7 +316,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil /* These structures are ordered by their address. */ if (label && label->size == word_count) { /* Just recording the address. */ - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -320,7 +326,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil #else jump->addr = (sljit_uw)(code_ptr - 6); #endif - code_ptr = detect_jump_type(jump, code_ptr, code); + code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset); jump = jump->next; } if (const_ && const_->addr == word_count) { @@ -336,7 +342,7 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } while (buf); if (label && label->size == word_count) { - label->addr = (sljit_uw)code_ptr; + label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); label->size = code_ptr - code; label = label->next; } @@ -350,16 +356,16 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil while (jump) { do { addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target; - buf_ptr = (sljit_ins*)jump->addr; + buf_ptr = (sljit_ins *)jump->addr; if (jump->flags & PATCH_CALL) { - addr = (sljit_sw)(addr - jump->addr) >> 2; + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; SLJIT_ASSERT((sljit_sw)addr <= 0x1fffffff && (sljit_sw)addr >= -0x20000000); buf_ptr[0] = CALL | (addr & 0x3fffffff); break; } if (jump->flags & PATCH_B) { - addr = (sljit_sw)(addr - jump->addr) >> 2; + addr = (sljit_sw)(addr - (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2; SLJIT_ASSERT((sljit_sw)addr <= MAX_DISP && (sljit_sw)addr >= MIN_DISP); buf_ptr[0] = (buf_ptr[0] & ~DISP_MASK) | (addr & DISP_MASK); break; @@ -378,11 +384,37 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins); + + code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset); + code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); + SLJIT_CACHE_FLUSH(code, code_ptr); return code; } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#else + /* Available by default. */ + return 1; +#endif + +#if (defined SLJIT_CONFIG_SPARC_64 && SLJIT_CONFIG_SPARC_64) + case SLJIT_HAS_CMOV: + return 1; +#endif + + default: + return 0; + } +} + /* --------------------------------------------------------------------- */ /* Entry, exit */ /* --------------------------------------------------------------------- */ @@ -567,7 +599,6 @@ static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sl base = arg & REG_MASK; if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) { argw &= 0x3; - SLJIT_ASSERT(argw != 0); /* Using the cache. */ if (((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg) && (argw == compiler->cache_argw)) @@ -652,18 +683,16 @@ static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s3 compiler->cache_argw = 0; } - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32 && !(src2 & SLJIT_MEM)) - return SLJIT_SUCCESS; - } - else if (FAST_IS_REG(dst)) { - dst_r = dst; - flags |= REG_DEST; - if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) - sugg_src2_r = dst_r; + if (dst != SLJIT_UNUSED) { + if (FAST_IS_REG(dst)) { + dst_r = dst; + flags |= REG_DEST; + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32) + sugg_src2_r = dst_r; + } + else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) + flags |= SLOW_DEST; } - else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw)) - flags |= SLOW_DEST; if (flags & IMM_OP) { if ((src2 & SLJIT_IMM) && src2w) { @@ -812,13 +841,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw)); ADJUST_LOCAL_OFFSET(dst, dstw); ADJUST_LOCAL_OFFSET(src, srcw); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + op = GET_OPCODE(op); switch (op) { case SLJIT_MOV: @@ -881,7 +913,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - sljit_s32 flags = GET_FLAGS(op) ? SET_FLAGS : 0; + sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w)); @@ -889,6 +921,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile ADJUST_LOCAL_OFFSET(src1, src1w); ADJUST_LOCAL_OFFSET(src2, src2w); + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; + op = GET_OPCODE(op); switch (op) { case SLJIT_ADD: @@ -910,7 +945,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile if (src2 & SLJIT_IMM) src2w &= 0x1f; #else - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); #endif return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w); } @@ -943,16 +978,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ -#ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; -#else - /* Available by default. */ - return 1; -#endif -} - #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 7)) #define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double) #define FLOAT_TMP_MEM_OFFSET (22 * sizeof(sljit_sw)) @@ -970,9 +995,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSTOI, FDTOI) | DA(TMP_FREG1) | S2A(src), MOVABLE_INS)); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (FAST_IS_REG(dst)) { FAIL_IF(emit_op_mem2(compiler, SINGLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET)); return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET); @@ -1186,10 +1208,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); ADJUST_LOCAL_OFFSET(dst, dstw); - /* For UNUSED dst. Uncommon, but possible. */ - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - if (FAST_IS_REG(dst)) return push_inst(compiler, OR | D(dst) | S1(0) | S2(TMP_LINK), DR(dst)); @@ -1285,7 +1303,7 @@ static sljit_ins get_cc(sljit_s32 type) return DA(0xf); default: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return DA(0x8); } } @@ -1373,30 +1391,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { - sljit_s32 reg, flags = (GET_FLAGS(op) ? SET_FLAGS : 0); + sljit_s32 reg, flags = HAS_FLAGS(op) ? SET_FLAGS : 0; CHECK_ERROR(); - CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - #if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) op = GET_OPCODE(op); reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2; compiler->cache_arg = 0; compiler->cache_argw = 0; - if (op >= SLJIT_ADD && (src & SLJIT_MEM)) { - ADJUST_LOCAL_OFFSET(src, srcw); - FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw)); - src = TMP_REG1; - srcw = 0; - } + + if (op >= SLJIT_ADD && (dst & SLJIT_MEM)) + FAIL_IF(emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw)); type &= 0xff; if (type < SLJIT_EQUAL_F64) @@ -1407,10 +1418,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(1), UNMOVABLE_INS)); FAIL_IF(push_inst(compiler, OR | D(reg) | S1(0) | IMM(0), UNMOVABLE_INS)); - if (op >= SLJIT_ADD) - return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE, dst, dstw, src, srcw, TMP_REG2, 0); + if (op >= SLJIT_ADD) { + flags |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE; + if (dst & SLJIT_MEM) + return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, TMP_REG2, 0); + return emit_op(compiler, op, flags, dst, 0, dst, 0, TMP_REG2, 0); + } + + if (!(dst & SLJIT_MEM)) + return SLJIT_SUCCESS; + + return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw); +#else +#error "Implementation required" +#endif +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); - return (reg == TMP_REG2) ? emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw) : SLJIT_SUCCESS; +#if (defined SLJIT_CONFIG_SPARC_32 && SLJIT_CONFIG_SPARC_32) + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);; #else #error "Implementation required" #endif @@ -1429,7 +1461,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi PTR_FAIL_IF(!const_); set_const(const_, compiler); - reg = SLOW_IS_REG(dst) ? dst : TMP_REG2; + reg = FAST_IS_REG(dst) ? dst : TMP_REG2; PTR_FAIL_IF(emit_const(compiler, reg, init_value)); diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c b/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c index 719632908c..dd82ebae6a 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX-encoder.c @@ -2,7 +2,7 @@ * Stack-less Just-In-Time compiler * * Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved. - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c index 462a8b9cd9..003f43a790 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeTILEGX_64.c @@ -2,7 +2,7 @@ * Stack-less Just-In-Time compiler * * Copyright 2013-2013 Tilera Corporation(jiwang@tilera.com). All rights reserved. - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -687,7 +687,7 @@ static sljit_s32 update_buffer(struct sljit_compiler *compiler) inst_buf[0] = inst1; inst_buf_index = 1; } else - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); #ifdef TILEGX_JIT_DEBUG return push_inst_nodebug(compiler, bits); @@ -727,10 +727,10 @@ static sljit_s32 update_buffer(struct sljit_compiler *compiler) return push_inst(compiler, bits); #endif } else - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } static sljit_s32 flush_buffer(struct sljit_compiler *compiler) @@ -814,7 +814,7 @@ static sljit_s32 push_3_buffer(struct sljit_compiler *compiler, tilegx_mnemonic break; default: printf("unrecoginzed opc: %s\n", opcode->name); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } inst_buf_index++; @@ -859,7 +859,7 @@ static sljit_s32 push_2_buffer(struct sljit_compiler *compiler, tilegx_mnemonic break; default: printf("unrecoginzed opc: %s\n", opcode->name); - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } inst_buf_index++; @@ -1952,7 +1952,7 @@ static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sl return SLJIT_SUCCESS; } - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; } @@ -2092,9 +2092,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; - op = GET_OPCODE(op); if (op == SLJIT_MOV_S32 || op == SLJIT_MOV_U32) mem_type = INT_DATA | SIGNED_DATA; @@ -2143,7 +2140,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co break; default: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); dst_ar = sugg_dst_ar; break; } @@ -2186,7 +2183,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_DIVMOD_SW: case SLJIT_DIV_UW: case SLJIT_DIV_SW: - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } return SLJIT_SUCCESS; @@ -2487,19 +2484,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump * sljit_emit_jump(struct sljit_compil return jump; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ - return 0; -} - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - SLJIT_ASSERT_STOP(); + SLJIT_UNREACHABLE(); } SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value) @@ -2526,13 +2518,13 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const * sljit_emit_const(struct sljit_comp return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target) { sljit_ins *inst = (sljit_ins *)addr; - inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_addr >> 32) & 0xffff) << 43); - inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_addr >> 16) & 0xffff) << 43); - inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_addr & 0xffff) << 43); + inst[0] = (inst[0] & ~(0xFFFFL << 43)) | (((new_target >> 32) & 0xffff) << 43); + inst[1] = (inst[1] & ~(0xFFFFL << 43)) | (((new_target >> 16) & 0xffff) << 43); + inst[2] = (inst[2] & ~(0xFFFFL << 43)) | ((new_target & 0xffff) << 43); SLJIT_CACHE_FLUSH(inst, inst + 3); } diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c index 78f3dcb06f..f5cf8834b0 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_32.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -38,7 +38,7 @@ static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, s return SLJIT_SUCCESS; } -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type) +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset) { if (type == SLJIT_JUMP) { *code_ptr++ = JMP_i32; @@ -57,7 +57,7 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ if (jump->flags & JUMP_LABEL) jump->flags |= PATCH_MW; else - sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4)); + sljit_unaligned_store_sw(code_ptr, jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset); code_ptr += 4; return code_ptr; @@ -75,9 +75,30 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); compiler->args = args; - compiler->flags_saved = 0; - size = 1 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3); +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + /* [esp+0] for saving temporaries and third argument for calls. */ + compiler->saveds_offset = 1 * sizeof(sljit_sw); +#else + /* [esp+0] for saving temporaries and space for maximum three arguments. */ + if (scratches <= 1) + compiler->saveds_offset = 1 * sizeof(sljit_sw); + else + compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw); +#endif + + if (scratches > 3) + compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); + + compiler->locals_offset = compiler->saveds_offset; + + if (saveds > 3) + compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); + + if (options & SLJIT_F64_ALIGNMENT) + compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); + + size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) size += (args > 0 ? (args * 2) : 0) + (args > 2 ? 2 : 0); #else @@ -94,11 +115,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi *inst++ = MOD_REG | (reg_map[TMP_REG1] << 3) | 0x4 /* esp */; } #endif - if (saveds > 2 || scratches > 7) + if (saveds > 2 || scratches > 9) PUSH_REG(reg_map[SLJIT_S2]); - if (saveds > 1 || scratches > 8) + if (saveds > 1 || scratches > 10) PUSH_REG(reg_map[SLJIT_S1]); - if (saveds > 0 || scratches > 9) + if (saveds > 0 || scratches > 11) PUSH_REG(reg_map[SLJIT_S0]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) @@ -134,51 +155,64 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } #endif - SLJIT_COMPILE_ASSERT(SLJIT_LOCALS_OFFSET >= (2 + 4) * sizeof(sljit_uw), require_at_least_two_words); + SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0); + #if defined(__APPLE__) /* Ignore pushed registers and SLJIT_LOCALS_OFFSET when computing the aligned local size. */ - saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; #else - if (options & SLJIT_DOUBLE_ALIGNMENT) { - local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7); - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 17); - FAIL_IF(!inst); - - INC_SIZE(17); - inst[0] = MOV_r_rm; - inst[1] = MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[SLJIT_SP]; - inst[2] = GROUP_F7; - inst[3] = MOD_REG | (0 << 3) | reg_map[SLJIT_SP]; - sljit_unaligned_store_sw(inst + 4, 0x4); - inst[8] = JNE_i8; - inst[9] = 6; - inst[10] = GROUP_BINARY_81; - inst[11] = MOD_REG | (5 << 3) | reg_map[SLJIT_SP]; - sljit_unaligned_store_sw(inst + 12, 0x4); - inst[16] = PUSH_r + reg_map[TMP_REG1]; - } + if (options & SLJIT_F64_ALIGNMENT) + local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); else - local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3); + local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); #endif compiler->local_size = local_size; + #ifdef _WIN32 if (local_size > 1024) { #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); #else - local_size -= SLJIT_LOCALS_OFFSET; + /* Space for a single argument. This amount is excluded when the stack is allocated below. */ + local_size -= sizeof(sljit_sw); FAIL_IF(emit_do_imm(compiler, MOV_r_i32 + reg_map[SLJIT_R0], local_size)); FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, SLJIT_LOCALS_OFFSET)); + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, sizeof(sljit_sw))); #endif FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack))); } #endif SLJIT_ASSERT(local_size > 0); + +#if !defined(__APPLE__) + if (options & SLJIT_F64_ALIGNMENT) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_SP, 0); + + /* Some space might allocated during sljit_grow_stack() above on WIN32. */ + FAIL_IF(emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size + sizeof(sljit_sw))); + +#if defined _WIN32 && !(defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + if (compiler->local_size > 1024) + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + TMP_REG1, 0, TMP_REG1, 0, SLJIT_IMM, sizeof(sljit_sw))); +#endif + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); + FAIL_IF(!inst); + + INC_SIZE(6); + inst[0] = GROUP_BINARY_81; + inst[1] = MOD_REG | AND | reg_map[SLJIT_SP]; + sljit_unaligned_store_sw(inst + 2, ~(sizeof(sljit_f64) - 1)); + + /* The real local size must be used. */ + return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), compiler->local_size, TMP_REG1, 0); + } +#endif return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size); } @@ -193,14 +227,36 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp compiler->args = args; +#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) + /* [esp+0] for saving temporaries and third argument for calls. */ + compiler->saveds_offset = 1 * sizeof(sljit_sw); +#else + /* [esp+0] for saving temporaries and space for maximum three arguments. */ + if (scratches <= 1) + compiler->saveds_offset = 1 * sizeof(sljit_sw); + else + compiler->saveds_offset = ((scratches == 2) ? 2 : 3) * sizeof(sljit_sw); +#endif + + if (scratches > 3) + compiler->saveds_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * sizeof(sljit_sw); + + compiler->locals_offset = compiler->saveds_offset; + + if (saveds > 3) + compiler->locals_offset += (saveds - 3) * sizeof(sljit_sw); + + if (options & SLJIT_F64_ALIGNMENT) + compiler->locals_offset = (compiler->locals_offset + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1); + #if defined(__APPLE__) - saveds = (2 + (scratches > 7 ? (scratches - 7) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); + saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * sizeof(sljit_uw); compiler->local_size = ((SLJIT_LOCALS_OFFSET + saveds + local_size + 15) & ~15) - saveds; #else - if (options & SLJIT_DOUBLE_ALIGNMENT) - compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 7) & ~7); + if (options & SLJIT_F64_ALIGNMENT) + compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_f64) - 1) & ~(sizeof(sljit_f64) - 1)); else - compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + 3) & ~3); + compiler->local_size = SLJIT_LOCALS_OFFSET + ((local_size + sizeof(sljit_sw) - 1) & ~(sizeof(sljit_sw) - 1)); #endif return SLJIT_SUCCESS; } @@ -214,23 +270,19 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp CHECK(check_sljit_emit_return(compiler, op, src, srcw)); SLJIT_ASSERT(compiler->args >= 0); - compiler->flags_saved = 0; FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); SLJIT_ASSERT(compiler->local_size > 0); - FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, - SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); #if !defined(__APPLE__) - if (compiler->options & SLJIT_DOUBLE_ALIGNMENT) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 3); - FAIL_IF(!inst); - - INC_SIZE(3); - inst[0] = MOV_r_rm; - inst[1] = (reg_map[SLJIT_SP] << 3) | 0x4 /* SIB */; - inst[2] = (4 << 3) | reg_map[SLJIT_SP]; - } + if (compiler->options & SLJIT_F64_ALIGNMENT) + EMIT_MOV(compiler, SLJIT_SP, 0, SLJIT_MEM1(SLJIT_SP), compiler->local_size) + else + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); +#else + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, compiler->local_size)); #endif size = 2 + (compiler->scratches > 7 ? (compiler->scratches - 7) : 0) + @@ -247,11 +299,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp INC_SIZE(size); - if (compiler->saveds > 0 || compiler->scratches > 9) + if (compiler->saveds > 0 || compiler->scratches > 11) POP_REG(reg_map[SLJIT_S0]); - if (compiler->saveds > 1 || compiler->scratches > 8) + if (compiler->saveds > 1 || compiler->scratches > 10) POP_REG(reg_map[SLJIT_S1]); - if (compiler->saveds > 2 || compiler->scratches > 7) + if (compiler->saveds > 2 || compiler->scratches > 9) POP_REG(reg_map[SLJIT_S2]); POP_REG(reg_map[TMP_REG1]); #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c index e88ddedcd1..039b68c45a 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -47,9 +47,8 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ *code_ptr++ = 10 + 3; } - SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_first); - *code_ptr++ = REX_W | REX_B; - *code_ptr++ = MOV_r_i32 + 1; + *code_ptr++ = REX_W | ((reg_map[TMP_REG2] <= 7) ? 0 : REX_B); + *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2]; jump->addr = (sljit_uw)code_ptr; if (jump->flags & JUMP_LABEL) @@ -58,31 +57,10 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ sljit_unaligned_store_sw(code_ptr, jump->u.target); code_ptr += sizeof(sljit_sw); - *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = (type >= SLJIT_FAST_CALL) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); - - return code_ptr; -} - -static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type) -{ - sljit_sw delta = addr - ((sljit_sw)code_ptr + 1 + sizeof(sljit_s32)); - - if (delta <= HALFWORD_MAX && delta >= HALFWORD_MIN) { - *code_ptr++ = (type == 2) ? CALL_i32 : JMP_i32; - sljit_unaligned_store_sw(code_ptr, delta); - } - else { - SLJIT_COMPILE_ASSERT(reg_map[TMP_REG3] == 9, tmp3_is_9_second); - *code_ptr++ = REX_W | REX_B; - *code_ptr++ = MOV_r_i32 + 1; - sljit_unaligned_store_sw(code_ptr, addr); - code_ptr += sizeof(sljit_sw); + if (reg_map[TMP_REG2] >= 8) *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = (type == 2) ? (MOD_REG | CALL_rm | 1) : (MOD_REG | JMP_rm | 1); - } + *code_ptr++ = GROUP_FF; + *code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]; return code_ptr; } @@ -98,7 +76,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); - compiler->flags_saved = 0; +#ifdef _WIN64 + /* Two/four register slots for parameters plus space for xmm6 register if needed. */ + if (fscratches >= 6 || fsaveds >= 1) + compiler->locals_offset = 6 * sizeof(sljit_sw); + else + compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw); +#endif /* Including the return address saved by the call instruction. */ saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); @@ -177,7 +161,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi INC_SIZE(4 + (3 + sizeof(sljit_s32))); *inst++ = REX_W; *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | SUB | 4; + *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP]; /* Allocated size for registers must be divisible by 8. */ SLJIT_ASSERT(!(saved_register_size & 0x7)); /* Aligned to 16 byte. */ @@ -189,7 +173,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi local_size -= 4 * sizeof(sljit_sw); } /* Second instruction */ - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] < 8, temporary_reg1_is_loreg); + SLJIT_ASSERT(reg_map[SLJIT_R0] < 8); *inst++ = REX_W; *inst++ = MOV_rm_i32; *inst++ = MOD_REG | reg_lmap[SLJIT_R0]; @@ -202,25 +186,26 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi } #endif - SLJIT_ASSERT(local_size > 0); - if (local_size <= 127) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | SUB | 4; - *inst++ = local_size; - } - else { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!inst); - INC_SIZE(7); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_81; - *inst++ = MOD_REG | SUB | 4; - sljit_unaligned_store_s32(inst, local_size); - inst += sizeof(sljit_s32); + if (local_size > 0) { + if (local_size <= 127) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP]; + *inst++ = local_size; + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!inst); + INC_SIZE(7); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_81; + *inst++ = MOD_REG | SUB | reg_map[SLJIT_SP]; + sljit_unaligned_store_s32(inst, local_size); + inst += sizeof(sljit_s32); + } } #ifdef _WIN64 @@ -247,6 +232,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size); +#ifdef _WIN64 + /* Two/four register slots for parameters plus space for xmm6 register if needed. */ + if (fscratches >= 6 || fsaveds >= 1) + compiler->locals_offset = 6 * sizeof(sljit_sw); + else + compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw); +#endif + /* Including the return address saved by the call instruction. */ saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1); compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size; @@ -261,7 +254,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp CHECK_ERROR(); CHECK(check_sljit_emit_return(compiler, op, src, srcw)); - compiler->flags_saved = 0; FAIL_IF(emit_mov_before_return(compiler, op, src, srcw)); #ifdef _WIN64 @@ -275,24 +267,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *comp } #endif - SLJIT_ASSERT(compiler->local_size > 0); - if (compiler->local_size <= 127) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_83; - *inst++ = MOD_REG | ADD | 4; - *inst = compiler->local_size; - } - else { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); - FAIL_IF(!inst); - INC_SIZE(7); - *inst++ = REX_W; - *inst++ = GROUP_BINARY_81; - *inst++ = MOD_REG | ADD | 4; - sljit_unaligned_store_s32(inst, compiler->local_size); + if (compiler->local_size > 0) { + if (compiler->local_size <= 127) { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); + FAIL_IF(!inst); + INC_SIZE(4); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_83; + *inst++ = MOD_REG | ADD | 4; + *inst = compiler->local_size; + } + else { + inst = (sljit_u8*)ensure_buf(compiler, 1 + 7); + FAIL_IF(!inst); + INC_SIZE(7); + *inst++ = REX_W; + *inst++ = GROUP_BINARY_81; + *inst++ = MOD_REG | ADD | 4; + sljit_unaligned_store_s32(inst, compiler->local_size); + } } tmp = compiler->scratches; @@ -387,13 +380,12 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 if (b & SLJIT_MEM) { if (!(b & OFFS_REG_MASK)) { if (NOT_HALFWORD(immb)) { - if (emit_load_imm64(compiler, TMP_REG3, immb)) - return NULL; + PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); immb = 0; if (b & REG_MASK) - b |= TO_OFFS_REG(TMP_REG3); + b |= TO_OFFS_REG(TMP_REG2); else - b |= TMP_REG3; + b |= TMP_REG2; } else if (reg_lmap[b & REG_MASK] == 4) b |= TO_OFFS_REG(SLJIT_SP); @@ -553,17 +545,19 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 /* Call / return instructions */ /* --------------------------------------------------------------------- */ -static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type) +static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type) { sljit_u8 *inst; + /* After any change update IS_REG_CHANGED_BY_CALL as well. */ #ifndef _WIN64 - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); + SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 2); inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); if (type >= SLJIT_CALL3) { + /* Move third argument to TMP_REG1. */ *inst++ = REX_W; *inst++ = MOV_r_rm; *inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2]; @@ -572,12 +566,13 @@ static SLJIT_INLINE sljit_s32 call_with_args(struct sljit_compiler *compiler, sl *inst++ = MOV_r_rm; *inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0]; #else - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8, args_registers); + SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 8); inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6)); FAIL_IF(!inst); INC_SIZE((type < SLJIT_CALL3) ? 3 : 6); if (type >= SLJIT_CALL3) { + /* Move third argument to TMP_REG1. */ *inst++ = REX_W | REX_R; *inst++ = MOV_r_rm; *inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2]; diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c index aa5ba089d2..eb0886d671 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_common.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -67,12 +67,15 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void) #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { - 0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5 + 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5 }; #define CHECK_EXTRA_REGS(p, w, do) \ - if (p >= SLJIT_R3 && p <= SLJIT_R6) { \ - w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \ + if (p >= SLJIT_R3 && p <= SLJIT_S3) { \ + if (p <= compiler->scratches) \ + w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \ + else \ + w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \ p = SLJIT_MEM1(SLJIT_SP); \ do; \ } @@ -82,28 +85,27 @@ static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = { /* Last register + 1. */ #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2) #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3) -#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4) /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present Note: avoid to use r12 and r13 for memory addessing - therefore r12 is better for SAVED_EREG than SAVED_REG. */ + therefore r12 is better to be a higher saved register. */ #ifndef _WIN64 -/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */ -static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9 +/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 6, 1, 7, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9 }; /* low-map. reg_map & 0x7. */ -static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1 +static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 6, 1, 7, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1 }; #else -/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */ -static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9 +/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */ +static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 2, 1, 10, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 8, 9 }; /* low-map. reg_map & 0x7. */ -static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { - 0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1 +static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = { + 0, 0, 2, 1, 2, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 0, 1 }; #endif @@ -166,7 +168,7 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define CALL_i32 0xe8 #define CALL_rm (/* GROUP_FF */ 2 << 3) #define CDQ 0x99 -#define CMOVNE_r_rm (/* GROUP_0F */ 0x45) +#define CMOVE_r_rm (/* GROUP_0F */ 0x44) #define CMP (/* BINARY */ 7 << 3) #define CMP_EAX_i32 0x3d #define CMP_r_rm 0x3b @@ -214,6 +216,7 @@ static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = { #define POP_r 0x58 #define POP_rm 0x8f #define POPF 0x9d +#define PREFETCH 0x18 #define PUSH_i32 0x68 #define PUSH_r 0x50 #define PUSH_rm (/* GROUP_FF */ 6 << 3) @@ -409,13 +412,13 @@ static sljit_u8 get_jump_code(sljit_s32 type) return 0; } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset); +#else static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) -static sljit_u8* generate_fixed_jump(sljit_u8 *code_ptr, sljit_sw addr, sljit_s32 type); #endif -static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type) +static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset) { sljit_s32 short_jump; sljit_uw label_addr; @@ -423,7 +426,8 @@ static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code if (jump->flags & JUMP_LABEL) label_addr = (sljit_uw)(code + jump->u.label->size); else - label_addr = jump->u.target; + label_addr = jump->u.target - executable_offset; + short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -476,6 +480,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil sljit_u8 *buf_ptr; sljit_u8 *buf_end; sljit_u8 len; + sljit_sw executable_offset; + sljit_sw jump_addr; struct sljit_label *label; struct sljit_jump *jump; @@ -494,6 +500,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil label = compiler->labels; jump = compiler->jumps; const_ = compiler->consts; + executable_offset = SLJIT_EXEC_OFFSET(code); + do { buf_ptr = buf->memory; buf_end = buf_ptr + buf->used_size; @@ -506,35 +514,28 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil buf_ptr += len; } else { - if (*buf_ptr >= 4) { + if (*buf_ptr >= 2) { jump->addr = (sljit_uw)code_ptr; if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) - code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4); - else - code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4); + code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset); + else { +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset); +#else + code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2); +#endif + } jump = jump->next; } else if (*buf_ptr == 0) { - label->addr = (sljit_uw)code_ptr; + label->addr = ((sljit_uw)code_ptr) + executable_offset; label->size = code_ptr - code; label = label->next; } - else if (*buf_ptr == 1) { + else { /* *buf_ptr is 1 */ const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw); const_ = const_->next; } - else { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - *code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32; - buf_ptr++; - sljit_unaligned_store_sw(code_ptr, *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw))); - code_ptr += sizeof(sljit_sw); - buf_ptr += sizeof(sljit_sw) - 1; -#else - code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr); - buf_ptr += sizeof(sljit_sw); -#endif - } buf_ptr++; } } while (buf_ptr < buf_end); @@ -548,24 +549,26 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = compiler->jumps; while (jump) { + jump_addr = jump->addr + executable_offset; + if (jump->flags & PATCH_MB) { - SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))) <= 127); - *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump->addr + sizeof(sljit_s8))); + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127); + *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))); } else if (jump->flags & PATCH_MW) { if (jump->flags & JUMP_LABEL) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)))); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw)))); #else - SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX); - sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump->addr + sizeof(sljit_s32)))); + SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32)))); #endif } else { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)))); + sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw)))); #else - SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_s32))) <= HALFWORD_MAX); - sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump->addr + sizeof(sljit_s32)))); + SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX); + sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32)))); #endif } } @@ -577,11 +580,54 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil jump = jump->next; } - /* Maybe we waste some space because of short jumps. */ + /* Some space may be wasted because of short jumps. */ SLJIT_ASSERT(code_ptr <= code + compiler->size); compiler->error = SLJIT_ERR_COMPILED; + compiler->executable_offset = executable_offset; compiler->executable_size = code_ptr - code; - return (void*)code; + return (void*)(code + executable_offset); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) +{ + switch (feature_type) { + case SLJIT_HAS_FPU: +#ifdef SLJIT_IS_FPU_AVAILABLE + return SLJIT_IS_FPU_AVAILABLE; +#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else /* SLJIT_DETECT_SSE2 */ + return 1; +#endif /* SLJIT_DETECT_SSE2 */ + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + case SLJIT_HAS_VIRTUAL_REGISTERS: + return 1; +#endif + + case SLJIT_HAS_CLZ: + case SLJIT_HAS_CMOV: + if (cpu_has_cmov == -1) + get_cpu_features(); + return cpu_has_cmov; + + case SLJIT_HAS_PREF_SHIFT_REG: + return 1; + + case SLJIT_HAS_SSE2: +#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) + if (cpu_has_sse2 == -1) + get_cpu_features(); + return cpu_has_sse2; +#else + return 1; +#endif + + default: + return 0; + } } /* --------------------------------------------------------------------- */ @@ -604,52 +650,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw); -static SLJIT_INLINE sljit_s32 emit_save_flags(struct sljit_compiler *compiler) -{ - sljit_u8 *inst; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); -#else - inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!inst); - INC_SIZE(6); - *inst++ = REX_W; -#endif - *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */ - *inst++ = 0x64; - *inst++ = 0x24; - *inst++ = (sljit_u8)sizeof(sljit_sw); - *inst++ = PUSHF; - compiler->flags_saved = 1; - return SLJIT_SUCCESS; -} - -static SLJIT_INLINE sljit_s32 emit_restore_flags(struct sljit_compiler *compiler, sljit_s32 keep_flags) -{ - sljit_u8 *inst; - -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); - *inst++ = POPF; -#else - inst = (sljit_u8*)ensure_buf(compiler, 1 + 6); - FAIL_IF(!inst); - INC_SIZE(6); - *inst++ = POPF; - *inst++ = REX_W; -#endif - *inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */ - *inst++ = 0x64; - *inst++ = 0x24; - *inst++ = (sljit_u8)(-(sljit_s8)sizeof(sljit_sw)); - compiler->flags_saved = keep_flags; - return SLJIT_SUCCESS; -} +#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ + FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); #ifdef _WIN32 #include <malloc.h> @@ -680,15 +682,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, { sljit_u8* inst; - if (dst == SLJIT_UNUSED) { - /* No destination, doesn't need to setup flags. */ - if (src & SLJIT_MEM) { - inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); - FAIL_IF(!inst); - *inst = MOV_r_rm; - } - return SLJIT_SUCCESS; - } + SLJIT_ASSERT(dst != SLJIT_UNUSED); + if (FAST_IS_REG(src)) { inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw); FAIL_IF(!inst); @@ -710,8 +705,10 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) if (!compiler->mode32 && NOT_HALFWORD(srcw)) { - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw); + /* Immediate to memory move. Only SLJIT_MOV operation copies + an immediate directly into memory so TMP_REG1 can be used. */ + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw); FAIL_IF(!inst); *inst = MOV_rm_r; return SLJIT_SUCCESS; @@ -729,7 +726,8 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } - /* Memory to memory move. Requires two instruction. */ + /* Memory to memory move. Only SLJIT_MOV operation copies + data from memory to memory so TMP_REG1 can be used. */ inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw); FAIL_IF(!inst); *inst = MOV_r_rm; @@ -739,9 +737,6 @@ static sljit_s32 emit_mov(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } -#define EMIT_MOV(compiler, dst, dstw, src, srcw) \ - FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw)); - SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op) { sljit_u8 *inst; @@ -771,20 +766,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile case SLJIT_DIVMOD_SW: case SLJIT_DIV_UW: case SLJIT_DIV_SW: - compiler->flags_saved = 0; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) #ifdef _WIN64 - SLJIT_COMPILE_ASSERT( + SLJIT_ASSERT( reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R1] == 2 - && reg_map[TMP_REG1] > 7, - invalid_register_assignment_for_div_mul); + && reg_map[TMP_REG1] > 7); #else - SLJIT_COMPILE_ASSERT( + SLJIT_ASSERT( reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R1] < 7 - && reg_map[TMP_REG1] == 2, - invalid_register_assignment_for_div_mul); + && reg_map[TMP_REG1] == 2); #endif compiler->mode32 = op & SLJIT_I32_OP; #endif @@ -908,9 +900,6 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, compiler->mode32 = 0; #endif - if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) - return SLJIT_SUCCESS; /* Empty instruction. */ - if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -1039,6 +1028,30 @@ static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign, return SLJIT_SUCCESS; } +static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = 1; +#endif + + inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst++ = PREFETCH; + + if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8) + *inst |= (3 << 3); + else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16) + *inst |= (2 << 3); + else + *inst |= (1 << 3); + + return SLJIT_SUCCESS; +} + static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) @@ -1050,9 +1063,6 @@ static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign, compiler->mode32 = 0; #endif - if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM)) - return SLJIT_SUCCESS; /* Empty instruction. */ - if (src & SLJIT_IMM) { if (FAST_IS_REG(dst)) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -1096,14 +1106,6 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, { sljit_u8* inst; - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= opcode; - return SLJIT_SUCCESS; - } if (dst == src && dstw == srcw) { /* Same input and output */ inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); @@ -1112,14 +1114,19 @@ static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode, *inst |= opcode; return SLJIT_SUCCESS; } + + if (dst == SLJIT_UNUSED) + dst = TMP_REG1; + if (FAST_IS_REG(dst)) { EMIT_MOV(compiler, dst, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); FAIL_IF(!inst); *inst++ = GROUP_F7; *inst |= opcode; return SLJIT_SUCCESS; } + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); FAIL_IF(!inst); @@ -1135,20 +1142,12 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, { sljit_u8* inst; - if (dst == SLJIT_UNUSED) { - EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= NOT_rm; - inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0); - FAIL_IF(!inst); - *inst = OR_r_rm; - return SLJIT_SUCCESS; - } + if (dst == SLJIT_UNUSED) + dst = TMP_REG1; + if (FAST_IS_REG(dst)) { EMIT_MOV(compiler, dst, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); + inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0); FAIL_IF(!inst); *inst++ = GROUP_F7; *inst |= NOT_rm; @@ -1157,6 +1156,7 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, *inst = OR_r_rm; return SLJIT_SUCCESS; } + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); FAIL_IF(!inst); @@ -1169,6 +1169,10 @@ static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler, return SLJIT_SUCCESS; } +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) +static const sljit_sw emit_clz_arg = 32 + 31; +#endif + static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) @@ -1177,22 +1181,6 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, sljit_s32 dst_r; SLJIT_UNUSED_ARG(op_flags); - if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) { - /* Just set the zero flag. */ - EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); - inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0); - FAIL_IF(!inst); - *inst++ = GROUP_F7; - *inst |= NOT_rm; -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0); -#else - inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, TMP_REG1, 0); -#endif - FAIL_IF(!inst); - *inst |= SHR; - return SLJIT_SUCCESS; - } if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); @@ -1200,81 +1188,53 @@ static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags, srcw = 0; } - inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw); + if (cpu_has_cmov == -1) + get_cpu_features(); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + + inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw); FAIL_IF(!inst); *inst++ = GROUP_0F; *inst = BSR_r_rm; #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (FAST_IS_REG(dst)) - dst_r = dst; - else { - /* Find an unused temporary register. */ - if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0)) - dst_r = SLJIT_R0; - else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1)) - dst_r = SLJIT_R1; + if (cpu_has_cmov) { + if (dst_r != TMP_REG1) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31); + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); + } else - dst_r = SLJIT_R2; - EMIT_MOV(compiler, dst, dstw, dst_r, 0); - } - EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31); -#else - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2; - compiler->mode32 = 0; - EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 64 + 63 : 32 + 31); - compiler->mode32 = op_flags & SLJIT_I32_OP; -#endif + inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg); - if (cpu_has_cmov == -1) - get_cpu_features(); - - if (cpu_has_cmov) { - inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0); FAIL_IF(!inst); *inst++ = GROUP_0F; - *inst = CMOVNE_r_rm; - } else { -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = (sljit_u8*)ensure_buf(compiler, 1 + 4); - FAIL_IF(!inst); - INC_SIZE(4); + *inst = CMOVE_r_rm; + } + else + FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31)); - *inst++ = JE_i8; - *inst++ = 2; - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1]; + inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); #else - inst = (sljit_u8*)ensure_buf(compiler, 1 + 5); - FAIL_IF(!inst); - INC_SIZE(5); + if (cpu_has_cmov) { + EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)); - *inst++ = JE_i8; - *inst++ = 3; - *inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0); - *inst++ = MOV_r_rm; - *inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1]; -#endif + inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = CMOVE_r_rm; } + else + FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31))); -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0); -#else inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0); #endif + FAIL_IF(!inst); *(inst + 1) |= XOR; -#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (dst & SLJIT_MEM) { - inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw); - FAIL_IF(!inst); - *inst = XCHG_r_rm; - } -#else if (dst & SLJIT_MEM) - EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0); -#endif + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); return SLJIT_SUCCESS; } @@ -1282,7 +1242,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_u8* inst; sljit_s32 update = 0; sljit_s32 op_flags = GET_ALL_FLAGS(op); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) @@ -1303,7 +1262,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile compiler->mode32 = op_flags & SLJIT_I32_OP; #endif + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) { + if (op <= SLJIT_MOV_P && (src & SLJIT_MEM)) + return emit_prefetch(compiler, op, src, srcw); + return SLJIT_SUCCESS; + } + op = GET_OPCODE(op); + if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; @@ -1361,14 +1327,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile #endif } - if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) { - inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw); - FAIL_IF(!inst); - *inst = LEA_r_m; - src &= SLJIT_MEM | 0xf; - srcw = 0; - } - #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) { SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP)); @@ -1412,31 +1370,40 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0); #endif - if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) { - inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw); - FAIL_IF(!inst); - *inst = LEA_r_m; + if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) { + if ((src & OFFS_REG_MASK) != 0) { + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0)); + } + else if (srcw != 0) { + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw)); + } + } + + if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) { + if ((dst & OFFS_REG_MASK) != 0) { + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0)); + } + else if (dstw != 0) { + FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, + (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw)); + } } return SLJIT_SUCCESS; } - if (SLJIT_UNLIKELY(GET_FLAGS(op_flags))) - compiler->flags_saved = 0; - switch (op) { case SLJIT_NOT: - if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E)) + if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z)) return emit_not_with_flags(compiler, dst, dstw, src, srcw); return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw); case SLJIT_NEG: - if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw); case SLJIT_CLZ: - if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); return emit_clz(compiler, op_flags, dst, dstw, src, srcw); } @@ -1456,8 +1423,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile *(inst + 1) |= (op_imm); \ } \ else { \ - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \ - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \ + FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \ + inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \ FAIL_IF(!inst); \ *inst = (op_mr); \ } @@ -1683,7 +1650,7 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, sljit_u8* inst; sljit_s32 dst_r; - dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; /* Register destination. */ if (dst_r == src1 && !(src2 & SLJIT_IMM)) { @@ -1735,9 +1702,9 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, sljit_unaligned_store_s32(inst, (sljit_s32)src1w); } else { - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w); if (dst_r != src2) EMIT_MOV(compiler, dst_r, 0, src2, src2w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); FAIL_IF(!inst); *inst++ = GROUP_0F; @@ -1778,9 +1745,9 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, sljit_unaligned_store_s32(inst, (sljit_s32)src2w); } else { - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w); if (dst_r != src1) EMIT_MOV(compiler, dst_r, 0, src1, src1w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0); FAIL_IF(!inst); *inst++ = GROUP_0F; @@ -1799,13 +1766,13 @@ static sljit_s32 emit_mul(struct sljit_compiler *compiler, *inst = IMUL_r_rm; } - if (dst_r == TMP_REG1) + if (dst & SLJIT_MEM) EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); return SLJIT_SUCCESS; } -static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep_flags, +static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -1814,12 +1781,10 @@ static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler, sljit_s32 keep sljit_s32 dst_r, done = 0; /* These cases better be left to handled by normal way. */ - if (!keep_flags) { - if (dst == src1 && dstw == src1w) - return SLJIT_ERR_UNSUPPORTED; - if (dst == src2 && dstw == src2w) - return SLJIT_ERR_UNSUPPORTED; - } + if (dst == src1 && dstw == src1w) + return SLJIT_ERR_UNSUPPORTED; + if (dst == src2 && dstw == src2w) + return SLJIT_ERR_UNSUPPORTED; dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; @@ -1931,7 +1896,7 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, } #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (src2 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { + if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) { #else if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) { #endif @@ -1948,8 +1913,8 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, *inst = GROUP_F7; } else { - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w); FAIL_IF(!inst); *inst = TEST_rm_r; } @@ -1977,8 +1942,8 @@ static sljit_s32 emit_test_binary(struct sljit_compiler *compiler, *inst = GROUP_F7; } else { - FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w)); - inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w); + FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w)); + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w); FAIL_IF(!inst); *inst = TEST_rm_r; } @@ -2090,25 +2055,29 @@ static sljit_s32 emit_shift(struct sljit_compiler *compiler, EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); } else { - /* This case is really difficult, since ecx itself may used for - addressing, and we must ensure to work even in that case. */ + /* This case is complex since ecx itself may be used for + addressing, and this case must be supported as well. */ +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); -#else - /* [esp+0] contains the flags. */ - EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0); -#endif + EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0); EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w); inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); FAIL_IF(!inst); *inst |= mode; -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0); + EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); #else - EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw)); -#endif + EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w); + EMIT_MOV(compiler, TMP_REG2, 0, src2, src2w); + inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0); + FAIL_IF(!inst); + *inst = XCHG_r_rm; + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0); + FAIL_IF(!inst); + *inst |= mode; + EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0); EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0); +#endif } return SLJIT_SUCCESS; @@ -2167,54 +2136,31 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile compiler->mode32 = op & SLJIT_I32_OP; #endif - if (GET_OPCODE(op) >= SLJIT_MUL) { - if (SLJIT_UNLIKELY(GET_FLAGS(op))) - compiler->flags_saved = 0; - else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); - } + if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) + return SLJIT_SUCCESS; switch (GET_OPCODE(op)) { case SLJIT_ADD: - if (!GET_FLAGS(op)) { - if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) + if (!HAS_FLAGS(op)) { + if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error; } - else - compiler->flags_saved = 0; - if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_ADDC: - if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ - FAIL_IF(emit_restore_flags(compiler, 1)); - else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) - FAIL_IF(emit_save_flags(compiler)); - if (SLJIT_UNLIKELY(GET_FLAGS(op))) - compiler->flags_saved = 0; return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUB: - if (!GET_FLAGS(op)) { - if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) + if (!HAS_FLAGS(op)) { + if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED) return compiler->error; } - else - compiler->flags_saved = 0; - if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved) - FAIL_IF(emit_save_flags(compiler)); + if (dst == SLJIT_UNUSED) return emit_cmp_binary(compiler, src1, src1w, src2, src2w); return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SUBC: - if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */ - FAIL_IF(emit_restore_flags(compiler, 1)); - else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS)) - FAIL_IF(emit_save_flags(compiler)); - if (SLJIT_UNLIKELY(GET_FLAGS(op))) - compiler->flags_saved = 0; return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_MUL: @@ -2231,13 +2177,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32, dst, dstw, src1, src1w, src2, src2w); case SLJIT_SHL: - return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); case SLJIT_LSHR: - return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); case SLJIT_ASHR: - return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op), + return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op), dst, dstw, src1, src1w, src2, src2w); } @@ -2248,7 +2194,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) { CHECK_REG_INDEX(check_sljit_get_register_index(reg)); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - if (reg >= SLJIT_R3 && reg <= SLJIT_R6) + if (reg >= SLJIT_R3 && reg <= SLJIT_R8) return -1; #endif return reg_map[reg]; @@ -2279,36 +2225,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *c /* Floating point operators */ /* --------------------------------------------------------------------- */ -/* Alignment + 2 * 16 bytes. */ -static sljit_s32 sse2_data[3 + (4 + 4) * 2]; +/* Alignment(3) + 4 * 16 bytes. */ +static sljit_s32 sse2_data[3 + (4 * 4)]; static sljit_s32 *sse2_buffer; static void init_compiler(void) { + /* Align to 16 bytes. */ sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf); - /* Single precision constants. */ + + /* Single precision constants (each constant is 16 byte long). */ sse2_buffer[0] = 0x80000000; sse2_buffer[4] = 0x7fffffff; - /* Double precision constants. */ + /* Double precision constants (each constant is 16 byte long). */ sse2_buffer[8] = 0; sse2_buffer[9] = 0x80000000; sse2_buffer[12] = 0xffffffff; sse2_buffer[13] = 0x7fffffff; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_is_fpu_available(void) -{ -#ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; -#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) - if (cpu_has_sse2 == -1) - get_cpu_features(); - return cpu_has_sse2; -#else /* SLJIT_DETECT_SSE2 */ - return 1; -#endif /* SLJIT_DETECT_SSE2 */ -} - static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w) { @@ -2349,7 +2284,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { - sljit_s32 dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1; + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; sljit_u8 *inst; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -2362,7 +2297,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp *inst++ = GROUP_0F; *inst = CVTTSD2SI_r_xm; - if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED) + if (dst & SLJIT_MEM) return emit_mov(compiler, dst, dstw, TMP_REG1, 0); return SLJIT_SUCCESS; } @@ -2406,7 +2341,6 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) { - compiler->flags_saved = 0; if (!FAST_IS_REG(src1)) { FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w)); src1 = TMP_FREG; @@ -2455,7 +2389,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil return SLJIT_SUCCESS; } - if (SLOW_IS_REG(dst)) { + if (FAST_IS_REG(dst)) { dst_r = dst; if (dst != src) FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw)); @@ -2553,11 +2487,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_label(compiler)); - /* We should restore the flags before the label, - since other taken jumps has their own flags as well. */ - if (SLJIT_UNLIKELY(compiler->flags_saved)) - PTR_FAIL_IF(emit_restore_flags(compiler, 0)); - if (compiler->last_label && compiler->last_label->size == compiler->size) return compiler->last_label; @@ -2582,12 +2511,6 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile CHECK_ERROR_PTR(); CHECK_PTR(check_sljit_emit_jump(compiler, type)); - if (SLJIT_UNLIKELY(compiler->flags_saved)) { - if ((type & 0xff) <= SLJIT_JUMP) - PTR_FAIL_IF(emit_restore_flags(compiler, 0)); - compiler->flags_saved = 0; - } - jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump)); PTR_FAIL_IF_NULL(jump); set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP); @@ -2607,10 +2530,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compile PTR_FAIL_IF_NULL(inst); *inst++ = 0; - *inst++ = type + 4; + *inst++ = type + 2; return jump; } +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) +#ifndef _WIN64 +#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R3) +#else +#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R2) +#endif +#endif + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw) { sljit_u8 *inst; @@ -2622,12 +2553,6 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_EXTRA_REGS(src, srcw, (void)0); - if (SLJIT_UNLIKELY(compiler->flags_saved)) { - if (type <= SLJIT_JUMP) - FAIL_IF(emit_restore_flags(compiler, 0)); - compiler->flags_saved = 0; - } - if (type >= SLJIT_CALL1) { #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL) @@ -2638,11 +2563,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3) srcw += sizeof(sljit_sw); #endif -#endif -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64) - if (src == SLJIT_R2) { - EMIT_MOV(compiler, TMP_REG1, 0, src, 0); - src = TMP_REG1; +#else + if ((src & SLJIT_MEM) || IS_REG_CHANGED_BY_CALL(src, type)) { + EMIT_MOV(compiler, TMP_REG2, 0, src, srcw); + src = TMP_REG2; } #endif FAIL_IF(call_with_args(compiler, type)); @@ -2665,7 +2589,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi FAIL_IF_NULL(inst); *inst++ = 0; - *inst++ = type + 4; + *inst++ = type + 2; } else { #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) @@ -2682,37 +2606,29 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 dst, sljit_sw dstw, - sljit_s32 src, sljit_sw srcw, sljit_s32 type) { sljit_u8 *inst; sljit_u8 cond_set = 0; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) sljit_s32 reg; -#else - /* CHECK_EXTRA_REGS migh overwrite these values. */ +#endif + /* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */ sljit_s32 dst_save = dst; sljit_sw dstw_save = dstw; -#endif CHECK_ERROR(); - CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type)); - SLJIT_UNUSED_ARG(srcw); - - if (dst == SLJIT_UNUSED) - return SLJIT_SUCCESS; + CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); CHECK_EXTRA_REGS(dst, dstw, (void)0); - if (SLJIT_UNLIKELY(compiler->flags_saved)) - FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS)); type &= 0xff; /* setcc = jcc + 0x10. */ cond_set = get_jump_code(type) + 0x10; #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) { + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) { inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3); FAIL_IF(!inst); INC_SIZE(4 + 3); @@ -2727,7 +2643,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return SLJIT_SUCCESS; } - reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1; + reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1; inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4); FAIL_IF(!inst); @@ -2738,6 +2654,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co *inst++ = cond_set; *inst++ = MOD_REG | reg_lmap[reg]; *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R)); + /* The movzx instruction does not affect flags. */ *inst++ = GROUP_0F; *inst++ = MOVZX_r_rm8; *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]; @@ -2749,12 +2666,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV; return emit_mov(compiler, dst, dstw, TMP_REG1, 0); } + #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) compiler->skip_checks = 1; #endif - return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0); -#else /* SLJIT_CONFIG_X86_64 */ + return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0); + +#else + /* The SLJIT_CONFIG_X86_32 code path starts here. */ if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) { if (reg_map[dst] <= 4) { /* Low byte is accessible. */ @@ -2808,8 +2728,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return SLJIT_SUCCESS; } - if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) { - SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax); + if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) { + SLJIT_ASSERT(reg_map[SLJIT_R0] == 0); + if (dst != SLJIT_R0) { inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1); FAIL_IF(!inst); @@ -2868,6 +2789,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co #endif /* SLJIT_CONFIG_X86_64 */ } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_u8* inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + +#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) + dst_reg &= ~SLJIT_I32_OP; + + if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3)) + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +#else + if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV)) + return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw); +#endif + + /* ADJUST_LOCAL_OFFSET is not needed. */ + CHECK_EXTRA_REGS(src, srcw, (void)0); + +#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) + compiler->mode32 = dst_reg & SLJIT_I32_OP; + dst_reg &= ~SLJIT_I32_OP; +#endif + + if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); + src = TMP_REG1; + srcw = 0; + } + + inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); + FAIL_IF(!inst); + *inst++ = GROUP_0F; + *inst = get_jump_code(type & 0xff) - 0x40; + return SLJIT_SUCCESS; +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { CHECK_ERROR(); @@ -2886,16 +2847,16 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *c if (NOT_HALFWORD(offset)) { FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset)); #if (defined SLJIT_DEBUG && SLJIT_DEBUG) - SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); + SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED); return compiler->error; #else - return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); + return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0); #endif } #endif if (offset != 0) - return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); + return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset); return emit_mov(compiler, dst, dstw, SLJIT_SP, 0); } @@ -2919,14 +2880,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) compiler->mode32 = 0; - reg = SLOW_IS_REG(dst) ? dst : TMP_REG1; + reg = FAST_IS_REG(dst) ? dst : TMP_REG1; if (emit_load_imm64(compiler, reg, init_value)) return NULL; #else - if (dst == SLJIT_UNUSED) - dst = TMP_REG1; - if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value)) return NULL; #endif @@ -2946,82 +2904,18 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi return const_; } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset) { + SLJIT_UNUSED_ARG(executable_offset); #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) - sljit_unaligned_store_sw((void*)addr, new_addr - (addr + 4)); + sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset); #else - sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_addr); + sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target); #endif } -SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant) +SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset) { + SLJIT_UNUSED_ARG(executable_offset); sljit_unaligned_store_sw((void*)addr, new_constant); } - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_sse2_available(void) -{ -#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2) - if (cpu_has_sse2 == -1) - get_cpu_features(); - return cpu_has_sse2; -#else - return 1; -#endif -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_is_cmov_available(void) -{ - if (cpu_has_cmov == -1) - get_cpu_features(); - return cpu_has_cmov; -} - -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_x86_emit_cmov(struct sljit_compiler *compiler, - sljit_s32 type, - sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) -{ - sljit_u8* inst; - - CHECK_ERROR(); -#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - CHECK_ARGUMENT(sljit_x86_is_cmov_available()); - CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_I32_OP))); - CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_ORDERED_F64); - CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_I32_OP)); - FUNCTION_CHECK_SRC(src, srcw); -#endif -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) - if (SLJIT_UNLIKELY(!!compiler->verbose)) { - fprintf(compiler->verbose, " x86_cmov%s %s%s, ", - !(dst_reg & SLJIT_I32_OP) ? "" : ".i", - jump_names[type & 0xff], JUMP_POSTFIX(type)); - sljit_verbose_reg(compiler, dst_reg & ~SLJIT_I32_OP); - fprintf(compiler->verbose, ", "); - sljit_verbose_param(compiler, src, srcw); - fprintf(compiler->verbose, "\n"); - } -#endif - - ADJUST_LOCAL_OFFSET(src, srcw); - CHECK_EXTRA_REGS(src, srcw, (void)0); - -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - compiler->mode32 = dst_reg & SLJIT_I32_OP; -#endif - dst_reg &= ~SLJIT_I32_OP; - - if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw); - src = TMP_REG1; - srcw = 0; - } - - inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw); - FAIL_IF(!inst); - *inst++ = GROUP_0F; - *inst = get_jump_code(type & 0xff) - 0x40; - return SLJIT_SUCCESS; -} diff --git a/src/3rdparty/pcre2/src/sljit/sljitUtils.c b/src/3rdparty/pcre2/src/sljit/sljitUtils.c index ec5c321194..9029db292c 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitUtils.c +++ b/src/3rdparty/pcre2/src/sljit/sljitUtils.c @@ -1,7 +1,7 @@ /* * Stack-less Just-In-Time compiler * - * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. + * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, are * permitted provided that the following conditions are met: @@ -206,10 +206,7 @@ static sljit_sw sljit_page_align = 0; SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(sljit_uw limit, sljit_uw max_limit, void *allocator_data) { struct sljit_stack *stack; - union { - void *ptr; - sljit_uw uw; - } base; + void *ptr; #ifdef _WIN32 SYSTEM_INFO si; #endif @@ -233,29 +230,29 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj } #endif - /* Align limit and max_limit. */ - max_limit = (max_limit + sljit_page_align) & ~sljit_page_align; - stack = (struct sljit_stack*)SLJIT_MALLOC(sizeof(struct sljit_stack), allocator_data); if (!stack) return NULL; + /* Align max_limit. */ + max_limit = (max_limit + sljit_page_align) & ~sljit_page_align; + #ifdef _WIN32 - base.ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE); - if (!base.ptr) { + ptr = VirtualAlloc(NULL, max_limit, MEM_RESERVE, PAGE_READWRITE); + if (!ptr) { SLJIT_FREE(stack, allocator_data); return NULL; } - stack->base = base.uw; + stack->max_limit = (sljit_u8 *)ptr; + stack->base = stack->max_limit + max_limit; stack->limit = stack->base; - stack->max_limit = stack->base + max_limit; - if (sljit_stack_resize(stack, stack->base + limit)) { + if (sljit_stack_resize(stack, stack->base - limit)) { sljit_free_stack(stack, allocator_data); return NULL; } #else #ifdef MAP_ANON - base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); + ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0); #else if (dev_zero < 0) { if (open_dev_zero()) { @@ -263,15 +260,15 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj return NULL; } } - base.ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); + ptr = mmap(NULL, max_limit, PROT_READ | PROT_WRITE, MAP_PRIVATE, dev_zero, 0); #endif - if (base.ptr == MAP_FAILED) { + if (ptr == MAP_FAILED) { SLJIT_FREE(stack, allocator_data); return NULL; } - stack->base = base.uw; - stack->limit = stack->base + limit; - stack->max_limit = stack->base + max_limit; + stack->max_limit = (sljit_u8 *)ptr; + stack->base = stack->max_limit + max_limit; + stack->limit = stack->base - limit; #endif stack->top = stack->base; return stack; @@ -279,53 +276,53 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_stack* SLJIT_CALL sljit_allocate_stack(slj #undef PAGE_ALIGN -SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack* stack, void *allocator_data) +SLJIT_API_FUNC_ATTRIBUTE void SLJIT_CALL sljit_free_stack(struct sljit_stack *stack, void *allocator_data) { SLJIT_UNUSED_ARG(allocator_data); #ifdef _WIN32 - VirtualFree((void*)stack->base, 0, MEM_RELEASE); + VirtualFree((void*)stack->max_limit, 0, MEM_RELEASE); #else - munmap((void*)stack->base, stack->max_limit - stack->base); + munmap((void*)stack->max_limit, stack->base - stack->max_limit); #endif SLJIT_FREE(stack, allocator_data); } -SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack* stack, sljit_uw new_limit) +SLJIT_API_FUNC_ATTRIBUTE sljit_sw SLJIT_CALL sljit_stack_resize(struct sljit_stack *stack, sljit_u8 *new_limit) { sljit_uw aligned_old_limit; sljit_uw aligned_new_limit; - if ((new_limit > stack->max_limit) || (new_limit < stack->base)) + if ((new_limit < stack->max_limit) || (new_limit >= stack->base)) return -1; #ifdef _WIN32 - aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align; - aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align; + aligned_new_limit = (sljit_uw)new_limit & ~sljit_page_align; + aligned_old_limit = ((sljit_uw)stack->limit) & ~sljit_page_align; if (aligned_new_limit != aligned_old_limit) { - if (aligned_new_limit > aligned_old_limit) { - if (!VirtualAlloc((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_COMMIT, PAGE_READWRITE)) + if (aligned_new_limit < aligned_old_limit) { + if (!VirtualAlloc((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_COMMIT, PAGE_READWRITE)) return -1; } else { - if (!VirtualFree((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MEM_DECOMMIT)) + if (!VirtualFree((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MEM_DECOMMIT)) return -1; } } stack->limit = new_limit; return 0; #else - if (new_limit >= stack->limit) { + if (new_limit <= stack->limit) { stack->limit = new_limit; return 0; } - aligned_new_limit = (new_limit + sljit_page_align) & ~sljit_page_align; - aligned_old_limit = (stack->limit + sljit_page_align) & ~sljit_page_align; + aligned_new_limit = (sljit_uw)new_limit & ~sljit_page_align; + aligned_old_limit = ((sljit_uw)stack->limit) & ~sljit_page_align; /* If madvise is available, we release the unnecessary space. */ #if defined(MADV_DONTNEED) - if (aligned_new_limit < aligned_old_limit) - madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, MADV_DONTNEED); + if (aligned_new_limit > aligned_old_limit) + madvise((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, MADV_DONTNEED); #elif defined(POSIX_MADV_DONTNEED) - if (aligned_new_limit < aligned_old_limit) - posix_madvise((void*)aligned_new_limit, aligned_old_limit - aligned_new_limit, POSIX_MADV_DONTNEED); + if (aligned_new_limit > aligned_old_limit) + posix_madvise((void*)aligned_old_limit, aligned_new_limit - aligned_old_limit, POSIX_MADV_DONTNEED); #endif stack->limit = new_limit; return 0; |