diff options
Diffstat (limited to 'src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c')
-rw-r--r-- | src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c | 1596 |
1 files changed, 1361 insertions, 235 deletions
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c index 96453b4abe..b268582f42 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c @@ -67,78 +67,124 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = { /* Instrucion forms */ /* --------------------------------------------------------------------- */ -#define ADC 0x9a000000 -#define ADD 0x8b000000 -#define ADDE 0x8b200000 -#define ADDI 0x91000000 -#define AND 0x8a000000 -#define ANDI 0x92000000 -#define ASRV 0x9ac02800 -#define B 0x14000000 -#define B_CC 0x54000000 -#define BL 0x94000000 -#define BLR 0xd63f0000 -#define BR 0xd61f0000 -#define BRK 0xd4200000 -#define CBZ 0xb4000000 -#define CLZ 0xdac01000 -#define CSEL 0x9a800000 -#define CSINC 0x9a800400 -#define EOR 0xca000000 -#define EORI 0xd2000000 -#define FABS 0x1e60c000 -#define FADD 0x1e602800 -#define FCMP 0x1e602000 -#define FCVT 0x1e224000 -#define FCVTZS 0x9e780000 -#define FDIV 0x1e601800 -#define FMOV 0x1e604000 -#define FMUL 0x1e600800 -#define FNEG 0x1e614000 -#define FSUB 0x1e603800 -#define LDRI 0xf9400000 -#define LDRI_F64 0xfd400000 -#define LDP 0xa9400000 -#define LDP_F64 0x6d400000 -#define LDP_POST 0xa8c00000 -#define LDR_PRE 0xf8400c00 -#define LSLV 0x9ac02000 -#define LSRV 0x9ac02400 -#define MADD 0x9b000000 -#define MOVK 0xf2800000 -#define MOVN 0x92800000 -#define MOVZ 0xd2800000 -#define NOP 0xd503201f -#define ORN 0xaa200000 -#define ORR 0xaa000000 -#define ORRI 0xb2000000 -#define RET 0xd65f0000 -#define SBC 0xda000000 -#define SBFM 0x93000000 -#define SCVTF 0x9e620000 -#define SDIV 0x9ac00c00 -#define SMADDL 0x9b200000 -#define SMULH 0x9b403c00 -#define STP 0xa9000000 -#define STP_F64 0x6d000000 -#define STP_PRE 0xa9800000 -#define STRB 0x38206800 -#define STRBI 0x39000000 -#define STRI 0xf9000000 -#define STRI_F64 0xfd000000 -#define STR_FI 0x3d000000 -#define STR_FR 0x3c206800 -#define STUR_FI 0x3c000000 -#define STURBI 0x38000000 -#define SUB 0xcb000000 -#define SUBI 0xd1000000 -#define SUBS 0xeb000000 -#define UBFM 0xd3000000 -#define UDIV 0x9ac00800 -#define UMULH 0x9bc03c00 - -/* dest_reg is the absolute name of the register - Useful for reordering instructions in the delay slot. */ +#define ADC 0x9a000000 +#define ADD 0x8b000000 +#define ADDE 0x8b200000 +#define ADDI 0x91000000 +#define AND 0x8a000000 +#define ANDI 0x92000000 +#define AND_v 0x0e201c00 +#define ASRV 0x9ac02800 +#define B 0x14000000 +#define B_CC 0x54000000 +#define BL 0x94000000 +#define BLR 0xd63f0000 +#define BR 0xd61f0000 +#define BRK 0xd4200000 +#define CAS 0xc8a07c00 +#define CASB 0x08a07c00 +#define CASH 0x48a07c00 +#define CBZ 0xb4000000 +#define CCMPI 0xfa400800 +#define CLZ 0xdac01000 +#define CSEL 0x9a800000 +#define CSINC 0x9a800400 +#define DUP_e 0x0e000400 +#define DUP_g 0x0e000c00 +#define EOR 0xca000000 +#define EOR_v 0x2e201c00 +#define EORI 0xd2000000 +#define EXTR 0x93c00000 +#define FABS 0x1e60c000 +#define FADD 0x1e602800 +#define FCMP 0x1e602000 +#define FCSEL 0x1e600c00 +#define FCVT 0x1e224000 +#define FCVTL 0x0e217800 +#define FCVTZS 0x9e780000 +#define FDIV 0x1e601800 +#define FMOV 0x1e604000 +#define FMOV_R 0x9e660000 +#define FMOV_I 0x1e601000 +#define FMUL 0x1e600800 +#define FNEG 0x1e614000 +#define FSUB 0x1e603800 +#define INS 0x4e001c00 +#define INS_e 0x6e000400 +#define LD1 0x0c407000 +#define LD1_s 0x0d400000 +#define LD1R 0x0d40c000 +#define LDRI 0xf9400000 +#define LDRI_F64 0xfd400000 +#define LDRI_POST 0xf8400400 +#define LDP 0xa9400000 +#define LDP_F64 0x6d400000 +#define LDP_POST 0xa8c00000 +#define LDR_PRE 0xf8400c00 +#define LDXR 0xc85f7c00 +#define LDXRB 0x085f7c00 +#define LDXRH 0x485f7c00 +#define LSLV 0x9ac02000 +#define LSRV 0x9ac02400 +#define MADD 0x9b000000 +#define MOVI 0x0f000400 +#define MOVK 0xf2800000 +#define MOVN 0x92800000 +#define MOVZ 0xd2800000 +#define NOP 0xd503201f +#define ORN 0xaa200000 +#define ORR 0xaa000000 +#define ORR_v 0x0ea01c00 +#define ORRI 0xb2000000 +#define RBIT 0xdac00000 +#define RET 0xd65f0000 +#define REV 0xdac00c00 +#define REV16 0xdac00400 +#define RORV 0x9ac02c00 +#define SBC 0xda000000 +#define SBFM 0x93400000 +#define SCVTF 0x9e620000 +#define SDIV 0x9ac00c00 +#define SMADDL 0x9b200000 +#define SMOV 0x0e002c00 +#define SMULH 0x9b403c00 +#define SSHLL 0x0f00a400 +#define ST1 0x0c007000 +#define ST1_s 0x0d000000 +#define STP 0xa9000000 +#define STP_F64 0x6d000000 +#define STP_PRE 0xa9800000 +#define STRB 0x38206800 +#define STRBI 0x39000000 +#define STRI 0xf9000000 +#define STRI_F64 0xfd000000 +#define STR_FI 0x3d000000 +#define STR_FR 0x3c206800 +#define STUR_FI 0x3c000000 +#define STURBI 0x38000000 +#define STXR 0xc8007c00 +#define STXRB 0x8007c00 +#define STXRH 0x48007c00 +#define SUB 0xcb000000 +#define SUBI 0xd1000000 +#define SUBS 0xeb000000 +#define TBZ 0x36000000 +#define UBFM 0xd3400000 +#define UCVTF 0x9e630000 +#define UDIV 0x9ac00800 +#define UMOV 0x0e003c00 +#define UMULH 0x9bc03c00 +#define USHLL 0x2f00a400 +#define USHR 0x2f000400 +#define USRA 0x2f001400 +#define XTN 0x0e212800 + +#define CSET (CSINC | RM(TMP_ZERO) | RN(TMP_ZERO)) +#define LDR (STRI | (1 << 22)) +#define LDRB (STRBI | (1 << 22)) +#define LDRH (LDRB | (1 << 30)) +#define MOV (ORR | RN(TMP_ZERO)) + static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins) { sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins)); @@ -173,7 +219,7 @@ static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset; } - diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset; + diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr - 4) - executable_offset; if (jump->flags & IS_COND) { diff += SSIZE_OF(ins); @@ -296,8 +342,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil } next_addr = compute_next_addr(label, jump, const_, put_label); } - code_ptr ++; - word_count ++; + code_ptr++; + word_count++; } while (buf_ptr < buf_end); buf = buf->next; @@ -383,16 +429,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) { switch (feature_type) { case SLJIT_HAS_FPU: + case SLJIT_HAS_SIMD: #ifdef SLJIT_IS_FPU_AVAILABLE - return SLJIT_IS_FPU_AVAILABLE; + return (SLJIT_IS_FPU_AVAILABLE) != 0; #else /* Available by default. */ return 1; #endif case SLJIT_HAS_CLZ: + case SLJIT_HAS_CTZ: + case SLJIT_HAS_REV: + case SLJIT_HAS_ROT: case SLJIT_HAS_CMOV: case SLJIT_HAS_PREFETCH: + case SLJIT_HAS_COPY_F32: + case SLJIT_HAS_COPY_F64: + case SLJIT_HAS_ATOMIC: return 1; default: @@ -400,6 +453,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type) } } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type) +{ + switch (type) { + case SLJIT_UNORDERED_OR_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: + return 2; + } + + return 0; +} + /* --------------------------------------------------------------------- */ /* Core code generator functions. */ /* --------------------------------------------------------------------- */ @@ -631,6 +695,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s switch (op) { case SLJIT_MUL: case SLJIT_CLZ: + case SLJIT_CTZ: + case SLJIT_REV: + case SLJIT_REV_U16: + case SLJIT_REV_S16: + case SLJIT_REV_U32: + case SLJIT_REV_S32: case SLJIT_ADDC: case SLJIT_SUBC: /* No form with immediate operand (except imm 0, which @@ -639,10 +709,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s case SLJIT_MOV: SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1); return load_immediate(compiler, dst, imm); - case SLJIT_NOT: - SLJIT_ASSERT(flags & ARG2_IMM); - FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm)); - goto set_flags; case SLJIT_SUB: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB; if (flags & ARG1_IMM) @@ -689,8 +755,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s break; CHECK_FLAGS(3 << 29); return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits); - case SLJIT_OR: case SLJIT_XOR: + if (imm == -1) { + FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(reg))); + goto set_flags; + } + /* fallthrough */ + case SLJIT_OR: inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32)); if (!inst_bits) break; @@ -701,36 +772,52 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg))); goto set_flags; case SLJIT_SHL: + case SLJIT_MSHL: if (flags & ARG1_IMM) break; + if (flags & INT_OP) { imm &= 0x1f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) - | (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10))); - } - else { + inst_bits = (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10); + } else { imm &= 0x3f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22) - | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10))); + inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10); } + + inv_bits |= inv_bits >> 9; + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits)); goto set_flags; case SLJIT_LSHR: + case SLJIT_MLSHR: case SLJIT_ASHR: + case SLJIT_MASHR: if (flags & ARG1_IMM) break; - if (op == SLJIT_ASHR) + + inv_bits |= inv_bits >> 9; + if (op >= SLJIT_ASHR) inv_bits |= 1 << 30; + if (flags & INT_OP) { imm &= 0x1f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) - | ((sljit_ins)imm << 16) | (31 << 10))); - } - else { + inst_bits = ((sljit_ins)imm << 16) | (31 << 10); + } else { imm &= 0x3f; - FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) - | (1 << 22) | ((sljit_ins)imm << 16) | (63 << 10))); + inst_bits = ((sljit_ins)1 << 22) | ((sljit_ins)imm << 16) | (63 << 10); } + + FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits)); goto set_flags; + case SLJIT_ROTL: + case SLJIT_ROTR: + if (flags & ARG1_IMM) + break; + + if (op == SLJIT_ROTL) + imm = -imm; + + imm &= (flags & INT_OP) ? 0x1f : 0x3f; + return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst) | RN(arg1) | RM(arg1) | ((sljit_ins)imm << 10)); default: SLJIT_UNREACHABLE(); break; @@ -761,22 +848,22 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); if (dst == arg2) return SLJIT_SUCCESS; - return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + return push_inst(compiler, MOV | RD(dst) | RM(arg2)); case SLJIT_MOV_U8: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (7 << 10)); + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); case SLJIT_MOV_S8: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if (!(flags & INT_OP)) - inv_bits |= 1 << 22; + inv_bits |= inv_bits >> 9; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10)); case SLJIT_MOV_U16: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (15 << 10)); + inv_bits |= inv_bits >> 9; + return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); case SLJIT_MOV_S16: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - if (!(flags & INT_OP)) - inv_bits |= 1 << 22; + inv_bits |= inv_bits >> 9; return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10)); case SLJIT_MOV32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); @@ -785,17 +872,36 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s /* fallthrough */ case SLJIT_MOV_U32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); - return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2)); + return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2)); case SLJIT_MOV_S32: SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1); return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10)); - case SLJIT_NOT: - SLJIT_ASSERT(arg1 == TMP_REG1); - FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2))); - break; /* Set flags. */ case SLJIT_CLZ: SLJIT_ASSERT(arg1 == TMP_REG1); return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2)); + case SLJIT_CTZ: + SLJIT_ASSERT(arg1 == TMP_REG1); + FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2))); + return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst)); + case SLJIT_REV: + SLJIT_ASSERT(arg1 == TMP_REG1); + inv_bits |= inv_bits >> 21; + return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2)); + case SLJIT_REV_U16: + case SLJIT_REV_S16: + SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); + FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2))); + if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16)) + return SLJIT_SUCCESS; + inv_bits |= inv_bits >> 9; + return push_inst(compiler, ((op == SLJIT_REV_U16 ? UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10)); + case SLJIT_REV_U32: + case SLJIT_REV_S32: + SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2); + FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2))); + if (op == SLJIT_REV_U32 || dst == TMP_REG1) + return SLJIT_SUCCESS; + return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10)); case SLJIT_ADD: compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD; CHECK_FLAGS(1 << 29); @@ -834,14 +940,23 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); break; /* Set flags. */ case SLJIT_SHL: + case SLJIT_MSHL: FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); break; /* Set flags. */ case SLJIT_LSHR: + case SLJIT_MLSHR: FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); break; /* Set flags. */ case SLJIT_ASHR: + case SLJIT_MASHR: FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2))); break; /* Set flags. */ + case SLJIT_ROTL: + FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(arg2))); + arg2 = TMP_REG2; + /* fallthrough */ + case SLJIT_ROTR: + return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)); default: SLJIT_UNREACHABLE(); return SLJIT_SUCCESS; @@ -895,21 +1010,37 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); } - if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) { - if ((argw >> shift) <= 0xfff) - return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); + if ((argw & ((1 << shift) - 1)) == 0) { + if (argw >= 0) { + if ((argw >> shift) <= 0xfff) + return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift))); - if (argw <= 0xffffff) { - FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); + if (argw <= 0xffffff) { + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); - argw = ((argw & 0xfff) >> shift); + argw = ((argw & 0xfff) >> shift); + return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); + } + } else if (argw < -256 && argw >= -0xfff000) { + FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)(-argw + 0xfff) >> 12) << 10))); + argw = ((0x1000 + argw) & 0xfff) >> shift; return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10)); } } - if (argw <= 255 && argw >= -256) + if (argw <= 0xff && argw >= -0x100) return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12)); + if (argw >= 0) { + if (argw <= 0xfff0ff && ((argw + 0x100) & 0xfff) <= 0x1ff) { + FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10))); + return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); + } + } else if (argw >= -0xfff100 && ((-argw + 0xff) & 0xfff) <= 0x1ff) { + FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10))); + return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12)); + } + FAIL_IF(load_immediate(compiler, tmp_reg, argw)); return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg)); @@ -924,15 +1055,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) { sljit_s32 prev, fprev, saved_regs_size, i, tmp; - sljit_s32 word_arg_count = 0; + sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options); sljit_ins offs; CHECK_ERROR(); CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); - saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); local_size = (local_size + saved_regs_size + 0xf) & ~0xf; compiler->local_size = local_size; @@ -954,7 +1085,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi prev = -1; tmp = SLJIT_S0 - saveds; - for (i = SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) { if (prev == -1) { prev = i; continue; @@ -1003,23 +1134,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi if (prev != -1) FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0))); - arg_types >>= SLJIT_ARG_SHIFT; #ifdef _WIN32 if (local_size > 4096) FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22))); #endif /* _WIN32 */ - tmp = 0; - while (arg_types > 0) { - if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { - if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { - FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count))); + if (!(options & SLJIT_ENTER_REG_ARG)) { + arg_types >>= SLJIT_ARG_SHIFT; + saved_arg_count = 0; + tmp = SLJIT_R0; + + while (arg_types) { + if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) { + if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) { + FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(tmp))); + saved_arg_count++; + } tmp++; } - word_arg_count++; + arg_types >>= SLJIT_ARG_SHIFT; } - arg_types >>= SLJIT_ARG_SHIFT; } #ifdef _WIN32 @@ -1100,26 +1235,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size)); set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size); - saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2); - saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64)); + saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2); + saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64); compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf; return SLJIT_SUCCESS; } -static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) +static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to) { sljit_s32 local_size, prev, fprev, i, tmp; sljit_ins offs; local_size = compiler->local_size; - if (local_size > 512 && local_size <= 512 + 496) { - FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR) - | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3)))); - local_size = 512; - } else - FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + if (!is_return_to) { + if (local_size > 512 && local_size <= 512 + 496) { + FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR) + | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3)))); + local_size = 512; + } else + FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP))); + } else { + if (local_size > 512 && local_size <= 512 + 248) { + FAIL_IF(push_inst(compiler, LDRI_POST | RT(TMP_FP) | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << 12))); + local_size = 512; + } else + FAIL_IF(push_inst(compiler, LDRI | RT(TMP_FP) | RN(SLJIT_SP) | 0)); + } if (local_size > 512) { local_size -= 512; @@ -1137,7 +1280,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler) prev = -1; tmp = SLJIT_S0 - compiler->saveds; - for (i = SLJIT_S0; i > tmp; i--) { + for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) { if (prev == -1) { prev = i; continue; @@ -1195,11 +1338,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler CHECK_ERROR(); CHECK(check_sljit_emit_return_void(compiler)); - FAIL_IF(emit_stack_frame_release(compiler)); + FAIL_IF(emit_stack_frame_release(compiler, 0)); return push_inst(compiler, RET | RN(TMP_LR)); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, + sljit_s32 src, sljit_sw srcw) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_return_to(compiler, src, srcw)); + + if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); + src = TMP_REG1; + srcw = 0; + } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); + src = TMP_REG1; + srcw = 0; + } + + FAIL_IF(emit_stack_frame_release(compiler, 1)); + + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw); +} + /* --------------------------------------------------------------------- */ /* Operators */ /* --------------------------------------------------------------------- */ @@ -1219,12 +1385,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile return push_inst(compiler, NOP); case SLJIT_LMUL_UW: case SLJIT_LMUL_SW: - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(SLJIT_R0))); FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); case SLJIT_DIVMOD_UW: case SLJIT_DIVMOD_SW: - FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0))); + FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RD(TMP_REG1) | RM(SLJIT_R0))); FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1))); FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO))); return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1)); @@ -1266,33 +1432,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; case SLJIT_MOV_U8: mem_flags = BYTE_SIZE; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_u8)srcw; break; case SLJIT_MOV_S8: mem_flags = BYTE_SIZE | SIGNED; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_s8)srcw; break; case SLJIT_MOV_U16: mem_flags = HALF_SIZE; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_u16)srcw; break; case SLJIT_MOV_S16: mem_flags = HALF_SIZE | SIGNED; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_s16)srcw; break; case SLJIT_MOV_U32: mem_flags = INT_SIZE; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_u32)srcw; break; case SLJIT_MOV_S32: case SLJIT_MOV32: mem_flags = INT_SIZE | SIGNED; - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) srcw = (sljit_s32)srcw; break; default: @@ -1301,7 +1467,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile break; } - if (src & SLJIT_IMM) + if (src == SLJIT_IMM) FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw)); else if (!(src & SLJIT_MEM)) dst_r = src; @@ -1314,11 +1480,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile } flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0; - mem_flags = WORD_SIZE; - if (op_flags & SLJIT_32) { - flags |= INT_OP; + switch (op) { + case SLJIT_REV_U16: + case SLJIT_REV_S16: + mem_flags = HALF_SIZE; + break; + case SLJIT_REV_U32: + case SLJIT_REV_S32: mem_flags = INT_SIZE; + break; + default: + mem_flags = WORD_SIZE; + + if (op_flags & SLJIT_32) { + flags |= INT_OP; + mem_flags = INT_SIZE; + } + break; } if (src & SLJIT_MEM) { @@ -1368,12 +1547,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile src2 = TMP_REG2; } - if (src1 & SLJIT_IMM) + if (src1 == SLJIT_IMM) flags |= ARG1_IMM; else src1w = src1; - if (src2 & SLJIT_IMM) + if (src2 == SLJIT_IMM) flags |= ARG2_IMM; else src2w = src2; @@ -1392,13 +1571,79 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil CHECK_ERROR(); CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w)); -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w); } +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 src1_reg, + sljit_s32 src2_reg, + sljit_s32 src3, sljit_sw src3w) +{ + sljit_ins inv_bits, imm; + sljit_s32 is_left; + sljit_sw mask; + + CHECK_ERROR(); + CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w)); + + is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL); + + if (src1_reg == src2_reg) { + SLJIT_SKIP_CHECKS(compiler); + return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w); + } + + ADJUST_LOCAL_OFFSET(src3, src3w); + + inv_bits = (op & SLJIT_32) ? W_OP : 0; + + if (src3 == SLJIT_IMM) { + mask = inv_bits ? 0x1f : 0x3f; + src3w &= mask; + + if (src3w == 0) + return SLJIT_SUCCESS; + + if (is_left) + src3w = (src3w ^ mask) + 1; + + return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst_reg) + | RN(is_left ? src1_reg : src2_reg) | RM(is_left ? src2_reg : src1_reg) | ((sljit_ins)src3w << 10)); + } + + if (src3 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2)); + src3 = TMP_REG2; + } else if (dst_reg == src3) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src3))); + src3 = TMP_REG2; + } + + FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(dst_reg) | RN(src1_reg) | RM(src3))); + + if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) { + /* Shift left/right by 1. */ + if (is_left) + imm = (sljit_ins)(inv_bits ? ((1 << 16) | (31 << 10)) : ((1 << 16) | (63 << 10) | (1 << 22))); + else + imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22))); + + FAIL_IF(push_inst(compiler, (UBFM ^ (inv_bits | (inv_bits >> 9))) | RD(TMP_REG1) | RN(src2_reg) | imm)); + + /* Set imm to mask. */ + imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22))); + FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src3) | imm)); + + src2_reg = TMP_REG1; + } else + FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src3))); + + FAIL_IF(push_inst(compiler, ((is_left ? LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src2_reg) | RM(TMP_REG2))); + return push_inst(compiler, (ORR ^ inv_bits) | RD(dst_reg) | RN(dst_reg) | RM(TMP_REG1)); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw) { @@ -1409,7 +1654,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp switch (op) { case SLJIT_FAST_RETURN: if (FAST_IS_REG(src)) - FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src))); + FAIL_IF(push_inst(compiler, MOV | RD(TMP_LR) | RM(src))); else FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1)); @@ -1439,15 +1684,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw) { - CHECK_REG_INDEX(check_sljit_get_register_index(reg)); - return reg_map[reg]; + sljit_s32 dst_r = TMP_LR; + + CHECK_ERROR(); + CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw)); + ADJUST_LOCAL_OFFSET(dst, dstw); + + switch (op) { + case SLJIT_FAST_ENTER: + if (FAST_IS_REG(dst)) + return push_inst(compiler, MOV | RD(dst) | RM(TMP_LR)); + break; + case SLJIT_GET_RETURN_ADDRESS: + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), 0x8, TMP_REG2)); + break; + } + + if (dst & SLJIT_MEM) + return emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2); + + return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg) +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg) { - CHECK_REG_INDEX(check_sljit_get_float_register_index(reg)); + CHECK_REG_INDEX(check_sljit_get_register_index(type, reg)); + + if (type == SLJIT_GP_REGISTER) + return reg_map[reg]; + + if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_64 && type != SLJIT_SIMD_REG_128) + return -1; + return freg_map[reg]; } @@ -1525,7 +1797,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp inv_bits |= W_OP; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw); + FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw)); src = TMP_FREG1; } @@ -1536,35 +1808,59 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp return SLJIT_SUCCESS; } -static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, +static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins, sljit_s32 dst, sljit_sw dstw, sljit_s32 src, sljit_sw srcw) { sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; - sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; - - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) - inv_bits |= W_OP; if (src & SLJIT_MEM) { - emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1); + emit_op_mem(compiler, (ins & W_OP) ? WORD_SIZE : INT_SIZE, TMP_REG1, src, srcw, TMP_REG1); src = TMP_REG1; - } else if (src & SLJIT_IMM) { -#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) - if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) - srcw = (sljit_s32)srcw; -#endif + } else if (src == SLJIT_IMM) { FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; } - FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src))); + FAIL_IF(push_inst(compiler, ins | VD(dst_r) | RN(src))); if (dst & SLJIT_MEM) - return emit_fop_mem(compiler, ((op & SLJIT_32) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw); + return emit_fop_mem(compiler, ((ins & (1 << 22)) ? WORD_SIZE : INT_SIZE) | STORE, TMP_FREG1, dst, dstw); return SLJIT_SUCCESS; } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) { + inv_bits |= W_OP; + + if (src == SLJIT_IMM) + srcw = (sljit_s32)srcw; + } + + return sljit_emit_fop1_conv_f64_from_w(compiler, SCVTF ^ inv_bits, dst, dstw, src, srcw); +} + +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + inv_bits |= W_OP; + + if (src == SLJIT_IMM) + srcw = (sljit_u32)srcw; + } + + return sljit_emit_fop1_conv_f64_from_w(compiler, UCVTF ^ inv_bits, dst, dstw, src, srcw); +} + static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src1, sljit_sw src1w, sljit_s32 src2, sljit_sw src2w) @@ -1573,16 +1869,22 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } - return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)); + FAIL_IF(push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2))); + + if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, CSINC | (0x0 << 12) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(TMP_ZERO))); + return push_inst(compiler, CCMPI | (0x0 << 16) | (0x7 << 12) | RN(TMP_REG1) | 0x4); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op, @@ -1601,7 +1903,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src & SLJIT_MEM) { - emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw); + FAIL_IF(emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw)); src = dst_r; } @@ -1646,11 +1948,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1; if (src1 & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w); + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w)); src1 = TMP_FREG1; } if (src2 & SLJIT_MEM) { - emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w); + FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w)); src2 = TMP_FREG2; } @@ -1667,6 +1969,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil case SLJIT_DIV_F64: FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2))); break; + case SLJIT_COPYSIGN_F64: + FAIL_IF(push_inst(compiler, (FMOV_R ^ ((op & SLJIT_32) ? (W_OP | (1 << 22)) : 0)) | VN(src2) | RD(TMP_REG1))); + FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src1))); + FAIL_IF(push_inst(compiler, TBZ | ((op & SLJIT_32) ? 0 : ((sljit_ins)1 << 31)) | (0x1f << 19) | (2 << 5) | RT(TMP_REG1))); + return push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(dst_r)); } if (!(dst & SLJIT_MEM)) @@ -1674,21 +1981,79 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw); } -/* --------------------------------------------------------------------- */ -/* Other instructions */ -/* --------------------------------------------------------------------- */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + sljit_u32 exp; + union { + sljit_u32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_ZERO) | VD(freg) | (1 << 16)); + + if ((u.imm << (32 - 19)) == 0) { + exp = (u.imm >> (23 + 2)) & 0x3f; -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) + if (exp == 0x20 || exp == 0x1f) + return push_inst(compiler, (FMOV_I ^ (1 << 22)) | (sljit_ins)((((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f)) << 13) | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_s32)u.imm)); + return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_REG1) | VD(freg) | (1 << 16)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) { + sljit_uw exp; + union { + sljit_uw imm; + sljit_f64 value; + } u; + CHECK_ERROR(); - CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); - ADJUST_LOCAL_OFFSET(dst, dstw); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm == 0) + return push_inst(compiler, FMOV_R | RN(TMP_ZERO) | VD(freg) | (sljit_ins)1 << 16); + + if ((u.imm << (64 - 48)) == 0) { + exp = (u.imm >> (52 + 2)) & 0x1ff; + + if (exp == 0x100 || exp == 0xff) + return push_inst(compiler, FMOV_I | (sljit_ins)((((u.imm >> 56) & 0x80) | ((u.imm >> 48) & 0x7f)) << 13) | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_sw)u.imm)); + return push_inst(compiler, FMOV_R | RN(TMP_REG1) | VD(freg) | (1 << 16)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_ins inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) + inst = FMOV_R | RN(reg) | VD(freg) | (1 << 16); + else + inst = FMOV_R | VN(freg) | RD(reg); - if (FAST_IS_REG(dst)) - return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR)); + if (op & SLJIT_32) + inst ^= W_OP | (1 << 22); - /* Memory. */ - return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw, TMP_REG1); + return push_inst(compiler, inst); } /* --------------------------------------------------------------------- */ @@ -1699,11 +2064,17 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) { switch (type) { case SLJIT_EQUAL: - case SLJIT_EQUAL_F64: + case SLJIT_ATOMIC_STORED: + case SLJIT_F_EQUAL: + case SLJIT_ORDERED_EQUAL: + case SLJIT_UNORDERED_OR_EQUAL: return 0x1; case SLJIT_NOT_EQUAL: - case SLJIT_NOT_EQUAL_F64: + case SLJIT_ATOMIC_NOT_STORED: + case SLJIT_F_NOT_EQUAL: + case SLJIT_UNORDERED_OR_NOT_EQUAL: + case SLJIT_ORDERED_NOT_EQUAL: return 0x0; case SLJIT_CARRY: @@ -1712,7 +2083,6 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) /* fallthrough */ case SLJIT_LESS: - case SLJIT_LESS_F64: return 0x2; case SLJIT_NOT_CARRY: @@ -1721,27 +2091,33 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) /* fallthrough */ case SLJIT_GREATER_EQUAL: - case SLJIT_GREATER_EQUAL_F64: return 0x3; case SLJIT_GREATER: - case SLJIT_GREATER_F64: + case SLJIT_UNORDERED_OR_GREATER: return 0x9; case SLJIT_LESS_EQUAL: - case SLJIT_LESS_EQUAL_F64: + case SLJIT_F_LESS_EQUAL: + case SLJIT_ORDERED_LESS_EQUAL: return 0x8; case SLJIT_SIG_LESS: + case SLJIT_UNORDERED_OR_LESS: return 0xa; case SLJIT_SIG_GREATER_EQUAL: + case SLJIT_F_GREATER_EQUAL: + case SLJIT_ORDERED_GREATER_EQUAL: return 0xb; case SLJIT_SIG_GREATER: + case SLJIT_F_GREATER: + case SLJIT_ORDERED_GREATER: return 0xd; case SLJIT_SIG_LESS_EQUAL: + case SLJIT_UNORDERED_OR_LESS_EQUAL: return 0xc; case SLJIT_OVERFLOW: @@ -1749,7 +2125,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x0; /* fallthrough */ - case SLJIT_UNORDERED_F64: + case SLJIT_UNORDERED: return 0x7; case SLJIT_NOT_OVERFLOW: @@ -1757,9 +2133,16 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type) return 0x1; /* fallthrough */ - case SLJIT_ORDERED_F64: + case SLJIT_ORDERED: return 0x6; + case SLJIT_F_LESS: + case SLJIT_ORDERED_LESS: + return 0x5; + + case SLJIT_UNORDERED_OR_GREATER_EQUAL: + return 0x4; + default: SLJIT_UNREACHABLE(); return 0xe; @@ -1816,15 +2199,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types)); if (type & SLJIT_CALL_RETURN) { - PTR_FAIL_IF(emit_stack_frame_release(compiler)); + PTR_FAIL_IF(emit_stack_frame_release(compiler, 0)); type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP); } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_jump(compiler, type); } @@ -1846,7 +2225,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } - else if (src & SLJIT_IMM) { + else if (src == SLJIT_IMM) { PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); src = TMP_REG1; } @@ -1869,10 +2248,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi CHECK_ERROR(); CHECK(check_sljit_emit_ijump(compiler, type, src, srcw)); - ADJUST_LOCAL_OFFSET(src, srcw); - if (!(src & SLJIT_IMM)) { + if (src != SLJIT_IMM) { if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } @@ -1897,28 +2276,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi SLJIT_UNUSED_ARG(arg_types); CHECK_ERROR(); CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw)); - ADJUST_LOCAL_OFFSET(src, srcw); if (src & SLJIT_MEM) { + ADJUST_LOCAL_OFFSET(src, srcw); FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1)); src = TMP_REG1; } if (type & SLJIT_CALL_RETURN) { - if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) { - FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src))); + if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) { + FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src))); src = TMP_REG1; } - FAIL_IF(emit_stack_frame_release(compiler)); + FAIL_IF(emit_stack_frame_release(compiler, 0)); type = SLJIT_JUMP; } -#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \ - || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS) - compiler->skip_checks = 1; -#endif - + SLJIT_SKIP_CHECKS(compiler); return sljit_emit_ijump(compiler, type, src, srcw); } @@ -1933,7 +2308,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type)); ADJUST_LOCAL_OFFSET(dst, dstw); - cc = get_cc(compiler, type & 0xff); + cc = get_cc(compiler, type); dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; if (GET_OPCODE(op) < SLJIT_ADD) { @@ -1970,39 +2345,135 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co return SLJIT_SUCCESS; } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 dst_reg, - sljit_s32 src, sljit_sw srcw) + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) { - sljit_ins inv_bits = (dst_reg & SLJIT_32) ? W_OP : 0; + sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0; sljit_ins cc; CHECK_ERROR(); - CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw)); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); - if (SLJIT_UNLIKELY(src & SLJIT_IMM)) { - if (dst_reg & SLJIT_32) - srcw = (sljit_s32)srcw; - FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); - src = TMP_REG1; - srcw = 0; + ADJUST_LOCAL_OFFSET(src1, src1w); + + if (src1 == SLJIT_IMM) { + if (type & SLJIT_32) + src1w = (sljit_s32)src1w; + FAIL_IF(load_immediate(compiler, TMP_REG1, src1w)); + src1 = TMP_REG1; + } else if (src1 & SLJIT_MEM) { + FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG2)); + src1 = TMP_REG1; } - cc = get_cc(compiler, type & 0xff); - dst_reg &= ~SLJIT_32; + cc = get_cc(compiler, type & ~SLJIT_32); + return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(src2_reg) | RM(src1)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_freg) +{ + sljit_ins inv_bits = (type & SLJIT_32) ? (1 << 22) : 0; + sljit_ins cc; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); - return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src)); + if (src1 & SLJIT_MEM) { + FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src1, src1w)); + src1 = TMP_FREG1; + } + + cc = get_cc(compiler, type & ~SLJIT_32); + return push_inst(compiler, (FCSEL ^ inv_bits) | (cc << 12) | VD(dst_freg) | VN(src2_freg) | VM(src1)); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) { - sljit_u32 sign = 0, inst; + sljit_u32 inst; CHECK_ERROR(); CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw)); + if (!(reg & REG_PAIR_MASK)) + return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw); + + ADJUST_LOCAL_OFFSET(mem, memw); + + if (!(mem & REG_MASK)) { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw & ~0x1f8)); + + mem = SLJIT_MEM1(TMP_REG1); + memw &= 0x1f8; + } else if (mem & OFFS_REG_MASK) { + FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10))); + + mem = SLJIT_MEM1(TMP_REG1); + memw = 0; + } else if ((memw & 0x7) != 0 || memw > 0x1f8 || memw < -0x200) { + inst = ADDI; + + if (memw < 0) { + /* Remains negative for integer min. */ + memw = -memw; + inst = SUBI; + } else if ((memw & 0x7) == 0 && memw <= 0x7ff0) { + if (!(type & SLJIT_MEM_STORE) && (mem & REG_MASK) == REG_PAIR_FIRST(reg)) { + FAIL_IF(push_inst(compiler, LDRI | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7))); + return push_inst(compiler, LDRI | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7)); + } + + inst = (type & SLJIT_MEM_STORE) ? STRI : LDRI; + + FAIL_IF(push_inst(compiler, inst | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7))); + return push_inst(compiler, inst | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7)); + } + + if ((sljit_uw)memw <= 0xfff) { + FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(mem & REG_MASK) | ((sljit_ins)memw << 10))); + memw = 0; + } else if ((sljit_uw)memw <= 0xffffff) { + FAIL_IF(push_inst(compiler, inst | (1 << 22) | RD(TMP_REG1) | RN(mem & REG_MASK) | (((sljit_ins)memw >> 12) << 10))); + + if ((memw & 0xe07) != 0) { + FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(TMP_REG1) | (((sljit_ins)memw & 0xfff) << 10))); + memw = 0; + } else { + memw &= 0xfff; + } + } else { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + FAIL_IF(push_inst(compiler, (inst == ADDI ? ADD : SUB) | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(TMP_REG1))); + memw = 0; + } + + mem = SLJIT_MEM1(TMP_REG1); + + if (inst == SUBI) + memw = -memw; + } + + SLJIT_ASSERT((memw & 0x7) == 0 && memw <= 0x1f8 && memw >= -0x200); + return push_inst(compiler, ((type & SLJIT_MEM_STORE) ? STP : LDP) | RT(REG_PAIR_FIRST(reg)) | RT2(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x3f8) << 12)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 reg, + sljit_s32 mem, sljit_sw memw) +{ + sljit_u32 sign = 0, inst; + + CHECK_ERROR(); + CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw)); + if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) return SLJIT_ERR_UNSUPPORTED; @@ -2042,20 +2513,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile if (!(type & SLJIT_MEM_STORE)) inst |= sign ? 0x00800000 : 0x00400000; - if (type & SLJIT_MEM_PRE) + if (!(type & SLJIT_MEM_POST)) inst |= 0x800; return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type, +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 freg, sljit_s32 mem, sljit_sw memw) { sljit_u32 inst; CHECK_ERROR(); - CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw)); + CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw)); if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256)) return SLJIT_ERR_UNSUPPORTED; @@ -2071,12 +2542,667 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil if (!(type & SLJIT_MEM_STORE)) inst |= 0x00400000; - if (type & SLJIT_MEM_PRE) + if (!(type & SLJIT_MEM_POST)) inst |= 0x800; return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12)); } +static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw) +{ + sljit_ins ins; + sljit_s32 mem = *mem_ptr; + + if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) { + *mem_ptr = TMP_REG1; + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)); + } + + if (!(mem & REG_MASK)) { + *mem_ptr = TMP_REG1; + return load_immediate(compiler, TMP_REG1, memw); + } + + mem &= REG_MASK; + + if (memw == 0) { + *mem_ptr = mem; + return SLJIT_SUCCESS; + } + + *mem_ptr = TMP_REG1; + + if (memw < -0xffffff || memw > 0xffffff) { + FAIL_IF(load_immediate(compiler, TMP_REG1, memw)); + return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem)); + } + + ins = ADDI; + + if (memw < 0) { + memw = -memw; + ins = SUBI; + } + + if (memw > 0xfff) { + FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG1) | RN(mem) | ((sljit_ins)(memw >> 12) << 10))); + + memw &= 0xfff; + if (memw == 0) + return SLJIT_SUCCESS; + + mem = TMP_REG1; + } + + return push_inst(compiler, ins | RD(TMP_REG1) | RN(mem) | ((sljit_ins)memw << 10)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (!(srcdst & SLJIT_MEM)) { + if (type & SLJIT_SIMD_STORE) + ins = VD(srcdst) | VN(freg) | VM(freg); + else + ins = VD(freg) | VN(srcdst) | VM(srcdst); + + if (reg_size == 4) + ins |= (1 << 30); + + return push_inst(compiler, ORR_v | ins); + } + + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size > 3) + elem_size = 3; + + ins = (type & SLJIT_SIMD_STORE) ? ST1 : LD1; + + if (reg_size == 4) + ins |= (1 << 30); + + return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(freg)); +} + +static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value) +{ + sljit_ins result; + + if (elem_size > 2 && (sljit_u32)value == (value >> 32)) { + elem_size = 2; + value = (sljit_u32)value; + } + + if (elem_size == 2 && (sljit_u16)value == (value >> 16)) { + elem_size = 1; + value = (sljit_u16)value; + } + + if (elem_size == 1 && (sljit_u8)value == (value >> 8)) { + elem_size = 0; + value = (sljit_u8)value; + } + + switch (elem_size) { + case 0: + SLJIT_ASSERT(value <= 0xff); + result = 0xe000; + break; + case 1: + SLJIT_ASSERT(value <= 0xffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x8000; + break; + } + + if ((value & 0xff) == 0) { + value >>= 8; + result |= 0xa000; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffff; + result = (1 << 29); + } + break; + case 2: + SLJIT_ASSERT(value <= 0xffffffff); + result = 0; + + while (1) { + if (value <= 0xff) { + result |= 0x0000; + break; + } + + if ((value & ~(sljit_uw)0xff00) == 0) { + value >>= 8; + result |= 0x2000; + break; + } + + if ((value & ~(sljit_uw)0xff0000) == 0) { + value >>= 16; + result |= 0x4000; + break; + } + + if ((value & ~(sljit_uw)0xff000000) == 0) { + value >>= 24; + result |= 0x6000; + break; + } + + if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) { + value >>= 8; + result |= 0xc000; + break; + } + + if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) { + value >>= 16; + result |= 0xd000; + break; + } + + if (result != 0) + return ~(sljit_ins)0; + + value ^= (sljit_uw)0xffffffff; + result = (1 << 29); + } + break; + default: + return ~(sljit_ins)0; + } + + return (((sljit_ins)value & 0x1f) << 5) | (((sljit_ins)value & 0xe0) << 11) | result; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imm; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + ins = (sljit_ins)elem_size << 10; + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, LD1R | ins | RN(src) | VT(freg)); + } + + ins = (sljit_ins)1 << (16 + elem_size); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + if (type & SLJIT_SIMD_FLOAT) { + if (src == SLJIT_IMM) + return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg)); + + return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src)); + } + + if (src == SLJIT_IMM) { + if (elem_size < 3) + srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + imm = simd_get_imm(elem_size, (sljit_uw)srcw); + + if (imm != ~(sljit_ins)0) { + imm |= ins & ((sljit_ins)1 << 30); + + return push_inst(compiler, MOVI | imm | VD(freg)); + } + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcw)); + src = TMP_REG1; + } + + return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, sljit_s32 lane_index, + sljit_s32 srcdst, sljit_sw srcdstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw)); + + ADJUST_LOCAL_OFFSET(srcdst, srcdstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (type & SLJIT_SIMD_LANE_ZERO) { + ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30); + + if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) { + FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg))); + srcdst = TMP_FREG1; + srcdstw = 0; + } + + FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg))); + } + + if (srcdst & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw)); + + if (elem_size == 3) + ins = 0x8400; + else if (elem_size == 0) + ins = 0; + else + ins = (sljit_ins)0x2000 << elem_size; + + lane_index = lane_index << elem_size; + ins |= (sljit_ins)(((lane_index & 0x8) << 27) | ((lane_index & 0x7) << 10)); + + return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? ST1_s : LD1_s) | ins | RN(srcdst) | VT(freg)); + } + + if (type & SLJIT_SIMD_FLOAT) { + if (type & SLJIT_SIMD_STORE) + ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg); + else + ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst); + + return push_inst(compiler, ins); + } + + if (srcdst == SLJIT_IMM) { + if (elem_size < 3) + srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1; + + FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw)); + srcdst = TMP_REG1; + } + + if (type & SLJIT_SIMD_STORE) { + ins = RD(srcdst) | VN(freg); + + if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) { + ins |= SMOV; + + if (!(type & SLJIT_32)) + ins |= (sljit_ins)1 << 30; + } else + ins |= UMOV; + } else + ins = INS | VD(freg) | RN(srcdst); + + if (elem_size == 3) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, ins | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size))); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_s32 src_lane_index) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + ins = (((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size); + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type); + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw)); + + ADJUST_LOCAL_OFFSET(src, srcw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (src & SLJIT_MEM) { + FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw)); + + if (reg_size == 4 && elem2_size - elem_size == 1) + FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg))); + else + FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg))); + src = freg; + } + + if (type & SLJIT_SIMD_FLOAT) { + SLJIT_ASSERT(reg_size == 4); + return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src)); + } + + do { + FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? SSHLL : USHLL) + | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src))); + src = freg; + } while (++elem_size < elem2_size); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 freg, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins, imms; + sljit_s32 dst_r; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw)); + + ADJUST_LOCAL_OFFSET(dst, dstw); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + switch (elem_size) { + case 0: + imms = 0x643219; + ins = USHR | (0x9 << 16); + break; + case 1: + imms = (reg_size == 4) ? 0x643219 : 0x6231; + ins = USHR | (0x11 << 16); + break; + case 2: + imms = (reg_size == 4) ? 0x6231 : 0x61; + ins = USHR | (0x21 << 16); + break; + default: + imms = 0x61; + ins = USHR | (0x41 << 16); + break; + } + + if (reg_size == 4) + ins |= (1 << 30); + + FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg))); + + if (reg_size == 4 && elem_size > 0) + FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1))); + + if (imms >= 0x100) { + ins = (reg_size == 4 && elem_size == 0) ? (1 << 30) : 0; + + do { + FAIL_IF(push_inst(compiler, USRA | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VN(TMP_FREG1))); + imms >>= 8; + } while (imms >= 0x100); + } + + FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1))); + + dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1; + ins = (0x1 << 16); + + if (reg_size == 4 && elem_size == 0) { + FAIL_IF(push_inst(compiler, INS_e | (0x3 << 16) | (0x8 << 11) | VD(TMP_FREG1) | VN(TMP_FREG1))); + ins = (0x2 << 16); + } + + FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1))); + + if (dst_r == TMP_REG1) + return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG1, dst, dstw, TMP_REG2); + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg) +{ + sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type); + sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type); + sljit_ins ins = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg)); + + if (reg_size != 3 && reg_size != 4) + return SLJIT_ERR_UNSUPPORTED; + + if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3)) + return SLJIT_ERR_UNSUPPORTED; + + switch (SLJIT_SIMD_GET_OPCODE(type)) { + case SLJIT_SIMD_OP2_AND: + ins = AND_v; + break; + case SLJIT_SIMD_OP2_OR: + ins = ORR_v; + break; + case SLJIT_SIMD_OP2_XOR: + ins = EOR_v; + break; + } + + if (type & SLJIT_SIMD_TEST) + return SLJIT_SUCCESS; + + if (reg_size == 4) + ins |= (sljit_ins)1 << 30; + + return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst_reg, + sljit_s32 mem_reg) +{ + sljit_ins ins; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg)); + +#ifdef __ARM_FEATURE_ATOMICS + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = LDR ^ (1 << 30); + break; + case SLJIT_MOV_U16: + ins = LDRH; + break; + case SLJIT_MOV_U8: + ins = LDRB; + break; + default: + ins = LDR; + break; + } +#else /* !__ARM_FEATURE_ATOMICS */ + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = LDXR ^ (1 << 30); + break; + case SLJIT_MOV_U8: + ins = LDXRB; + break; + case SLJIT_MOV_U16: + ins = LDXRH; + break; + default: + ins = LDXR; + break; + } +#endif /* ARM_FEATURE_ATOMICS */ + return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg)); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 src_reg, + sljit_s32 mem_reg, + sljit_s32 temp_reg) +{ + sljit_ins ins; + sljit_s32 tmp = temp_reg; + sljit_ins cmp = 0; + sljit_ins inv_bits = W_OP; + + CHECK_ERROR(); + CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg)); + +#ifdef __ARM_FEATURE_ATOMICS + if (op & SLJIT_SET_ATOMIC_STORED) + cmp = (SUBS ^ W_OP) | RD(TMP_ZERO); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = CAS ^ (1 << 30); + break; + case SLJIT_MOV_U16: + ins = CASH; + break; + case SLJIT_MOV_U8: + ins = CASB; + break; + default: + ins = CAS; + inv_bits = 0; + if (cmp) + cmp ^= W_OP; + break; + } + + if (cmp) { + FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1))); + tmp = TMP_REG1; + } + FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg))); + if (!cmp) + return SLJIT_SUCCESS; + + FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg))); + FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp))); + return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO)); +#else /* !__ARM_FEATURE_ATOMICS */ + SLJIT_UNUSED_ARG(tmp); + SLJIT_UNUSED_ARG(inv_bits); + + if (op & SLJIT_SET_ATOMIC_STORED) + cmp = (SUBI ^ W_OP) | (1 << 29); + + switch (GET_OPCODE(op)) { + case SLJIT_MOV32: + case SLJIT_MOV_U32: + ins = STXR ^ (1 << 30); + break; + case SLJIT_MOV_U8: + ins = STXRB; + break; + case SLJIT_MOV_U16: + ins = STXRH; + break; + default: + ins = STXR; + break; + } + + FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg))); + return cmp ? push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS; +#endif /* __ARM_FEATURE_ATOMICS */ +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset) { sljit_s32 dst_reg; |