summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c')
-rw-r--r--src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c1596
1 files changed, 1361 insertions, 235 deletions
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
index 96453b4abe..b268582f42 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeARM_64.c
@@ -67,78 +67,124 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
/* Instrucion forms */
/* --------------------------------------------------------------------- */
-#define ADC 0x9a000000
-#define ADD 0x8b000000
-#define ADDE 0x8b200000
-#define ADDI 0x91000000
-#define AND 0x8a000000
-#define ANDI 0x92000000
-#define ASRV 0x9ac02800
-#define B 0x14000000
-#define B_CC 0x54000000
-#define BL 0x94000000
-#define BLR 0xd63f0000
-#define BR 0xd61f0000
-#define BRK 0xd4200000
-#define CBZ 0xb4000000
-#define CLZ 0xdac01000
-#define CSEL 0x9a800000
-#define CSINC 0x9a800400
-#define EOR 0xca000000
-#define EORI 0xd2000000
-#define FABS 0x1e60c000
-#define FADD 0x1e602800
-#define FCMP 0x1e602000
-#define FCVT 0x1e224000
-#define FCVTZS 0x9e780000
-#define FDIV 0x1e601800
-#define FMOV 0x1e604000
-#define FMUL 0x1e600800
-#define FNEG 0x1e614000
-#define FSUB 0x1e603800
-#define LDRI 0xf9400000
-#define LDRI_F64 0xfd400000
-#define LDP 0xa9400000
-#define LDP_F64 0x6d400000
-#define LDP_POST 0xa8c00000
-#define LDR_PRE 0xf8400c00
-#define LSLV 0x9ac02000
-#define LSRV 0x9ac02400
-#define MADD 0x9b000000
-#define MOVK 0xf2800000
-#define MOVN 0x92800000
-#define MOVZ 0xd2800000
-#define NOP 0xd503201f
-#define ORN 0xaa200000
-#define ORR 0xaa000000
-#define ORRI 0xb2000000
-#define RET 0xd65f0000
-#define SBC 0xda000000
-#define SBFM 0x93000000
-#define SCVTF 0x9e620000
-#define SDIV 0x9ac00c00
-#define SMADDL 0x9b200000
-#define SMULH 0x9b403c00
-#define STP 0xa9000000
-#define STP_F64 0x6d000000
-#define STP_PRE 0xa9800000
-#define STRB 0x38206800
-#define STRBI 0x39000000
-#define STRI 0xf9000000
-#define STRI_F64 0xfd000000
-#define STR_FI 0x3d000000
-#define STR_FR 0x3c206800
-#define STUR_FI 0x3c000000
-#define STURBI 0x38000000
-#define SUB 0xcb000000
-#define SUBI 0xd1000000
-#define SUBS 0xeb000000
-#define UBFM 0xd3000000
-#define UDIV 0x9ac00800
-#define UMULH 0x9bc03c00
-
-/* dest_reg is the absolute name of the register
- Useful for reordering instructions in the delay slot. */
+#define ADC 0x9a000000
+#define ADD 0x8b000000
+#define ADDE 0x8b200000
+#define ADDI 0x91000000
+#define AND 0x8a000000
+#define ANDI 0x92000000
+#define AND_v 0x0e201c00
+#define ASRV 0x9ac02800
+#define B 0x14000000
+#define B_CC 0x54000000
+#define BL 0x94000000
+#define BLR 0xd63f0000
+#define BR 0xd61f0000
+#define BRK 0xd4200000
+#define CAS 0xc8a07c00
+#define CASB 0x08a07c00
+#define CASH 0x48a07c00
+#define CBZ 0xb4000000
+#define CCMPI 0xfa400800
+#define CLZ 0xdac01000
+#define CSEL 0x9a800000
+#define CSINC 0x9a800400
+#define DUP_e 0x0e000400
+#define DUP_g 0x0e000c00
+#define EOR 0xca000000
+#define EOR_v 0x2e201c00
+#define EORI 0xd2000000
+#define EXTR 0x93c00000
+#define FABS 0x1e60c000
+#define FADD 0x1e602800
+#define FCMP 0x1e602000
+#define FCSEL 0x1e600c00
+#define FCVT 0x1e224000
+#define FCVTL 0x0e217800
+#define FCVTZS 0x9e780000
+#define FDIV 0x1e601800
+#define FMOV 0x1e604000
+#define FMOV_R 0x9e660000
+#define FMOV_I 0x1e601000
+#define FMUL 0x1e600800
+#define FNEG 0x1e614000
+#define FSUB 0x1e603800
+#define INS 0x4e001c00
+#define INS_e 0x6e000400
+#define LD1 0x0c407000
+#define LD1_s 0x0d400000
+#define LD1R 0x0d40c000
+#define LDRI 0xf9400000
+#define LDRI_F64 0xfd400000
+#define LDRI_POST 0xf8400400
+#define LDP 0xa9400000
+#define LDP_F64 0x6d400000
+#define LDP_POST 0xa8c00000
+#define LDR_PRE 0xf8400c00
+#define LDXR 0xc85f7c00
+#define LDXRB 0x085f7c00
+#define LDXRH 0x485f7c00
+#define LSLV 0x9ac02000
+#define LSRV 0x9ac02400
+#define MADD 0x9b000000
+#define MOVI 0x0f000400
+#define MOVK 0xf2800000
+#define MOVN 0x92800000
+#define MOVZ 0xd2800000
+#define NOP 0xd503201f
+#define ORN 0xaa200000
+#define ORR 0xaa000000
+#define ORR_v 0x0ea01c00
+#define ORRI 0xb2000000
+#define RBIT 0xdac00000
+#define RET 0xd65f0000
+#define REV 0xdac00c00
+#define REV16 0xdac00400
+#define RORV 0x9ac02c00
+#define SBC 0xda000000
+#define SBFM 0x93400000
+#define SCVTF 0x9e620000
+#define SDIV 0x9ac00c00
+#define SMADDL 0x9b200000
+#define SMOV 0x0e002c00
+#define SMULH 0x9b403c00
+#define SSHLL 0x0f00a400
+#define ST1 0x0c007000
+#define ST1_s 0x0d000000
+#define STP 0xa9000000
+#define STP_F64 0x6d000000
+#define STP_PRE 0xa9800000
+#define STRB 0x38206800
+#define STRBI 0x39000000
+#define STRI 0xf9000000
+#define STRI_F64 0xfd000000
+#define STR_FI 0x3d000000
+#define STR_FR 0x3c206800
+#define STUR_FI 0x3c000000
+#define STURBI 0x38000000
+#define STXR 0xc8007c00
+#define STXRB 0x8007c00
+#define STXRH 0x48007c00
+#define SUB 0xcb000000
+#define SUBI 0xd1000000
+#define SUBS 0xeb000000
+#define TBZ 0x36000000
+#define UBFM 0xd3400000
+#define UCVTF 0x9e630000
+#define UDIV 0x9ac00800
+#define UMOV 0x0e003c00
+#define UMULH 0x9bc03c00
+#define USHLL 0x2f00a400
+#define USHR 0x2f000400
+#define USRA 0x2f001400
+#define XTN 0x0e212800
+
+#define CSET (CSINC | RM(TMP_ZERO) | RN(TMP_ZERO))
+#define LDR (STRI | (1 << 22))
+#define LDRB (STRBI | (1 << 22))
+#define LDRH (LDRB | (1 << 30))
+#define MOV (ORR | RN(TMP_ZERO))
+
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
@@ -173,7 +219,7 @@ static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins
target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
}
- diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr + 4) - executable_offset;
+ diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr - 4) - executable_offset;
if (jump->flags & IS_COND) {
diff += SSIZE_OF(ins);
@@ -296,8 +342,8 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
}
next_addr = compute_next_addr(label, jump, const_, put_label);
}
- code_ptr ++;
- word_count ++;
+ code_ptr++;
+ word_count++;
} while (buf_ptr < buf_end);
buf = buf->next;
@@ -383,16 +429,23 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
switch (feature_type) {
case SLJIT_HAS_FPU:
+ case SLJIT_HAS_SIMD:
#ifdef SLJIT_IS_FPU_AVAILABLE
- return SLJIT_IS_FPU_AVAILABLE;
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
/* Available by default. */
return 1;
#endif
case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_CTZ:
+ case SLJIT_HAS_REV:
+ case SLJIT_HAS_ROT:
case SLJIT_HAS_CMOV:
case SLJIT_HAS_PREFETCH:
+ case SLJIT_HAS_COPY_F32:
+ case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_ATOMIC:
return 1;
default:
@@ -400,6 +453,17 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
}
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
+{
+ switch (type) {
+ case SLJIT_UNORDERED_OR_EQUAL:
+ case SLJIT_ORDERED_NOT_EQUAL:
+ return 2;
+ }
+
+ return 0;
+}
+
/* --------------------------------------------------------------------- */
/* Core code generator functions. */
/* --------------------------------------------------------------------- */
@@ -631,6 +695,12 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
switch (op) {
case SLJIT_MUL:
case SLJIT_CLZ:
+ case SLJIT_CTZ:
+ case SLJIT_REV:
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
case SLJIT_ADDC:
case SLJIT_SUBC:
/* No form with immediate operand (except imm 0, which
@@ -639,10 +709,6 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
case SLJIT_MOV:
SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
return load_immediate(compiler, dst, imm);
- case SLJIT_NOT:
- SLJIT_ASSERT(flags & ARG2_IMM);
- FAIL_IF(load_immediate(compiler, dst, (flags & INT_OP) ? (~imm & 0xffffffff) : ~imm));
- goto set_flags;
case SLJIT_SUB:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
if (flags & ARG1_IMM)
@@ -689,8 +755,13 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
break;
CHECK_FLAGS(3 << 29);
return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
- case SLJIT_OR:
case SLJIT_XOR:
+ if (imm == -1) {
+ FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(reg)));
+ goto set_flags;
+ }
+ /* fallthrough */
+ case SLJIT_OR:
inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
if (!inst_bits)
break;
@@ -701,36 +772,52 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
goto set_flags;
case SLJIT_SHL:
+ case SLJIT_MSHL:
if (flags & ARG1_IMM)
break;
+
if (flags & INT_OP) {
imm &= 0x1f;
- FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
- | (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10)));
- }
- else {
+ inst_bits = (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10);
+ } else {
imm &= 0x3f;
- FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | (1 << 22)
- | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10)));
+ inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10);
}
+
+ inv_bits |= inv_bits >> 9;
+ FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
goto set_flags;
case SLJIT_LSHR:
+ case SLJIT_MLSHR:
case SLJIT_ASHR:
+ case SLJIT_MASHR:
if (flags & ARG1_IMM)
break;
- if (op == SLJIT_ASHR)
+
+ inv_bits |= inv_bits >> 9;
+ if (op >= SLJIT_ASHR)
inv_bits |= 1 << 30;
+
if (flags & INT_OP) {
imm &= 0x1f;
- FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
- | ((sljit_ins)imm << 16) | (31 << 10)));
- }
- else {
+ inst_bits = ((sljit_ins)imm << 16) | (31 << 10);
+ } else {
imm &= 0x3f;
- FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1)
- | (1 << 22) | ((sljit_ins)imm << 16) | (63 << 10)));
+ inst_bits = ((sljit_ins)1 << 22) | ((sljit_ins)imm << 16) | (63 << 10);
}
+
+ FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
goto set_flags;
+ case SLJIT_ROTL:
+ case SLJIT_ROTR:
+ if (flags & ARG1_IMM)
+ break;
+
+ if (op == SLJIT_ROTL)
+ imm = -imm;
+
+ imm &= (flags & INT_OP) ? 0x1f : 0x3f;
+ return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst) | RN(arg1) | RM(arg1) | ((sljit_ins)imm << 10));
default:
SLJIT_UNREACHABLE();
break;
@@ -761,22 +848,22 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
if (dst == arg2)
return SLJIT_SUCCESS;
- return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+ return push_inst(compiler, MOV | RD(dst) | RM(arg2));
case SLJIT_MOV_U8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (7 << 10));
+ inv_bits |= inv_bits >> 9;
+ return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
case SLJIT_MOV_S8:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- if (!(flags & INT_OP))
- inv_bits |= 1 << 22;
+ inv_bits |= inv_bits >> 9;
return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
case SLJIT_MOV_U16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- return push_inst(compiler, (UBFM ^ W_OP) | RD(dst) | RN(arg2) | (15 << 10));
+ inv_bits |= inv_bits >> 9;
+ return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
case SLJIT_MOV_S16:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- if (!(flags & INT_OP))
- inv_bits |= 1 << 22;
+ inv_bits |= inv_bits >> 9;
return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
case SLJIT_MOV32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
@@ -785,17 +872,36 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
/* fallthrough */
case SLJIT_MOV_U32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
- return push_inst(compiler, (ORR ^ W_OP) | RD(dst) | RN(TMP_ZERO) | RM(arg2));
+ return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2));
case SLJIT_MOV_S32:
SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
- case SLJIT_NOT:
- SLJIT_ASSERT(arg1 == TMP_REG1);
- FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(arg2)));
- break; /* Set flags. */
case SLJIT_CLZ:
SLJIT_ASSERT(arg1 == TMP_REG1);
return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
+ case SLJIT_CTZ:
+ SLJIT_ASSERT(arg1 == TMP_REG1);
+ FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2)));
+ return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst));
+ case SLJIT_REV:
+ SLJIT_ASSERT(arg1 == TMP_REG1);
+ inv_bits |= inv_bits >> 21;
+ return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2));
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
+ FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2)));
+ if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16))
+ return SLJIT_SUCCESS;
+ inv_bits |= inv_bits >> 9;
+ return push_inst(compiler, ((op == SLJIT_REV_U16 ? UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10));
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
+ FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2)));
+ if (op == SLJIT_REV_U32 || dst == TMP_REG1)
+ return SLJIT_SUCCESS;
+ return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10));
case SLJIT_ADD:
compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
CHECK_FLAGS(1 << 29);
@@ -834,14 +940,23 @@ static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, s
FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
break; /* Set flags. */
case SLJIT_SHL:
+ case SLJIT_MSHL:
FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
break; /* Set flags. */
case SLJIT_LSHR:
+ case SLJIT_MLSHR:
FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
break; /* Set flags. */
case SLJIT_ASHR:
+ case SLJIT_MASHR:
FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
break; /* Set flags. */
+ case SLJIT_ROTL:
+ FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(arg2)));
+ arg2 = TMP_REG2;
+ /* fallthrough */
+ case SLJIT_ROTR:
+ return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
default:
SLJIT_UNREACHABLE();
return SLJIT_SUCCESS;
@@ -895,21 +1010,37 @@ static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, s
return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
}
- if (argw >= 0 && (argw & ((1 << shift) - 1)) == 0) {
- if ((argw >> shift) <= 0xfff)
- return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));
+ if ((argw & ((1 << shift) - 1)) == 0) {
+ if (argw >= 0) {
+ if ((argw >> shift) <= 0xfff)
+ return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));
- if (argw <= 0xffffff) {
- FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
+ if (argw <= 0xffffff) {
+ FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
- argw = ((argw & 0xfff) >> shift);
+ argw = ((argw & 0xfff) >> shift);
+ return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
+ }
+ } else if (argw < -256 && argw >= -0xfff000) {
+ FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)(-argw + 0xfff) >> 12) << 10)));
+ argw = ((0x1000 + argw) & 0xfff) >> shift;
return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
}
}
- if (argw <= 255 && argw >= -256)
+ if (argw <= 0xff && argw >= -0x100)
return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12));
+ if (argw >= 0) {
+ if (argw <= 0xfff0ff && ((argw + 0x100) & 0xfff) <= 0x1ff) {
+ FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
+ return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
+ }
+ } else if (argw >= -0xfff100 && ((-argw + 0xff) & 0xfff) <= 0x1ff) {
+ FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10)));
+ return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
+ }
+
FAIL_IF(load_immediate(compiler, tmp_reg, argw));
return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
@@ -924,15 +1055,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
sljit_s32 prev, fprev, saved_regs_size, i, tmp;
- sljit_s32 word_arg_count = 0;
+ sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
sljit_ins offs;
CHECK_ERROR();
CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
- saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
- saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2);
+ saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
compiler->local_size = local_size;
@@ -954,7 +1085,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
prev = -1;
tmp = SLJIT_S0 - saveds;
- for (i = SLJIT_S0; i > tmp; i--) {
+ for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
if (prev == -1) {
prev = i;
continue;
@@ -1003,23 +1134,27 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
if (prev != -1)
FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0)));
- arg_types >>= SLJIT_ARG_SHIFT;
#ifdef _WIN32
if (local_size > 4096)
FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
#endif /* _WIN32 */
- tmp = 0;
- while (arg_types > 0) {
- if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
- if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
- FAIL_IF(push_inst(compiler, ORR | RD(SLJIT_S0 - tmp) | RN(TMP_ZERO) | RM(SLJIT_R0 + word_arg_count)));
+ if (!(options & SLJIT_ENTER_REG_ARG)) {
+ arg_types >>= SLJIT_ARG_SHIFT;
+ saved_arg_count = 0;
+ tmp = SLJIT_R0;
+
+ while (arg_types) {
+ if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
+ if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
+ FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(tmp)));
+ saved_arg_count++;
+ }
tmp++;
}
- word_arg_count++;
+ arg_types >>= SLJIT_ARG_SHIFT;
}
- arg_types >>= SLJIT_ARG_SHIFT;
}
#ifdef _WIN32
@@ -1100,26 +1235,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp
CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
- saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 2);
- saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, SSIZE_OF(f64));
+ saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2);
+ saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
return SLJIT_SUCCESS;
}
-static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
+static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
sljit_s32 local_size, prev, fprev, i, tmp;
sljit_ins offs;
local_size = compiler->local_size;
- if (local_size > 512 && local_size <= 512 + 496) {
- FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR)
- | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3))));
- local_size = 512;
- } else
- FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
+ if (!is_return_to) {
+ if (local_size > 512 && local_size <= 512 + 496) {
+ FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR)
+ | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3))));
+ local_size = 512;
+ } else
+ FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
+ } else {
+ if (local_size > 512 && local_size <= 512 + 248) {
+ FAIL_IF(push_inst(compiler, LDRI_POST | RT(TMP_FP) | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << 12)));
+ local_size = 512;
+ } else
+ FAIL_IF(push_inst(compiler, LDRI | RT(TMP_FP) | RN(SLJIT_SP) | 0));
+ }
if (local_size > 512) {
local_size -= 512;
@@ -1137,7 +1280,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
prev = -1;
tmp = SLJIT_S0 - compiler->saveds;
- for (i = SLJIT_S0; i > tmp; i--) {
+ for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
if (prev == -1) {
prev = i;
continue;
@@ -1195,11 +1338,34 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler
CHECK_ERROR();
CHECK(check_sljit_emit_return_void(compiler));
- FAIL_IF(emit_stack_frame_release(compiler));
+ FAIL_IF(emit_stack_frame_release(compiler, 0));
return push_inst(compiler, RET | RN(TMP_LR));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
+ sljit_s32 src, sljit_sw srcw)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_return_to(compiler, src, srcw));
+
+ if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
+ src = TMP_REG1;
+ srcw = 0;
+ } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
+ src = TMP_REG1;
+ srcw = 0;
+ }
+
+ FAIL_IF(emit_stack_frame_release(compiler, 1));
+
+ SLJIT_SKIP_CHECKS(compiler);
+ return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
+}
+
/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */
@@ -1219,12 +1385,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compile
return push_inst(compiler, NOP);
case SLJIT_LMUL_UW:
case SLJIT_LMUL_SW:
- FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(SLJIT_R0)));
FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
case SLJIT_DIVMOD_UW:
case SLJIT_DIVMOD_SW:
- FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(SLJIT_R0)));
+ FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RD(TMP_REG1) | RM(SLJIT_R0)));
FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
@@ -1266,33 +1432,33 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
break;
case SLJIT_MOV_U8:
mem_flags = BYTE_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u8)srcw;
break;
case SLJIT_MOV_S8:
mem_flags = BYTE_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s8)srcw;
break;
case SLJIT_MOV_U16:
mem_flags = HALF_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u16)srcw;
break;
case SLJIT_MOV_S16:
mem_flags = HALF_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s16)srcw;
break;
case SLJIT_MOV_U32:
mem_flags = INT_SIZE;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_u32)srcw;
break;
case SLJIT_MOV_S32:
case SLJIT_MOV32:
mem_flags = INT_SIZE | SIGNED;
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
srcw = (sljit_s32)srcw;
break;
default:
@@ -1301,7 +1467,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
break;
}
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
else if (!(src & SLJIT_MEM))
dst_r = src;
@@ -1314,11 +1480,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
}
flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
- mem_flags = WORD_SIZE;
- if (op_flags & SLJIT_32) {
- flags |= INT_OP;
+ switch (op) {
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ mem_flags = HALF_SIZE;
+ break;
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
mem_flags = INT_SIZE;
+ break;
+ default:
+ mem_flags = WORD_SIZE;
+
+ if (op_flags & SLJIT_32) {
+ flags |= INT_OP;
+ mem_flags = INT_SIZE;
+ }
+ break;
}
if (src & SLJIT_MEM) {
@@ -1368,12 +1547,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
src2 = TMP_REG2;
}
- if (src1 & SLJIT_IMM)
+ if (src1 == SLJIT_IMM)
flags |= ARG1_IMM;
else
src1w = src1;
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
flags |= ARG2_IMM;
else
src2w = src2;
@@ -1392,13 +1571,79 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
- || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- compiler->skip_checks = 1;
-#endif
+ SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
+{
+ sljit_ins inv_bits, imm;
+ sljit_s32 is_left;
+ sljit_sw mask;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
+
+ is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
+
+ if (src1_reg == src2_reg) {
+ SLJIT_SKIP_CHECKS(compiler);
+ return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
+ }
+
+ ADJUST_LOCAL_OFFSET(src3, src3w);
+
+ inv_bits = (op & SLJIT_32) ? W_OP : 0;
+
+ if (src3 == SLJIT_IMM) {
+ mask = inv_bits ? 0x1f : 0x3f;
+ src3w &= mask;
+
+ if (src3w == 0)
+ return SLJIT_SUCCESS;
+
+ if (is_left)
+ src3w = (src3w ^ mask) + 1;
+
+ return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst_reg)
+ | RN(is_left ? src1_reg : src2_reg) | RM(is_left ? src2_reg : src1_reg) | ((sljit_ins)src3w << 10));
+ }
+
+ if (src3 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
+ src3 = TMP_REG2;
+ } else if (dst_reg == src3) {
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src3)));
+ src3 = TMP_REG2;
+ }
+
+ FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(dst_reg) | RN(src1_reg) | RM(src3)));
+
+ if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
+ /* Shift left/right by 1. */
+ if (is_left)
+ imm = (sljit_ins)(inv_bits ? ((1 << 16) | (31 << 10)) : ((1 << 16) | (63 << 10) | (1 << 22)));
+ else
+ imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22)));
+
+ FAIL_IF(push_inst(compiler, (UBFM ^ (inv_bits | (inv_bits >> 9))) | RD(TMP_REG1) | RN(src2_reg) | imm));
+
+ /* Set imm to mask. */
+ imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22)));
+ FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src3) | imm));
+
+ src2_reg = TMP_REG1;
+ } else
+ FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src3)));
+
+ FAIL_IF(push_inst(compiler, ((is_left ? LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src2_reg) | RM(TMP_REG2)));
+ return push_inst(compiler, (ORR ^ inv_bits) | RD(dst_reg) | RN(dst_reg) | RM(TMP_REG1));
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src, sljit_sw srcw)
{
@@ -1409,7 +1654,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
switch (op) {
case SLJIT_FAST_RETURN:
if (FAST_IS_REG(src))
- FAIL_IF(push_inst(compiler, ORR | RD(TMP_LR) | RN(TMP_ZERO) | RM(src)));
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_LR) | RM(src)));
else
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));
@@ -1439,15 +1684,42 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *comp
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return reg_map[reg];
+ sljit_s32 dst_r = TMP_LR;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, MOV | RD(dst) | RM(TMP_LR));
+ break;
+ case SLJIT_GET_RETURN_ADDRESS:
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), 0x8, TMP_REG2));
+ break;
+ }
+
+ if (dst & SLJIT_MEM)
+ return emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER)
+ return reg_map[reg];
+
+ if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_64 && type != SLJIT_SIMD_REG_128)
+ return -1;
+
return freg_map[reg];
}
@@ -1525,7 +1797,7 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
inv_bits |= W_OP;
if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw);
+ FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw));
src = TMP_FREG1;
}
@@ -1536,35 +1808,59 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
return SLJIT_SUCCESS;
}
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
-
- if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
- inv_bits |= W_OP;
if (src & SLJIT_MEM) {
- emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) ? INT_SIZE : WORD_SIZE), TMP_REG1, src, srcw, TMP_REG1);
+ emit_op_mem(compiler, (ins & W_OP) ? WORD_SIZE : INT_SIZE, TMP_REG1, src, srcw, TMP_REG1);
src = TMP_REG1;
- } else if (src & SLJIT_IMM) {
-#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
- if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
- srcw = (sljit_s32)srcw;
-#endif
+ } else if (src == SLJIT_IMM) {
FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
- FAIL_IF(push_inst(compiler, (SCVTF ^ inv_bits) | VD(dst_r) | RN(src)));
+ FAIL_IF(push_inst(compiler, ins | VD(dst_r) | RN(src)));
if (dst & SLJIT_MEM)
- return emit_fop_mem(compiler, ((op & SLJIT_32) ? INT_SIZE : WORD_SIZE) | STORE, TMP_FREG1, dst, dstw);
+ return emit_fop_mem(compiler, ((ins & (1 << 22)) ? WORD_SIZE : INT_SIZE) | STORE, TMP_FREG1, dst, dstw);
return SLJIT_SUCCESS;
}
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
+
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
+ inv_bits |= W_OP;
+
+ if (src == SLJIT_IMM)
+ srcw = (sljit_s32)srcw;
+ }
+
+ return sljit_emit_fop1_conv_f64_from_w(compiler, SCVTF ^ inv_bits, dst, dstw, src, srcw);
+}
+
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
+
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
+ inv_bits |= W_OP;
+
+ if (src == SLJIT_IMM)
+ srcw = (sljit_u32)srcw;
+ }
+
+ return sljit_emit_fop1_conv_f64_from_w(compiler, UCVTF ^ inv_bits, dst, dstw, src, srcw);
+}
+
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
@@ -1573,16 +1869,22 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compile
sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
if (src1 & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
+ FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
src1 = TMP_FREG1;
}
if (src2 & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
+ FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
src2 = TMP_FREG2;
}
- return push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2));
+ FAIL_IF(push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)));
+
+ if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
+ return SLJIT_SUCCESS;
+
+ FAIL_IF(push_inst(compiler, CSINC | (0x0 << 12) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(TMP_ZERO)));
+ return push_inst(compiler, CCMPI | (0x0 << 16) | (0x7 << 12) | RN(TMP_REG1) | 0x4);
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
@@ -1601,7 +1903,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src & SLJIT_MEM) {
- emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw);
+ FAIL_IF(emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw));
src = dst_r;
}
@@ -1646,11 +1948,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
if (src1 & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w);
+ FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
src1 = TMP_FREG1;
}
if (src2 & SLJIT_MEM) {
- emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w);
+ FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
src2 = TMP_FREG2;
}
@@ -1667,6 +1969,11 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
case SLJIT_DIV_F64:
FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
break;
+ case SLJIT_COPYSIGN_F64:
+ FAIL_IF(push_inst(compiler, (FMOV_R ^ ((op & SLJIT_32) ? (W_OP | (1 << 22)) : 0)) | VN(src2) | RD(TMP_REG1)));
+ FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src1)));
+ FAIL_IF(push_inst(compiler, TBZ | ((op & SLJIT_32) ? 0 : ((sljit_ins)1 << 31)) | (0x1f << 19) | (2 << 5) | RT(TMP_REG1)));
+ return push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(dst_r));
}
if (!(dst & SLJIT_MEM))
@@ -1674,21 +1981,79 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
}
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
+{
+ sljit_u32 exp;
+ union {
+ sljit_u32 imm;
+ sljit_f32 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm == 0)
+ return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_ZERO) | VD(freg) | (1 << 16));
+
+ if ((u.imm << (32 - 19)) == 0) {
+ exp = (u.imm >> (23 + 2)) & 0x3f;
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+ if (exp == 0x20 || exp == 0x1f)
+ return push_inst(compiler, (FMOV_I ^ (1 << 22)) | (sljit_ins)((((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f)) << 13) | VD(freg));
+ }
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_s32)u.imm));
+ return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_REG1) | VD(freg) | (1 << 16));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
{
+ sljit_uw exp;
+ union {
+ sljit_uw imm;
+ sljit_f64 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+
+ if (u.imm == 0)
+ return push_inst(compiler, FMOV_R | RN(TMP_ZERO) | VD(freg) | (sljit_ins)1 << 16);
+
+ if ((u.imm << (64 - 48)) == 0) {
+ exp = (u.imm >> (52 + 2)) & 0x1ff;
+
+ if (exp == 0x100 || exp == 0xff)
+ return push_inst(compiler, FMOV_I | (sljit_ins)((((u.imm >> 56) & 0x80) | ((u.imm >> 48) & 0x7f)) << 13) | VD(freg));
+ }
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_sw)u.imm));
+ return push_inst(compiler, FMOV_R | RN(TMP_REG1) | VD(freg) | (1 << 16));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_ins inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
+ inst = FMOV_R | RN(reg) | VD(freg) | (1 << 16);
+ else
+ inst = FMOV_R | VN(freg) | RD(reg);
- if (FAST_IS_REG(dst))
- return push_inst(compiler, ORR | RD(dst) | RN(TMP_ZERO) | RM(TMP_LR));
+ if (op & SLJIT_32)
+ inst ^= W_OP | (1 << 22);
- /* Memory. */
- return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_LR, dst, dstw, TMP_REG1);
+ return push_inst(compiler, inst);
}
/* --------------------------------------------------------------------- */
@@ -1699,11 +2064,17 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
switch (type) {
case SLJIT_EQUAL:
- case SLJIT_EQUAL_F64:
+ case SLJIT_ATOMIC_STORED:
+ case SLJIT_F_EQUAL:
+ case SLJIT_ORDERED_EQUAL:
+ case SLJIT_UNORDERED_OR_EQUAL:
return 0x1;
case SLJIT_NOT_EQUAL:
- case SLJIT_NOT_EQUAL_F64:
+ case SLJIT_ATOMIC_NOT_STORED:
+ case SLJIT_F_NOT_EQUAL:
+ case SLJIT_UNORDERED_OR_NOT_EQUAL:
+ case SLJIT_ORDERED_NOT_EQUAL:
return 0x0;
case SLJIT_CARRY:
@@ -1712,7 +2083,6 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
/* fallthrough */
case SLJIT_LESS:
- case SLJIT_LESS_F64:
return 0x2;
case SLJIT_NOT_CARRY:
@@ -1721,27 +2091,33 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
/* fallthrough */
case SLJIT_GREATER_EQUAL:
- case SLJIT_GREATER_EQUAL_F64:
return 0x3;
case SLJIT_GREATER:
- case SLJIT_GREATER_F64:
+ case SLJIT_UNORDERED_OR_GREATER:
return 0x9;
case SLJIT_LESS_EQUAL:
- case SLJIT_LESS_EQUAL_F64:
+ case SLJIT_F_LESS_EQUAL:
+ case SLJIT_ORDERED_LESS_EQUAL:
return 0x8;
case SLJIT_SIG_LESS:
+ case SLJIT_UNORDERED_OR_LESS:
return 0xa;
case SLJIT_SIG_GREATER_EQUAL:
+ case SLJIT_F_GREATER_EQUAL:
+ case SLJIT_ORDERED_GREATER_EQUAL:
return 0xb;
case SLJIT_SIG_GREATER:
+ case SLJIT_F_GREATER:
+ case SLJIT_ORDERED_GREATER:
return 0xd;
case SLJIT_SIG_LESS_EQUAL:
+ case SLJIT_UNORDERED_OR_LESS_EQUAL:
return 0xc;
case SLJIT_OVERFLOW:
@@ -1749,7 +2125,7 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0x0;
/* fallthrough */
- case SLJIT_UNORDERED_F64:
+ case SLJIT_UNORDERED:
return 0x7;
case SLJIT_NOT_OVERFLOW:
@@ -1757,9 +2133,16 @@ static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
return 0x1;
/* fallthrough */
- case SLJIT_ORDERED_F64:
+ case SLJIT_ORDERED:
return 0x6;
+ case SLJIT_F_LESS:
+ case SLJIT_ORDERED_LESS:
+ return 0x5;
+
+ case SLJIT_UNORDERED_OR_GREATER_EQUAL:
+ return 0x4;
+
default:
SLJIT_UNREACHABLE();
return 0xe;
@@ -1816,15 +2199,11 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compile
CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
if (type & SLJIT_CALL_RETURN) {
- PTR_FAIL_IF(emit_stack_frame_release(compiler));
+ PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
}
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
- || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- compiler->skip_checks = 1;
-#endif
-
+ SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_jump(compiler, type);
}
@@ -1846,7 +2225,7 @@ static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compi
PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
- else if (src & SLJIT_IMM) {
+ else if (src == SLJIT_IMM) {
PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
src = TMP_REG1;
}
@@ -1869,10 +2248,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
- if (!(src & SLJIT_IMM)) {
+ if (src != SLJIT_IMM) {
if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
@@ -1897,28 +2276,24 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
SLJIT_UNUSED_ARG(arg_types);
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
- ADJUST_LOCAL_OFFSET(src, srcw);
if (src & SLJIT_MEM) {
+ ADJUST_LOCAL_OFFSET(src, srcw);
FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
src = TMP_REG1;
}
if (type & SLJIT_CALL_RETURN) {
- if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
- FAIL_IF(push_inst(compiler, ORR | RD(TMP_REG1) | RN(TMP_ZERO) | RM(src)));
+ if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
+ FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
src = TMP_REG1;
}
- FAIL_IF(emit_stack_frame_release(compiler));
+ FAIL_IF(emit_stack_frame_release(compiler, 0));
type = SLJIT_JUMP;
}
-#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
- || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
- compiler->skip_checks = 1;
-#endif
-
+ SLJIT_SKIP_CHECKS(compiler);
return sljit_emit_ijump(compiler, type, src, srcw);
}
@@ -1933,7 +2308,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
ADJUST_LOCAL_OFFSET(dst, dstw);
- cc = get_cc(compiler, type & 0xff);
+ cc = get_cc(compiler, type);
dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
if (GET_OPCODE(op) < SLJIT_ADD) {
@@ -1970,39 +2345,135 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
{
- sljit_ins inv_bits = (dst_reg & SLJIT_32) ? W_OP : 0;
+ sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0;
sljit_ins cc;
CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
- if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
- if (dst_reg & SLJIT_32)
- srcw = (sljit_s32)srcw;
- FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
- src = TMP_REG1;
- srcw = 0;
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (src1 == SLJIT_IMM) {
+ if (type & SLJIT_32)
+ src1w = (sljit_s32)src1w;
+ FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
+ src1 = TMP_REG1;
+ } else if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG2));
+ src1 = TMP_REG1;
}
- cc = get_cc(compiler, type & 0xff);
- dst_reg &= ~SLJIT_32;
+ cc = get_cc(compiler, type & ~SLJIT_32);
+ return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(src2_reg) | RM(src1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{
+ sljit_ins inv_bits = (type & SLJIT_32) ? (1 << 22) : 0;
+ sljit_ins cc;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
- return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(dst_reg) | RM(src));
+ if (src1 & SLJIT_MEM) {
+ FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src1, src1w));
+ src1 = TMP_FREG1;
+ }
+
+ cc = get_cc(compiler, type & ~SLJIT_32);
+ return push_inst(compiler, (FCSEL ^ inv_bits) | (cc << 12) | VD(dst_freg) | VN(src2_freg) | VM(src1));
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 reg,
sljit_s32 mem, sljit_sw memw)
{
- sljit_u32 sign = 0, inst;
+ sljit_u32 inst;
CHECK_ERROR();
CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
+ if (!(reg & REG_PAIR_MASK))
+ return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
+
+ ADJUST_LOCAL_OFFSET(mem, memw);
+
+ if (!(mem & REG_MASK)) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, memw & ~0x1f8));
+
+ mem = SLJIT_MEM1(TMP_REG1);
+ memw &= 0x1f8;
+ } else if (mem & OFFS_REG_MASK) {
+ FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)));
+
+ mem = SLJIT_MEM1(TMP_REG1);
+ memw = 0;
+ } else if ((memw & 0x7) != 0 || memw > 0x1f8 || memw < -0x200) {
+ inst = ADDI;
+
+ if (memw < 0) {
+ /* Remains negative for integer min. */
+ memw = -memw;
+ inst = SUBI;
+ } else if ((memw & 0x7) == 0 && memw <= 0x7ff0) {
+ if (!(type & SLJIT_MEM_STORE) && (mem & REG_MASK) == REG_PAIR_FIRST(reg)) {
+ FAIL_IF(push_inst(compiler, LDRI | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
+ return push_inst(compiler, LDRI | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
+ }
+
+ inst = (type & SLJIT_MEM_STORE) ? STRI : LDRI;
+
+ FAIL_IF(push_inst(compiler, inst | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
+ return push_inst(compiler, inst | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
+ }
+
+ if ((sljit_uw)memw <= 0xfff) {
+ FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(mem & REG_MASK) | ((sljit_ins)memw << 10)));
+ memw = 0;
+ } else if ((sljit_uw)memw <= 0xffffff) {
+ FAIL_IF(push_inst(compiler, inst | (1 << 22) | RD(TMP_REG1) | RN(mem & REG_MASK) | (((sljit_ins)memw >> 12) << 10)));
+
+ if ((memw & 0xe07) != 0) {
+ FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(TMP_REG1) | (((sljit_ins)memw & 0xfff) << 10)));
+ memw = 0;
+ } else {
+ memw &= 0xfff;
+ }
+ } else {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
+ FAIL_IF(push_inst(compiler, (inst == ADDI ? ADD : SUB) | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(TMP_REG1)));
+ memw = 0;
+ }
+
+ mem = SLJIT_MEM1(TMP_REG1);
+
+ if (inst == SUBI)
+ memw = -memw;
+ }
+
+ SLJIT_ASSERT((memw & 0x7) == 0 && memw <= 0x1f8 && memw >= -0x200);
+ return push_inst(compiler, ((type & SLJIT_MEM_STORE) ? STP : LDP) | RT(REG_PAIR_FIRST(reg)) | RT2(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x3f8) << 12));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 reg,
+ sljit_s32 mem, sljit_sw memw)
+{
+ sljit_u32 sign = 0, inst;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
+
if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
return SLJIT_ERR_UNSUPPORTED;
@@ -2042,20 +2513,20 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
if (!(type & SLJIT_MEM_STORE))
inst |= sign ? 0x00800000 : 0x00400000;
- if (type & SLJIT_MEM_PRE)
+ if (!(type & SLJIT_MEM_POST))
inst |= 0x800;
return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 freg,
sljit_s32 mem, sljit_sw memw)
{
sljit_u32 inst;
CHECK_ERROR();
- CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));
+ CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw));
if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
return SLJIT_ERR_UNSUPPORTED;
@@ -2071,12 +2542,667 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compil
if (!(type & SLJIT_MEM_STORE))
inst |= 0x00400000;
- if (type & SLJIT_MEM_PRE)
+ if (!(type & SLJIT_MEM_POST))
inst |= 0x800;
return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
}
+static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
+{
+ sljit_ins ins;
+ sljit_s32 mem = *mem_ptr;
+
+ if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
+ *mem_ptr = TMP_REG1;
+ return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10));
+ }
+
+ if (!(mem & REG_MASK)) {
+ *mem_ptr = TMP_REG1;
+ return load_immediate(compiler, TMP_REG1, memw);
+ }
+
+ mem &= REG_MASK;
+
+ if (memw == 0) {
+ *mem_ptr = mem;
+ return SLJIT_SUCCESS;
+ }
+
+ *mem_ptr = TMP_REG1;
+
+ if (memw < -0xffffff || memw > 0xffffff) {
+ FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
+ return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem));
+ }
+
+ ins = ADDI;
+
+ if (memw < 0) {
+ memw = -memw;
+ ins = SUBI;
+ }
+
+ if (memw > 0xfff) {
+ FAIL_IF(push_inst(compiler, ins | (1 << 22) | RD(TMP_REG1) | RN(mem) | ((sljit_ins)(memw >> 12) << 10)));
+
+ memw &= 0xfff;
+ if (memw == 0)
+ return SLJIT_SUCCESS;
+
+ mem = TMP_REG1;
+ }
+
+ return push_inst(compiler, ins | RD(TMP_REG1) | RN(mem) | ((sljit_ins)memw << 10));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (!(srcdst & SLJIT_MEM)) {
+ if (type & SLJIT_SIMD_STORE)
+ ins = VD(srcdst) | VN(freg) | VM(freg);
+ else
+ ins = VD(freg) | VN(srcdst) | VM(srcdst);
+
+ if (reg_size == 4)
+ ins |= (1 << 30);
+
+ return push_inst(compiler, ORR_v | ins);
+ }
+
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
+
+ if (elem_size > 3)
+ elem_size = 3;
+
+ ins = (type & SLJIT_SIMD_STORE) ? ST1 : LD1;
+
+ if (reg_size == 4)
+ ins |= (1 << 30);
+
+ return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(freg));
+}
+
+static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
+{
+ sljit_ins result;
+
+ if (elem_size > 2 && (sljit_u32)value == (value >> 32)) {
+ elem_size = 2;
+ value = (sljit_u32)value;
+ }
+
+ if (elem_size == 2 && (sljit_u16)value == (value >> 16)) {
+ elem_size = 1;
+ value = (sljit_u16)value;
+ }
+
+ if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
+ elem_size = 0;
+ value = (sljit_u8)value;
+ }
+
+ switch (elem_size) {
+ case 0:
+ SLJIT_ASSERT(value <= 0xff);
+ result = 0xe000;
+ break;
+ case 1:
+ SLJIT_ASSERT(value <= 0xffff);
+ result = 0;
+
+ while (1) {
+ if (value <= 0xff) {
+ result |= 0x8000;
+ break;
+ }
+
+ if ((value & 0xff) == 0) {
+ value >>= 8;
+ result |= 0xa000;
+ break;
+ }
+
+ if (result != 0)
+ return ~(sljit_ins)0;
+
+ value ^= (sljit_uw)0xffff;
+ result = (1 << 29);
+ }
+ break;
+ case 2:
+ SLJIT_ASSERT(value <= 0xffffffff);
+ result = 0;
+
+ while (1) {
+ if (value <= 0xff) {
+ result |= 0x0000;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff00) == 0) {
+ value >>= 8;
+ result |= 0x2000;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff0000) == 0) {
+ value >>= 16;
+ result |= 0x4000;
+ break;
+ }
+
+ if ((value & ~(sljit_uw)0xff000000) == 0) {
+ value >>= 24;
+ result |= 0x6000;
+ break;
+ }
+
+ if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
+ value >>= 8;
+ result |= 0xc000;
+ break;
+ }
+
+ if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
+ value >>= 16;
+ result |= 0xd000;
+ break;
+ }
+
+ if (result != 0)
+ return ~(sljit_ins)0;
+
+ value ^= (sljit_uw)0xffffffff;
+ result = (1 << 29);
+ }
+ break;
+ default:
+ return ~(sljit_ins)0;
+ }
+
+ return (((sljit_ins)value & 0x1f) << 5) | (((sljit_ins)value & 0xe0) << 11) | result;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imm;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+
+ ins = (sljit_ins)elem_size << 10;
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 30;
+
+ return push_inst(compiler, LD1R | ins | RN(src) | VT(freg));
+ }
+
+ ins = (sljit_ins)1 << (16 + elem_size);
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 30;
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (src == SLJIT_IMM)
+ return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg));
+
+ return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
+ }
+
+ if (src == SLJIT_IMM) {
+ if (elem_size < 3)
+ srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
+
+ imm = simd_get_imm(elem_size, (sljit_uw)srcw);
+
+ if (imm != ~(sljit_ins)0) {
+ imm |= ins & ((sljit_ins)1 << 30);
+
+ return push_inst(compiler, MOVI | imm | VD(freg));
+ }
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
+ src = TMP_REG1;
+ }
+
+ return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30);
+
+ if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
+ FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg)));
+ srcdst = TMP_FREG1;
+ srcdstw = 0;
+ }
+
+ FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg)));
+ }
+
+ if (srcdst & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
+
+ if (elem_size == 3)
+ ins = 0x8400;
+ else if (elem_size == 0)
+ ins = 0;
+ else
+ ins = (sljit_ins)0x2000 << elem_size;
+
+ lane_index = lane_index << elem_size;
+ ins |= (sljit_ins)(((lane_index & 0x8) << 27) | ((lane_index & 0x7) << 10));
+
+ return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? ST1_s : LD1_s) | ins | RN(srcdst) | VT(freg));
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (type & SLJIT_SIMD_STORE)
+ ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg);
+ else
+ ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst);
+
+ return push_inst(compiler, ins);
+ }
+
+ if (srcdst == SLJIT_IMM) {
+ if (elem_size < 3)
+ srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
+
+ FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
+ srcdst = TMP_REG1;
+ }
+
+ if (type & SLJIT_SIMD_STORE) {
+ ins = RD(srcdst) | VN(freg);
+
+ if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) {
+ ins |= SMOV;
+
+ if (!(type & SLJIT_32))
+ ins |= (sljit_ins)1 << 30;
+ } else
+ ins |= UMOV;
+ } else
+ ins = INS | VD(freg) | RN(srcdst);
+
+ if (elem_size == 3)
+ ins |= (sljit_ins)1 << 30;
+
+ return push_inst(compiler, ins | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ ins = (((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size);
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 30;
+
+ return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
+
+ if (reg_size == 4 && elem2_size - elem_size == 1)
+ FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg)));
+ else
+ FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg)));
+ src = freg;
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ SLJIT_ASSERT(reg_size == 4);
+ return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src));
+ }
+
+ do {
+ FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? SSHLL : USHLL)
+ | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src)));
+ src = freg;
+ } while (++elem_size < elem2_size);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins, imms;
+ sljit_s32 dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ switch (elem_size) {
+ case 0:
+ imms = 0x643219;
+ ins = USHR | (0x9 << 16);
+ break;
+ case 1:
+ imms = (reg_size == 4) ? 0x643219 : 0x6231;
+ ins = USHR | (0x11 << 16);
+ break;
+ case 2:
+ imms = (reg_size == 4) ? 0x6231 : 0x61;
+ ins = USHR | (0x21 << 16);
+ break;
+ default:
+ imms = 0x61;
+ ins = USHR | (0x41 << 16);
+ break;
+ }
+
+ if (reg_size == 4)
+ ins |= (1 << 30);
+
+ FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg)));
+
+ if (reg_size == 4 && elem_size > 0)
+ FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+
+ if (imms >= 0x100) {
+ ins = (reg_size == 4 && elem_size == 0) ? (1 << 30) : 0;
+
+ do {
+ FAIL_IF(push_inst(compiler, USRA | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+ imms >>= 8;
+ } while (imms >= 0x100);
+ }
+
+ FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+
+ dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
+ ins = (0x1 << 16);
+
+ if (reg_size == 4 && elem_size == 0) {
+ FAIL_IF(push_inst(compiler, INS_e | (0x3 << 16) | (0x8 << 11) | VD(TMP_FREG1) | VN(TMP_FREG1)));
+ ins = (0x2 << 16);
+ }
+
+ FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1)));
+
+ if (dst_r == TMP_REG1)
+ return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG1, dst, dstw, TMP_REG2);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
+
+ if (reg_size != 3 && reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ switch (SLJIT_SIMD_GET_OPCODE(type)) {
+ case SLJIT_SIMD_OP2_AND:
+ ins = AND_v;
+ break;
+ case SLJIT_SIMD_OP2_OR:
+ ins = ORR_v;
+ break;
+ case SLJIT_SIMD_OP2_XOR:
+ ins = EOR_v;
+ break;
+ }
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (reg_size == 4)
+ ins |= (sljit_ins)1 << 30;
+
+ return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 mem_reg)
+{
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+#ifdef __ARM_FEATURE_ATOMICS
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ ins = LDR ^ (1 << 30);
+ break;
+ case SLJIT_MOV_U16:
+ ins = LDRH;
+ break;
+ case SLJIT_MOV_U8:
+ ins = LDRB;
+ break;
+ default:
+ ins = LDR;
+ break;
+ }
+#else /* !__ARM_FEATURE_ATOMICS */
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ ins = LDXR ^ (1 << 30);
+ break;
+ case SLJIT_MOV_U8:
+ ins = LDXRB;
+ break;
+ case SLJIT_MOV_U16:
+ ins = LDXRH;
+ break;
+ default:
+ ins = LDXR;
+ break;
+ }
+#endif /* ARM_FEATURE_ATOMICS */
+ return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
+{
+ sljit_ins ins;
+ sljit_s32 tmp = temp_reg;
+ sljit_ins cmp = 0;
+ sljit_ins inv_bits = W_OP;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+#ifdef __ARM_FEATURE_ATOMICS
+ if (op & SLJIT_SET_ATOMIC_STORED)
+ cmp = (SUBS ^ W_OP) | RD(TMP_ZERO);
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ ins = CAS ^ (1 << 30);
+ break;
+ case SLJIT_MOV_U16:
+ ins = CASH;
+ break;
+ case SLJIT_MOV_U8:
+ ins = CASB;
+ break;
+ default:
+ ins = CAS;
+ inv_bits = 0;
+ if (cmp)
+ cmp ^= W_OP;
+ break;
+ }
+
+ if (cmp) {
+ FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1)));
+ tmp = TMP_REG1;
+ }
+ FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg)));
+ if (!cmp)
+ return SLJIT_SUCCESS;
+
+ FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg)));
+ FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp)));
+ return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO));
+#else /* !__ARM_FEATURE_ATOMICS */
+ SLJIT_UNUSED_ARG(tmp);
+ SLJIT_UNUSED_ARG(inv_bits);
+
+ if (op & SLJIT_SET_ATOMIC_STORED)
+ cmp = (SUBI ^ W_OP) | (1 << 29);
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ ins = STXR ^ (1 << 30);
+ break;
+ case SLJIT_MOV_U8:
+ ins = STXRB;
+ break;
+ case SLJIT_MOV_U16:
+ ins = STXRH;
+ break;
+ default:
+ ins = STXR;
+ break;
+ }
+
+ FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg)));
+ return cmp ? push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS;
+#endif /* __ARM_FEATURE_ATOMICS */
+}
+
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
sljit_s32 dst_reg;