Diffstat (limited to 'src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c')
-rw-r--r--  src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c  1671
 1 file changed, 1225 insertions(+), 446 deletions(-)
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
index 8b51bad9bc..99e846350f 100644
--- a/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
+++ b/src/3rdparty/pcre2/src/sljit/sljitNativeS390X.c
@@ -38,17 +38,14 @@ SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
return "s390x" SLJIT_CPUINFO;
}
-/* Instructions. */
+/* Instructions are stored as 64 bit values regardless their size. */
typedef sljit_uw sljit_ins;
-/* Instruction tags (most significant halfword). */
-static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;
-
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
-static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
- 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
+static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
+ 0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
};
/* there are also a[2-15] available, but they are slower to access and
@@ -83,7 +80,7 @@ static const sljit_gpr r10 = 10; /* reg_map[9] */
static const sljit_gpr r11 = 11; /* reg_map[10] */
static const sljit_gpr r12 = 12; /* reg_map[11]: GOT */
static const sljit_gpr r13 = 13; /* reg_map[12]: Literal Pool pointer */
-static const sljit_gpr r14 = 14; /* reg_map[0]: return address and flag register */
+static const sljit_gpr r14 = 14; /* reg_map[0]: return address */
static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
@@ -96,20 +93,16 @@ static const sljit_gpr r15 = 15; /* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stac
#define tmp0 r0
#define tmp1 r1
-/* TODO(carenas): flags should move to a different register so that
- * link register doesn't need to change
- */
-
/* When reg cannot be unused. */
#define IS_GPR_REG(reg) ((reg > 0) && (reg) <= SLJIT_SP)
/* Link register. */
static const sljit_gpr link_r = 14; /* r14 */
-#define TMP_FREG1 (0)
+#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
-static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
- 1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8,
+static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
+ 0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
};
#define R0A(r) (r)
@@ -126,7 +119,10 @@ static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
#define F0(r) ((sljit_ins)freg_map[r])
#define F4(r) (R4A((sljit_ins)freg_map[r]))
+#define F12(r) (R12A((sljit_ins)freg_map[r]))
#define F20(r) (R20A((sljit_ins)freg_map[r]))
+#define F28(r) (R28A((sljit_ins)freg_map[r]))
+#define F32(r) (R32A((sljit_ins)freg_map[r]))
#define F36(r) (R36A((sljit_ins)freg_map[r]))
struct sljit_s390x_const {
@@ -141,56 +137,21 @@ static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
return reg_map[r];
}
-static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r)
-{
- SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0])));
- return freg_map[r];
-}
-
-/* Size of instruction in bytes. Tags must already be cleared. */
-static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
-{
- /* keep faulting instructions */
- if (ins == 0)
- return 2;
-
- if ((ins & 0x00000000ffffL) == ins)
- return 2;
- if ((ins & 0x0000ffffffffL) == ins)
- return 4;
- if ((ins & 0xffffffffffffL) == ins)
- return 6;
-
- SLJIT_UNREACHABLE();
- return (sljit_uw)-1;
-}
-
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
FAIL_IF(!ibuf);
*ibuf = ins;
+
+ SLJIT_ASSERT(ins <= 0xffffffffffffL);
+
compiler->size++;
- return SLJIT_SUCCESS;
-}
+ if (ins & 0xffff00000000L)
+ compiler->size++;
-static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
-{
- sljit_u16 *ibuf = (sljit_u16 *)*ptr;
- sljit_uw size = sizeof_ins(ins);
+ if (ins & 0xffffffff0000L)
+ compiler->size++;
- SLJIT_ASSERT((size & 6) == size);
- switch (size) {
- case 6:
- *ibuf++ = (sljit_u16)(ins >> 32);
- /* fallthrough */
- case 4:
- *ibuf++ = (sljit_u16)(ins >> 16);
- /* fallthrough */
- case 2:
- *ibuf++ = (sljit_u16)(ins);
- }
- *ptr = (void*)ibuf;
return SLJIT_SUCCESS;
}
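
The rewritten push_inst() above derives the encoded length from the opcode value itself: every z/Architecture instruction is at least one halfword, and each of the two bit tests on the upper opcode bits adds one more halfword to compiler->size. A standalone sketch of the same rule (the helper name ins_halfwords is illustrative, not part of sljit):

#include <stdint.h>

/* Sketch: instruction length in halfwords, mirroring the tests in push_inst(). */
static unsigned ins_halfwords(uint64_t ins)
{
	unsigned n = 1;                /* 2-byte formats, e.g. 0x1800 (lr) */
	if (ins & 0xffff00000000ULL)   /* bits 32..47 in use: 6-byte formats */
		n++;
	if (ins & 0xffffffff0000ULL)   /* bits 16..47 in use: at least 4 bytes */
		n++;
	return n;                      /* 0x58000000 (l) -> 2, 0xe30000000004 (lg) -> 3 */
}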
@@ -217,6 +178,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
}
/* fallthrough */
+ case SLJIT_ATOMIC_STORED:
case SLJIT_F_EQUAL:
case SLJIT_ORDERED_EQUAL:
return cc0;
@@ -236,6 +198,7 @@ static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 t
return (cc1 | cc2 | cc3);
case SLJIT_LESS:
+ case SLJIT_ATOMIC_NOT_STORED:
return cc1;
case SLJIT_GREATER_EQUAL:
@@ -454,10 +417,12 @@ HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
{
+ sljit_uw dh, dl;
+
SLJIT_ASSERT(is_s20(d));
- sljit_uw dh = (d >> 12) & 0xff;
- sljit_uw dl = (d << 8) & 0xfff00;
+ dh = (d >> 12) & 0xff;
+ dl = ((sljit_uw)d << 8) & 0xfff00;
return (dh | dl) << 8;
}
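
For reference, disp_s20() splits the signed 20-bit displacement across the two RXY-format fields: the low 12 bits (DL) end up in instruction bits 16..27 and the high 8 bits (DH) in bits 8..15, matching the masks above. A standalone sketch (illustrative only):

#include <stdint.h>

static uint64_t encode_disp_s20(int32_t d)   /* d must fit in 20 signed bits */
{
	uint64_t dh = ((uint32_t)d >> 12) & 0xff;              /* bits 12..19 of d */
	uint64_t dl = ((uint64_t)(uint32_t)d << 8) & 0xfff00;  /* bits 0..11 of d  */
	return (dh | dl) << 8;                   /* DL at bits 16..27, DH at 8..15 */
}
/* Example: for d = -8 the DL field becomes 0xff8 and the DH field 0xff,
   i.e. the two's-complement pieces of -8 in a 20-bit field. */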
@@ -899,23 +864,17 @@ static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr t
if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
- /* 6 byte instructions (requires extended immediate facility) */
- if (have_eimm()) {
- if (is_s32(v))
- return push_inst(compiler, lgfi(target, (sljit_s32)v));
+ if (is_s32(v))
+ return push_inst(compiler, lgfi(target, (sljit_s32)v));
- if (((sljit_uw)v >> 32) == 0)
- return push_inst(compiler, llilf(target, (sljit_u32)v));
+ if (((sljit_uw)v >> 32) == 0)
+ return push_inst(compiler, llilf(target, (sljit_u32)v));
- if (((sljit_uw)v << 32) == 0)
- return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
+ if (((sljit_uw)v << 32) == 0)
+ return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
- FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
- return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
- }
-
- /* TODO(mundaym): instruction sequences that don't use extended immediates */
- abort();
+ FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
+ return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
}
struct addr {
@@ -995,24 +954,47 @@ static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
/* May clobber tmp1. */
-static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
- sljit_s32 src, sljit_sw srcw,
- sljit_s32 is_32bit)
+static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
+ sljit_s32 mem, sljit_sw memw,
+ sljit_s32 is_32bit, const sljit_ins* forms)
{
struct addr addr;
- sljit_ins ins;
- SLJIT_ASSERT(src & SLJIT_MEM);
+ SLJIT_ASSERT(mem & SLJIT_MEM);
- if (is_32bit && ((src & OFFS_REG_MASK) || is_u12(srcw) || !is_s20(srcw))) {
- FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
- return push_inst(compiler, 0x58000000 /* l */ | R20A(dst_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
+ if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
+ FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
+ return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
}
- FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
+ FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
+ return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+}
- ins = is_32bit ? 0xe30000000058 /* ly */ : 0xe30000000004 /* lg */;
- return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+static const sljit_ins load_forms[3] = {
+ 0x58000000 /* l */,
+ 0xe30000000058 /* ly */,
+ 0xe30000000004 /* lg */
+};
+
+static const sljit_ins store_forms[3] = {
+ 0x50000000 /* st */,
+ 0xe30000000050 /* sty */,
+ 0xe30000000024 /* stg */
+};
+
+static const sljit_ins load_halfword_forms[3] = {
+ 0x48000000 /* lh */,
+ 0xe30000000078 /* lhy */,
+ 0xe30000000015 /* lgh */
+};
+
+/* May clobber tmp1. */
+static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
+ sljit_s32 src, sljit_sw srcw,
+ sljit_s32 is_32bit)
+{
+ return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
}
/* May clobber tmp1. */
@@ -1032,24 +1014,11 @@ static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr d
}
/* May clobber tmp1. */
-static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
+static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 is_32bit)
{
- struct addr addr;
- sljit_ins ins;
-
- SLJIT_ASSERT(dst & SLJIT_MEM);
-
- if (is_32bit && ((dst & OFFS_REG_MASK) || is_u12(dstw) || !is_s20(dstw))) {
- FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
- return push_inst(compiler, 0x50000000 /* st */ | R20A(src_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
- }
-
- FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
-
- ins = is_32bit ? 0xe30000000050 /* sty */ : 0xe30000000024 /* stg */;
- return push_inst(compiler, ins | R36A(src_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+ return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
}
#undef WHEN
@@ -1058,15 +1027,17 @@ static sljit_s32 emit_move(struct sljit_compiler *compiler,
sljit_gpr dst_r,
sljit_s32 src, sljit_sw srcw)
{
+ sljit_gpr src_r;
+
SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));
- if (src & SLJIT_IMM)
+ if (src == SLJIT_IMM)
return push_load_imm_inst(compiler, dst_r, srcw);
if (src & SLJIT_MEM)
return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
- sljit_gpr src_r = gpr(src & REG_MASK);
+ src_r = gpr(src & REG_MASK);
return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
}
@@ -1259,10 +1230,10 @@ static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 dst, sljit_sw dstw,
sljit_sw srcw)
{
- SLJIT_ASSERT(dst & SLJIT_MEM);
-
sljit_gpr dst_r = tmp1;
+ SLJIT_ASSERT(dst & SLJIT_MEM);
+
if (dst & OFFS_REG_MASK) {
sljit_gpr index = tmp1;
@@ -1421,97 +1392,60 @@ static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const str
return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
}
-SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
+SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
struct sljit_label *label;
struct sljit_jump *jump;
- struct sljit_s390x_const *const_;
- struct sljit_put_label *put_label;
+ struct sljit_const *const_;
sljit_sw executable_offset;
- sljit_uw ins_size = 0; /* instructions */
+ sljit_uw ins_size = compiler->size << 1;
sljit_uw pool_size = 0; /* literal pool */
sljit_uw pad_size;
- sljit_uw i, j = 0;
+ sljit_uw half_count;
+ SLJIT_NEXT_DEFINE_TYPES;
struct sljit_memory_fragment *buf;
- void *code, *code_ptr;
+ sljit_ins *buf_ptr;
+ sljit_ins *buf_end;
+ sljit_u16 *code;
+ sljit_u16 *code_ptr;
sljit_uw *pool, *pool_ptr;
- sljit_sw source, offset; /* TODO(carenas): only need 32 bit */
+ sljit_ins ins;
+ sljit_sw source, offset;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_generate_code(compiler));
reverse_buf(compiler);
- /* branch handling */
- label = compiler->labels;
jump = compiler->jumps;
- put_label = compiler->put_labels;
-
- /* TODO(carenas): compiler->executable_size could be calculated
- * before to avoid the following loop (except for
- * pool_size)
- */
- /* calculate the size of the code */
- for (buf = compiler->buf; buf != NULL; buf = buf->next) {
- sljit_uw len = buf->used_size / sizeof(sljit_ins);
- sljit_ins *ibuf = (sljit_ins *)buf->memory;
- for (i = 0; i < len; ++i, ++j) {
- sljit_ins ins = ibuf[i];
-
- /* TODO(carenas): instruction tag vs size/addr == j
- * using instruction tags for const is creative
- * but unlike all other architectures, and is not
- * done consistently for all other objects.
- * This might need reviewing later.
- */
- if (ins & sljit_ins_const) {
- pool_size += sizeof(*pool);
- ins &= ~sljit_ins_const;
- }
- if (label && label->size == j) {
- label->size = ins_size;
- label = label->next;
- }
- if (jump && jump->addr == j) {
- if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
- /* encoded: */
- /* brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
- /* replace with: */
- /* lgrl %r1, <pool_addr> */
- /* bras %r14, %r1 (or bcr <mask>, %r1) */
- pool_size += sizeof(*pool);
- ins_size += 2;
- }
- jump = jump->next;
- }
- if (put_label && put_label->addr == j) {
- pool_size += sizeof(*pool);
- put_label = put_label->next;
- }
- ins_size += sizeof_ins(ins);
+ while (jump != NULL) {
+ if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
+ /* encoded: */
+ /* brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
+ /* replace with: */
+ /* lgrl %r1, <pool_addr> */
+ /* bras %r14, %r1 (or bcr <mask>, %r1) */
+ pool_size += sizeof(*pool);
+ if (!(jump->flags & JUMP_MOV_ADDR))
+ ins_size += 2;
}
+ jump = jump->next;
}
- /* emit trailing label */
- if (label && label->size == j) {
- label->size = ins_size;
- label = label->next;
+ const_ = compiler->consts;
+ while (const_) {
+ pool_size += sizeof(*pool);
+ const_ = const_->next;
}
- SLJIT_ASSERT(!label);
- SLJIT_ASSERT(!jump);
- SLJIT_ASSERT(!put_label);
-
/* pad code size to 8 bytes so is accessible with half word offsets */
/* the literal pool needs to be doubleword aligned */
pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
SLJIT_ASSERT(pad_size < 8UL);
/* allocate target buffer */
- code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
- compiler->exec_allocator_data);
+ code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset);
PTR_FAIL_WITH_EXEC_IF(code);
code_ptr = code;
- executable_offset = SLJIT_EXEC_OFFSET(code);
/* TODO(carenas): pool is optional, and the ABI recommends it to
* be created before the function code, instead of
@@ -1520,126 +1454,166 @@ SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compil
*/
pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
pool_ptr = pool;
- const_ = (struct sljit_s390x_const *)compiler->consts;
+ buf = compiler->buf;
+ half_count = 0;
- /* update label addresses */
label = compiler->labels;
- while (label) {
- label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
- (sljit_uw)code_ptr + label->size, executable_offset);
- label = label->next;
- }
-
- /* reset jumps */
jump = compiler->jumps;
- put_label = compiler->put_labels;
+ const_ = compiler->consts;
+ SLJIT_NEXT_INIT_TYPES();
+ SLJIT_GET_NEXT_MIN();
- /* emit the code */
- j = 0;
- for (buf = compiler->buf; buf != NULL; buf = buf->next) {
- sljit_uw len = buf->used_size / sizeof(sljit_ins);
- sljit_ins *ibuf = (sljit_ins *)buf->memory;
- for (i = 0; i < len; ++i, ++j) {
- sljit_ins ins = ibuf[i];
- if (ins & sljit_ins_const) {
- /* clear the const tag */
- ins &= ~sljit_ins_const;
+ do {
+ buf_ptr = (sljit_ins*)buf->memory;
+ buf_end = buf_ptr + (buf->used_size >> 3);
+ do {
+ ins = *buf_ptr++;
+
+ if (next_min_addr == half_count) {
+ SLJIT_ASSERT(!label || label->size >= half_count);
+ SLJIT_ASSERT(!jump || jump->addr >= half_count);
+ SLJIT_ASSERT(!const_ || const_->addr >= half_count);
+
+ if (next_min_addr == next_label_size) {
+ label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+ label = label->next;
+ next_label_size = SLJIT_GET_NEXT_SIZE(label);
+ }
- /* update instruction with relative address of constant */
- source = (sljit_sw)code_ptr;
- offset = (sljit_sw)pool_ptr - source;
+ if (next_min_addr == next_jump_addr) {
+ if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) {
+ source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+
+ jump->addr = (sljit_uw)pool_ptr;
+
+ /* store target into pool */
+ offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
+ pool_ptr++;
+
+ SLJIT_ASSERT(!(offset & 1));
+ offset >>= 1;
+ SLJIT_ASSERT(is_s32(offset));
+ ins |= (sljit_ins)offset & 0xffffffff;
+ } else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {
+ sljit_ins arg;
+
+ jump->addr = (sljit_uw)pool_ptr;
+
+ /* load address into tmp1 */
+ source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+ offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
+
+ SLJIT_ASSERT(!(offset & 1));
+ offset >>= 1;
+ SLJIT_ASSERT(is_s32(offset));
+
+ code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */);
+ code_ptr[1] = (sljit_u16)(offset >> 16);
+ code_ptr[2] = (sljit_u16)offset;
+ code_ptr += 3;
+ pool_ptr++;
+
+ /* branch to tmp1 */
+ arg = (ins >> 36) & 0xf;
+ if (((ins >> 32) & 0xf) == 4) {
+ /* brcl -> bcr */
+ ins = bcr(arg, tmp1);
+ } else {
+ SLJIT_ASSERT(((ins >> 32) & 0xf) == 5);
+ /* brasl -> basr */
+ ins = basr(arg, tmp1);
+ }
+
+ /* Adjust half_count. */
+ half_count += 2;
+ } else
+ jump->addr = (sljit_uw)code_ptr;
+
+ jump = jump->next;
+ next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
+ } else if (next_min_addr == next_const_addr) {
+ /* update instruction with relative address of constant */
+ source = (sljit_sw)code_ptr;
+ offset = (sljit_sw)pool_ptr - source;
+
+ SLJIT_ASSERT(!(offset & 0x1));
+ offset >>= 1; /* halfword (not byte) offset */
+ SLJIT_ASSERT(is_s32(offset));
- SLJIT_ASSERT(!(offset & 1));
- offset >>= 1; /* halfword (not byte) offset */
- SLJIT_ASSERT(is_s32(offset));
+ ins |= (sljit_ins)offset & 0xffffffff;
- ins |= (sljit_ins)offset & 0xffffffff;
+ /* update address */
+ const_->addr = (sljit_uw)pool_ptr;
- /* update address */
- const_->const_.addr = (sljit_uw)pool_ptr;
+ /* store initial value into pool and update pool address */
+ *(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value);
- /* store initial value into pool and update pool address */
- *(pool_ptr++) = (sljit_uw)const_->init_value;
+ /* move to next constant */
+ const_ = const_->next;
+ next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
+ }
- /* move to next constant */
- const_ = (struct sljit_s390x_const *)const_->const_.next;
+ SLJIT_GET_NEXT_MIN();
}
- if (jump && jump->addr == j) {
- sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
- if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
- jump->addr = (sljit_uw)pool_ptr;
- /* load address into tmp1 */
- source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
- offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
+ if (ins & 0xffff00000000L) {
+ *code_ptr++ = (sljit_u16)(ins >> 32);
+ half_count++;
+ }
- SLJIT_ASSERT(!(offset & 1));
- offset >>= 1;
- SLJIT_ASSERT(is_s32(offset));
+ if (ins & 0xffffffff0000L) {
+ *code_ptr++ = (sljit_u16)(ins >> 16);
+ half_count++;
+ }
- encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff));
-
- /* store jump target into pool and update pool address */
- *(pool_ptr++) = (sljit_uw)target;
-
- /* branch to tmp1 */
- sljit_ins op = (ins >> 32) & 0xf;
- sljit_ins arg = (ins >> 36) & 0xf;
- switch (op) {
- case 4: /* brcl -> bcr */
- ins = bcr(arg, tmp1);
- break;
- case 5: /* brasl -> basr */
- ins = basr(arg, tmp1);
- break;
- default:
- abort();
- }
- }
- else {
- jump->addr = (sljit_uw)code_ptr + 2;
- source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
- offset = target - source;
+ *code_ptr++ = (sljit_u16)ins;
+ half_count++;
+ } while (buf_ptr < buf_end);
- /* offset must be halfword aligned */
- SLJIT_ASSERT(!(offset & 1));
- offset >>= 1;
- SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */
+ buf = buf->next;
+ } while (buf);
- /* patch jump target */
- ins |= (sljit_ins)offset & 0xffffffff;
- }
- jump = jump->next;
- }
- if (put_label && put_label->addr == j) {
- source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+ if (next_label_size == half_count) {
+ label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+ label = label->next;
+ }
- SLJIT_ASSERT(put_label->label);
- put_label->addr = (sljit_uw)code_ptr;
+ SLJIT_ASSERT(!label);
+ SLJIT_ASSERT(!jump);
+ SLJIT_ASSERT(!const_);
+ SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr);
+ SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);
- /* store target into pool */
- *pool_ptr = put_label->label->addr;
- offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
- pool_ptr++;
+ jump = compiler->jumps;
+ while (jump != NULL) {
+ offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);
- SLJIT_ASSERT(!(offset & 1));
- offset >>= 1;
- SLJIT_ASSERT(is_s32(offset));
- ins |= (sljit_ins)offset & 0xffffffff;
+ if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
+ /* Store jump target into pool. */
+ *(sljit_uw*)(jump->addr) = (sljit_uw)offset;
+ } else {
+ code_ptr = (sljit_u16*)jump->addr;
+ offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
- put_label = put_label->next;
- }
- encode_inst(&code_ptr, ins);
+ /* offset must be halfword aligned */
+ SLJIT_ASSERT(!(offset & 1));
+ offset >>= 1;
+ SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */
+
+ code_ptr[1] = (sljit_u16)(offset >> 16);
+ code_ptr[2] = (sljit_u16)offset;
}
+ jump = jump->next;
}
- SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
- SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);
compiler->error = SLJIT_ERR_COMPILED;
compiler->executable_offset = executable_offset;
compiler->executable_size = ins_size;
- code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
- code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
+ if (pool_size)
+ compiler->executable_size += (pad_size + pool_size);
+
+ code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
+ code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
SLJIT_CACHE_FLUSH(code, code_ptr);
SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
return code;
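
The trailing loop above patches plain (non-rewritable) jumps in place: RIL-format branches such as brasl and brcl hold a signed 32-bit offset counted in halfwords, which is why the code asserts the offset is even and shifts it right once before writing halfwords 1 and 2 of the instruction. A reduced sketch of that patch step (the function name is illustrative):

#include <stdint.h>
#include <assert.h>

static void patch_ril_offset(uint16_t *insn, intptr_t target, intptr_t source)
{
	intptr_t offset = target - source;  /* byte distance from the branch itself */
	assert((offset & 1) == 0);          /* must be halfword aligned */
	offset >>= 1;                       /* RIL offsets count halfwords */
	insn[1] = (uint16_t)(offset >> 16);
	insn[2] = (uint16_t)offset;
}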
@@ -1650,12 +1624,25 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
/* TODO(mundaym): implement all */
switch (feature_type) {
case SLJIT_HAS_FPU:
+#ifdef SLJIT_IS_FPU_AVAILABLE
+ return (SLJIT_IS_FPU_AVAILABLE) != 0;
+#else
+ return 1;
+#endif /* SLJIT_IS_FPU_AVAILABLE */
+
case SLJIT_HAS_CLZ:
+ case SLJIT_HAS_REV:
case SLJIT_HAS_ROT:
case SLJIT_HAS_PREFETCH:
+ case SLJIT_HAS_COPY_F32:
+ case SLJIT_HAS_COPY_F64:
+ case SLJIT_HAS_SIMD:
+ case SLJIT_HAS_ATOMIC:
return 1;
+
case SLJIT_HAS_CTZ:
return 2;
+
case SLJIT_HAS_CMOV:
return have_lscond1() ? 1 : 0;
}
@@ -1664,7 +1651,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
- return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
+ SLJIT_UNUSED_ARG(type);
+ return 0;
}
/* --------------------------------------------------------------------- */
@@ -1741,7 +1729,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi
local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
compiler->local_size = local_size;
- FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
+ if (is_s20(-local_size))
+ FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
+ else
+ FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));
if (options & SLJIT_ENTER_REG_ARG)
return SLJIT_SUCCESS;
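
The prologue above now chooses the stack adjustment by range: lay takes a signed 20-bit displacement, so frames up to 512 KiB are allocated with lay %r15,-local_size(%r15), while larger frames fall back to slgfi, which subtracts a 32-bit immediate. A quick sketch of the range test applied to -local_size:

static int fits_s20(long d)
{
	return d >= -524288 && d <= 524287;   /* signed 20-bit range */
}
/* fits_s20(-65536)   -> 1: "lay %r15,-65536(%r15)" is emitted
   fits_s20(-1048576) -> 0: "slgfi %r15,1048576" is emitted instead */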
@@ -1786,8 +1777,10 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit
if (is_u12(local_size))
FAIL_IF(push_inst(compiler, 0x41000000 /* ly */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
- else
+ else if (is_s20(local_size))
FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
+ else
+ FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));
offset = 2 * SSIZE_OF(sw);
if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
@@ -2011,12 +2004,85 @@ static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 o
return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
}
+static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ struct addr addr;
+ sljit_gpr reg;
+ sljit_ins ins;
+ sljit_s32 opcode = GET_OPCODE(op);
+ sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);
+
+ if (dst & SLJIT_MEM) {
+ if (src & SLJIT_MEM) {
+ FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));
+ reg = tmp0;
+ } else
+ reg = gpr(src);
+
+ FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
+
+ if (is_16bit)
+ ins = 0xe3000000003f /* strvh */;
+ else
+ ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;
+
+ return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
+ }
+
+ reg = gpr(dst);
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
+
+ if (is_16bit)
+ ins = 0xe3000000001f /* lrvh */;
+ else
+ ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;
+
+ FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));
+
+ if (opcode == SLJIT_REV)
+ return SLJIT_SUCCESS;
+
+ if (is_16bit) {
+ if (op & SLJIT_32)
+ ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
+ else
+ ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;
+ } else
+ ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
+
+ return push_inst(compiler, ins | R4A(reg) | R0A(reg));
+ }
+
+ ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
+ FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));
+
+ if (opcode == SLJIT_REV)
+ return SLJIT_SUCCESS;
+
+ if (!is_16bit) {
+ ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
+ return push_inst(compiler, ins | R4A(reg) | R0A(reg));
+ }
+
+ if (op & SLJIT_32) {
+ ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
+ return push_inst(compiler, ins | R20A(reg) | 16);
+ }
+
+ ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
+ return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));
+}
+
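
sljit_emit_rev() above picks lrv/lrvg (or strv/strvg when the destination is memory) and widens 16-bit results separately; the register path reverses the full word with lrvr/lrvgr and then shifts the reversed halfword back down with srl (unsigned) or sra (signed). The intended results, expressed in plain C (reference sketch, not part of sljit):

#include <stdint.h>

static uint16_t rev_u16(uint16_t x) { return (uint16_t)((x >> 8) | (x << 8)); }
static uint32_t rev_u32(uint32_t x) { return __builtin_bswap32(x);  /* GCC/Clang builtin */ }
static uint64_t rev_u64(uint64_t x) { return __builtin_bswap64(x); }
/* SLJIT_REV_S16 is rev_u16() followed by sign extension, which is why the
   signed variant above uses sra/srag rather than srl/srlg. */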
/* LEVAL will be defined later with different parameters as needed */
#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 dst, sljit_sw dstw,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
{
sljit_ins ins;
struct addr mem;
@@ -2087,7 +2153,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
/* LOAD IMMEDIATE */
- if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
+ if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
switch (opcode) {
case SLJIT_MOV_U8:
srcw = (sljit_sw)((sljit_u8)(srcw));
@@ -2166,14 +2232,14 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
return SLJIT_SUCCESS;
}
/* STORE and STORE IMMEDIATE */
- if ((dst & SLJIT_MEM)
- && (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
+ if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
+ struct addr mem;
sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
- if (src & SLJIT_IMM) {
+
+ if (src == SLJIT_IMM) {
/* TODO(mundaym): MOVE IMMEDIATE? */
FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
}
- struct addr mem;
FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
switch (opcode) {
case SLJIT_MOV_U8:
@@ -2240,39 +2306,15 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
SLJIT_UNREACHABLE();
}
- SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
+ SLJIT_ASSERT(src != SLJIT_IMM);
- dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
- src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
+ dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
+ src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
/* TODO(mundaym): optimize loads and stores */
switch (opcode) {
- case SLJIT_NOT:
- if (src & SLJIT_MEM)
- FAIL_IF(load_word(compiler, src_r, src, srcw, op & SLJIT_32));
-
- /* emulate ~x with x^-1 */
- if (!(op & SLJIT_32)) {
- FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
- if (src_r != dst_r)
- FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
-
- FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
- break;
- }
-
- if (have_eimm())
- FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
- else {
- FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
- if (src_r != dst_r)
- FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));
-
- FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
- }
- break;
case SLJIT_CLZ:
case SLJIT_CTZ:
if (src & SLJIT_MEM)
@@ -2280,13 +2322,18 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compile
FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
break;
+ case SLJIT_REV_U32:
+ case SLJIT_REV_S32:
+ op |= SLJIT_32;
+ /* fallthrough */
+ case SLJIT_REV:
+ case SLJIT_REV_U16:
+ case SLJIT_REV_S16:
+ return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
default:
SLJIT_UNREACHABLE();
}
- if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
- FAIL_IF(update_zero_overflow(compiler, op, dst_r));
-
if (dst & SLJIT_MEM)
return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
@@ -2337,7 +2384,7 @@ static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
const struct ins_forms *forms;
sljit_ins ins;
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
if (sets_overflow)
ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
@@ -2417,14 +2464,13 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
const struct ins_forms *forms;
sljit_ins ins;
- if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
+ if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
int compare_signed = flag_type >= SLJIT_SIG_LESS;
compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
- if (src2 & SLJIT_IMM) {
- if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
- {
+ if (src2 == SLJIT_IMM) {
+ if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
if ((op & SLJIT_32) || is_s32(src2w)) {
ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
@@ -2465,7 +2511,7 @@ static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
goto done;
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
sljit_sw neg_src2w = -src2w;
if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
@@ -2518,7 +2564,7 @@ done:
- the first operand is less if the sign bit of the result is not set
The -result operation sets the corrent sign, because the result cannot be zero.
The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
- FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2)));
+ FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));
FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
}
else if (op & SLJIT_SET_Z)
@@ -2573,7 +2619,7 @@ static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32
return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
}
- if (src2 & SLJIT_IMM) {
+ if (src2 == SLJIT_IMM) {
if (is_s16(src2w)) {
ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
@@ -2680,7 +2726,7 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o
sljit_s32 type = GET_OPCODE(op);
const struct ins_forms *forms;
- if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
+ if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) {
sljit_s32 count16 = 0;
sljit_uw imm = (sljit_uw)src2w;
@@ -2696,21 +2742,21 @@ static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 o
if ((imm & 0xffff000000000000ull) != 0)
count16++;
- if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) {
- sljit_gpr src_r = tmp0;
+ if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) {
+ sljit_gpr src_r = tmp1;
if (FAST_IS_REG(src1))
src_r = gpr(src1 & REG_MASK);
else
- FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
+ FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
- return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm);
+ return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
if ((imm & 0x00000000ffff0000ull) != 0)
- return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16));
+ return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
if ((imm & 0x0000ffff00000000ull) != 0)
- return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32));
- return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48));
+ return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
+ return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
}
if (!(op & SLJIT_SET_Z))
@@ -2744,7 +2790,7 @@ static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
if (FAST_IS_REG(src2))
base_r = gpr(src2);
else {
@@ -2804,7 +2850,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op
else
FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
if (FAST_IS_REG(src2))
base_r = gpr(src2);
else {
@@ -2814,7 +2860,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op
}
if (GET_OPCODE(op) == SLJIT_ROTR) {
- if (!(src2 & SLJIT_IMM)) {
+ if (src2 != SLJIT_IMM) {
ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
base_r = tmp1;
@@ -2822,7 +2868,7 @@ static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op
src2w = -src2w;
}
- if (src2 & SLJIT_IMM)
+ if (src2 == SLJIT_IMM)
imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
@@ -2863,7 +2909,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compile
compiler->mode = op & SLJIT_32;
compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
- if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
+ if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
src1 ^= src2;
src2 ^= src1;
src1 ^= src2;
@@ -2923,130 +2969,153 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compil
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+ sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1;
+
CHECK_ERROR();
CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
+ return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
- sljit_s32 src_dst,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
sljit_s32 src1, sljit_sw src1w,
sljit_s32 src2, sljit_sw src2w)
{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MULADD:
+ SLJIT_SKIP_CHECKS(compiler);
+ FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w));
+ return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0));
+ }
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 src1_reg,
+ sljit_s32 src2_reg,
+ sljit_s32 src3, sljit_sw src3w)
+{
sljit_s32 is_right;
sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
- sljit_gpr src_dst_r = gpr(src_dst);
- sljit_gpr src1_r = tmp0;
- sljit_gpr src2_r = tmp1;
+ sljit_gpr dst_r = gpr(dst_reg);
+ sljit_gpr src1_r = gpr(src1_reg);
+ sljit_gpr src2_r = gpr(src2_reg);
+ sljit_gpr src3_r = tmp1;
sljit_ins ins;
CHECK_ERROR();
- CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
+ CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
- if (src_dst == src1) {
+ if (src1_reg == src2_reg) {
SLJIT_SKIP_CHECKS(compiler);
- return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
+ return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
}
- ADJUST_LOCAL_OFFSET(src1, src1w);
- ADJUST_LOCAL_OFFSET(src2, src2w);
+ ADJUST_LOCAL_OFFSET(src3, src3w);
- if (src1 & SLJIT_MEM)
- FAIL_IF(load_word(compiler, tmp0, src1, src1w, op & SLJIT_32));
- else if (src1 & SLJIT_IMM)
- FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
- else
- src1_r = gpr(src1);
+ if (src3 == SLJIT_IMM) {
+ src3w &= bit_length - 1;
- if (src2 & SLJIT_IMM) {
- src2w &= bit_length - 1;
-
- if (src2w == 0)
+ if (src3w == 0)
return SLJIT_SUCCESS;
- } else if (!(src2 & SLJIT_MEM))
- src2_r = gpr(src2);
- else
- FAIL_IF(load_word(compiler, tmp1, src2, src2w, op & SLJIT_32));
- if (src2 & SLJIT_IMM) {
if (op & SLJIT_32) {
- ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
- FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | (sljit_ins)src2w));
+ if (dst_r == src1_r) {
+ ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
+ FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
+ } else {
+ ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
+ FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
+ }
} else {
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
- FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | ((sljit_ins)src2w << 16)));
+ FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
}
ins = 0xec0000000055 /* risbg */;
if (is_right) {
- src2w = bit_length - src2w;
- ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src2w) << 16) | ((sljit_ins)src2w << 8);
+ src3w = bit_length - src3w;
+ ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
} else
- ins |= ((sljit_ins)(64 - src2w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)src2w << 8);
+ ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);
- return push_inst(compiler, ins | R36A(src_dst_r) | R32A(src1_r));
+ return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
}
+ if (!(src3 & SLJIT_MEM)) {
+ src3_r = gpr(src3);
+
+ if (dst_r == src3_r) {
+ FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
+ src3_r = tmp1;
+ }
+ } else
+ FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));
+
if (op & SLJIT_32) {
if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
- if (src2_r != tmp1) {
- FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src2_r) | (59 << 24) | (1 << 23) | (63 << 16)));
- src2_r = tmp1;
+ if (src3_r != tmp1) {
+ FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
+ src3_r = tmp1;
} else
FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
}
- ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
- FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | R12A(src2_r)));
+ if (dst_r == src1_r) {
+ ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
+ FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
+ } else {
+ ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
+ FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
+ }
- if (src2_r != tmp1) {
+ if (src3_r != tmp1) {
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
- FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src2_r)));
+ FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
} else
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
- if (src1_r == tmp0) {
- ins = is_right ? 0x89000000 /* sll */ : 0x88000000 /* srl */;
- FAIL_IF(push_inst(compiler, ins | R20A(tmp0) | R12A(tmp1) | 0x1));
- } else {
- ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
- FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1) | (0x1 << 16)));
- }
+ ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
+ FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));
- return push_inst(compiler, 0x1600 /* or */ | R4A(src_dst_r) | R0A(tmp0));
+ return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
}
ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
- FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | R28A(src2_r)));
+ FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
- if (src2_r != tmp1)
+ if (src3_r != tmp1)
FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
- FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | (0x1 << 16)));
- src1_r = tmp0;
+ FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
+ src2_r = tmp0;
- if (src2_r != tmp1)
- FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src2_r)));
+ if (src3_r != tmp1)
+ FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
else
FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
} else
- FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src2_r)));
+ FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));
- FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1)));
- return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(src_dst_r) | R0A(tmp0));
+ FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
+ return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
}
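
sljit_emit_shift_into() above implements a funnel shift: dst receives src1 shifted by src3, with the vacated bit positions filled from src2 (the xilf/lcgr and risbg sequences handle the variable-count case). Its intended result for the 64-bit form, written as plain C (reference sketch; n is assumed already reduced to 1..63):

#include <stdint.h>

static uint64_t shift_into_left(uint64_t src1, uint64_t src2, unsigned n)
{
	return (src1 << n) | (src2 >> (64 - n));   /* SLJIT_SHL / SLJIT_MSHL */
}

static uint64_t shift_into_right(uint64_t src1, uint64_t src2, unsigned n)
{
	return (src1 >> n) | (src2 << (64 - n));   /* SLJIT_LSHR / SLJIT_MLSHR */
}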
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
- struct sljit_compiler *compiler,
- sljit_s32 op, sljit_s32 src, sljit_sw srcw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src, sljit_sw srcw)
{
sljit_gpr src_r;
struct addr addr;
@@ -3077,16 +3146,46 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw)
{
- CHECK_REG_INDEX(check_sljit_get_register_index(reg));
- return (sljit_s32)gpr(reg);
+ sljit_gpr dst_r = link_r;
+ sljit_s32 size;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ switch (op) {
+ case SLJIT_FAST_ENTER:
+ if (FAST_IS_REG(dst))
+ return push_inst(compiler, lgr(gpr(dst), link_r));
+ break;
+ case SLJIT_GET_RETURN_ADDRESS:
+ dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
+
+ size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
+ FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
+ break;
+ }
+
+ if (dst & SLJIT_MEM)
+ return store_word(compiler, dst_r, dst, dstw, 0);
+
+ return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
{
- CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
- return (sljit_s32)fgpr(reg);
+ CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
+
+ if (type == SLJIT_GP_REGISTER)
+ return (sljit_s32)gpr(reg);
+
+ if (type != SLJIT_FLOAT_REGISTER)
+ return -1;
+
+ return (sljit_s32)freg_map[reg];
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
@@ -3177,33 +3276,61 @@ static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_comp
return SLJIT_SUCCESS;
}
-static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
sljit_s32 dst, sljit_sw dstw,
sljit_s32 src, sljit_sw srcw)
{
sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
- sljit_ins ins;
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
src = (sljit_s32)tmp0;
}
else if (src & SLJIT_MEM) {
- FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32));
+ FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));
src = (sljit_s32)tmp0;
}
+ FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
+
+ if (dst & SLJIT_MEM)
+ return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);
+
+ return SLJIT_SUCCESS;
+}
+
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins ins;
+
+ if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
+ srcw = (sljit_s32)srcw;
+
if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
else
ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
- FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
+ return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
+}
- if (dst & SLJIT_MEM)
- return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
+static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst, sljit_sw dstw,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_ins ins;
- return SLJIT_SUCCESS;
+ if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
+ srcw = (sljit_u32)srcw;
+
+ if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
+ ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
+ else
+ ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
+
+ return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
}
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
@@ -3275,12 +3402,10 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compil
FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
}
- if (!(dst & SLJIT_MEM))
- return SLJIT_SUCCESS;
-
- SLJIT_ASSERT(dst_r == TMP_FREG1);
+ if (dst & SLJIT_MEM)
+ return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
- return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
+ return SLJIT_SUCCESS;
}
#define FLOAT_MOV(op, dst_r, src_r) \
@@ -3351,25 +3476,94 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compil
if (dst & SLJIT_MEM)
return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
- SLJIT_ASSERT(dst_r != TMP_FREG1);
return SLJIT_SUCCESS;
}
-/* --------------------------------------------------------------------- */
-/* Other instructions */
-/* --------------------------------------------------------------------- */
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2, sljit_sw src2w)
+{
+ sljit_s32 reg;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+ ADJUST_LOCAL_OFFSET(src2, src2w);
+
+ if (src2 & SLJIT_MEM) {
+ FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
+ src2 = TMP_FREG1;
+ }
+
+ if (src1 & SLJIT_MEM) {
+ reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
+ FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
+ src1 = reg;
+ }
+
+ return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
+}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f32 value)
{
+ union {
+ sljit_s32 imm;
+ sljit_f32 value;
+ } u;
+
CHECK_ERROR();
- CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
- ADJUST_LOCAL_OFFSET(dst, dstw);
+ CHECK(check_sljit_emit_fset32(compiler, freg, value));
+
+ u.value = value;
+
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
+ return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
+ sljit_s32 freg, sljit_f64 value)
+{
+ union {
+ sljit_sw imm;
+ sljit_f64 value;
+ } u;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fset64(compiler, freg, value));
+
+ u.value = value;
+
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
+ return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 freg, sljit_s32 reg)
+{
+ sljit_gpr gen_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
+
+ gen_r = gpr(reg);
+
+ if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
+ if (op & SLJIT_32) {
+ FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
+ gen_r = tmp0;
+ }
+
+ return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
+ }
+
+ FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));
- if (FAST_IS_REG(dst))
- return push_inst(compiler, lgr(gpr(dst), link_r));
+ if (!(op & SLJIT_32))
+ return SLJIT_SUCCESS;
- /* memory */
- return store_word(compiler, link_r, dst, dstw, 0);
+ return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
}
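
sljit_emit_fset32()/fset64() and sljit_emit_fcopy() above move raw bit patterns between general and floating-point registers with ldgr/lgdr; the 32-bit variants shift by 32 because short BFP values occupy the leftmost 32 bits of an FPR. A sketch of the 64-bit image built for fset32 (illustrative only):

#include <stdint.h>
#include <string.h>

static uint64_t fpr_image_from_f32(float value)
{
	uint32_t bits;
	memcpy(&bits, &value, sizeof(bits));   /* same reinterpretation as the union above */
	return (uint64_t)bits << 32;           /* short BFP sits in the high half of the FPR */
}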
/* --------------------------------------------------------------------- */
@@ -3394,14 +3588,14 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compi
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
+ struct sljit_jump *jump;
sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
CHECK_ERROR_PTR();
CHECK_PTR(check_sljit_emit_jump(compiler, type));
/* record jump */
- struct sljit_jump *jump = (struct sljit_jump *)
- ensure_abuf(compiler, sizeof(struct sljit_jump));
+ jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
PTR_FAIL_IF(!jump);
set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
jump->addr = compiler->size;
@@ -3439,7 +3633,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compi
CHECK_ERROR();
CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
- if (src & SLJIT_IMM) {
+ if (src == SLJIT_IMM) {
SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
}
@@ -3459,6 +3653,8 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi
sljit_s32 arg_types,
sljit_s32 src, sljit_sw srcw)
{
+ SLJIT_UNUSED_ARG(arg_types);
+
CHECK_ERROR();
CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
@@ -3490,13 +3686,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
sljit_s32 dst, sljit_sw dstw,
sljit_s32 type)
{
+ sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
+ sljit_gpr loc_r = tmp1;
sljit_u8 mask = get_cc(compiler, type);
CHECK_ERROR();
CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
- sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
- sljit_gpr loc_r = tmp1;
switch (GET_OPCODE(op)) {
case SLJIT_AND:
case SLJIT_OR:
@@ -3526,8 +3722,9 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
FAIL_IF(push_inst(compiler,
WHEN2(op & SLJIT_32, lochi, locghi)));
} else {
- /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
- abort();
+ FAIL_IF(push_load_imm_inst(compiler, loc_r, 1));
+ FAIL_IF(push_inst(compiler, brc(mask, 2 + 2)));
+ FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
}
#undef LEVAL
@@ -3556,37 +3753,125 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *co
return SLJIT_SUCCESS;
}
-SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
sljit_s32 dst_reg,
- sljit_s32 src, sljit_sw srcw)
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_reg)
{
- sljit_ins mask = get_cc(compiler, type & ~SLJIT_32);
+ sljit_ins mask;
sljit_gpr src_r;
+ sljit_gpr dst_r = gpr(dst_reg);
sljit_ins ins;
CHECK_ERROR();
- CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
+ CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
- if (type & SLJIT_32)
- srcw = (sljit_s32)srcw;
+ ADJUST_LOCAL_OFFSET(src1, src1w);
- if (have_lscond2() && (src & SLJIT_IMM) && is_s16(srcw)) {
- ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
- return push_inst(compiler, ins | R36A(gpr(dst_reg)) | (mask << 32) | (sljit_ins)(srcw & 0xffff) << 16);
+ if (dst_reg != src2_reg) {
+ if (src1 == dst_reg) {
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1;
+ } else {
+ if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
+ FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));
+ src1 = src2_reg;
+ src1w = 0;
+ type ^= 0x1;
+ } else
+ FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
+ }
}
- if (src & SLJIT_IMM) {
- FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
- src_r = tmp0;
+ mask = get_cc(compiler, type & ~SLJIT_32);
+
+ if (src1 & SLJIT_MEM) {
+ if (src1 & OFFS_REG_MASK) {
+ src_r = gpr(OFFS_REG(src1));
+
+ if (src1w != 0) {
+ FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
+ src_r = tmp1;
+ }
+
+ FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
+ src_r = tmp1;
+ src1w = 0;
+ } else if (!is_s20(src1w)) {
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
+
+ if (src1 & REG_MASK)
+ FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
+
+ src_r = tmp1;
+ src1w = 0;
+ } else
+ src_r = gpr(src1 & REG_MASK);
+
+ ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
+ return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
+ }
+
+ if (src1 == SLJIT_IMM) {
+ if (type & SLJIT_32)
+ src1w = (sljit_s32)src1w;
+
+ if (have_lscond2() && is_s16(src1w)) {
+ ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
+ return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
+ }
+
+ FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
+ src_r = tmp1;
} else
- src_r = gpr(src);
+ src_r = gpr(src1);
+
+ ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
+ return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg,
+ sljit_s32 src1, sljit_sw src1w,
+ sljit_s32 src2_freg)
+{
+ sljit_ins ins;
+ struct sljit_label *label;
+ struct sljit_jump *jump;
- if (have_lscond1()) {
- ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
- return push_inst(compiler, ins | (mask << 12) | R4A(gpr(dst_reg)) | R0A(src_r));
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
+
+ ADJUST_LOCAL_OFFSET(src1, src1w);
+
+ if (dst_freg != src2_freg) {
+ if (dst_freg == src1) {
+ src1 = src2_freg;
+ src1w = 0;
+ type ^= 0x1;
+ } else {
+ ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
+ FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
+ }
}
- return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
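+	/* There is no FP conditional move: emit a jump with the inverted condition that skips the load of src1. */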
+ SLJIT_SKIP_CHECKS(compiler);
+ jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
+ FAIL_IF(!jump);
+
+ if (!(src1 & SLJIT_MEM)) {
+ ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
+ FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
+ } else
+ FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));
+
+ SLJIT_SKIP_CHECKS(compiler);
+ label = sljit_emit_label(compiler);
+ FAIL_IF(!label);
+
+ sljit_set_label(jump, label);
+ return SLJIT_SUCCESS;
}
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
@@ -3648,6 +3933,502 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compile
return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
}
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ struct addr addr;
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (!(srcdst & SLJIT_MEM)) {
+ if (type & SLJIT_SIMD_STORE)
+ ins = F36(srcdst) | F32(freg);
+ else
+ ins = F36(freg) | F32(srcdst);
+
+ return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
+ }
+
+ FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
+ ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
+
+ if (alignment >= 4)
+ ins |= 4 << 12;
+ else if (alignment == 3)
+ ins |= 3 << 12;
+
+ return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ struct addr addr;
+ sljit_gpr reg;
+ sljit_sw sign_ext;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
+ return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg)
+ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (src == SLJIT_IMM)
+ return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg));
+
+ return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12));
+ }
+
+ if (src == SLJIT_IMM) {
+ sign_ext = 0x10000;
+
+ switch (elem_size) {
+ case 0:
+ srcw &= 0xff;
+ sign_ext = (sljit_s8)srcw;
+ break;
+ case 1:
+ srcw &= 0xffff;
+ sign_ext = (sljit_s16)srcw;
+ break;
+ case 2:
+ if ((sljit_s32)srcw == (sljit_s16)srcw) {
+ srcw &= 0xffff;
+ sign_ext = (sljit_s16)srcw;
+ } else
+ srcw &= 0xffffffff;
+ break;
+ default:
+ if (srcw == (sljit_s16)srcw) {
+ srcw &= 0xffff;
+ sign_ext = (sljit_s16)srcw;
+ }
+ break;
+ }
+
+ if (sign_ext != 0x10000) {
+ if (sign_ext == 0 || sign_ext == -1)
+ return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)
+ | (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));
+
+ return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg)
+ | ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
+ }
+
+ push_load_imm_inst(compiler, tmp0, srcw);
+ reg = tmp0;
+ } else
+ reg = gpr(src);
+
+ FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
+ return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg, sljit_s32 lane_index,
+ sljit_s32 srcdst, sljit_sw srcdstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ struct addr addr;
+ sljit_gpr reg;
+ sljit_ins ins = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
+
+ ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (srcdst & SLJIT_MEM) {
+ FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
+ ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
+ }
+
+ if (type & SLJIT_SIMD_LANE_ZERO) {
+ if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
+ return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));
+
+ if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
+ FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg)));
+ srcdst = TMP_FREG1;
+ srcdstw = 0;
+ }
+
+ FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)));
+ }
+
+ if (srcdst & SLJIT_MEM) {
+ switch (elem_size) {
+ case 0:
+ ins |= 0xe70000000000 /* vleb */;
+ break;
+ case 1:
+ ins |= 0xe70000000001 /* vleh */;
+ break;
+ case 2:
+ ins |= 0xe70000000003 /* vlef */;
+ break;
+ default:
+ ins |= 0xe70000000002 /* vleg */;
+ break;
+ }
+
+		/* Convert the element load (vleb..vleg) into the matching store (vsteb..vsteg): the store opcode is the load opcode | 0x8. */
+ if (type & SLJIT_SIMD_STORE)
+ ins |= 0x8;
+
+ return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
+ if (type & SLJIT_SIMD_STORE)
+ return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));
+
+ if (elem_size == 3) {
+ if (lane_index == 0)
+ ins = F32(srcdst) | F28(freg) | (1 << 12);
+ else
+ ins = F32(freg) | F28(srcdst);
+
+ return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins);
+ }
+
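+		/* 32-bit float lane: route the value through tmp0 with vlgv (lane 0 of srcdst) and vlvg (lane_index of freg). */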
+ FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
+ return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
+ }
+
+ if (srcdst == SLJIT_IMM) {
+ switch (elem_size) {
+ case 0:
+ ins = 0xe70000000040 /* vleib */;
+ srcdstw &= 0xff;
+ break;
+ case 1:
+ ins = 0xe70000000041 /* vleih */;
+ srcdstw &= 0xffff;
+ break;
+ case 2:
+ if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
+ srcdstw &= 0xffff;
+ ins = 0xe70000000043 /* vleif */;
+ } else
+ srcdstw &= 0xffffffff;
+ break;
+ default:
+ if (srcdstw == (sljit_s16)srcdstw) {
+ srcdstw &= 0xffff;
+ ins = 0xe70000000042 /* vleig */;
+ }
+ break;
+ }
+
+ if (ins != 0)
+ return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));
+
+ push_load_imm_inst(compiler, tmp0, srcdstw);
+ reg = tmp0;
+ } else
+ reg = gpr(srcdst);
+
+ ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);
+
+ if (!(type & SLJIT_SIMD_STORE))
+ return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins);
+
+ FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins));
+
+ if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
+ return SLJIT_SUCCESS;
+
+ switch (elem_size) {
+ case 0:
+ ins = 0xb9060000 /* lgbr */;
+ break;
+ case 1:
+ ins = 0xb9070000 /* lghr */;
+ break;
+ default:
+ ins = 0xb9140000 /* lgfr */;
+ break;
+ }
+
+ return push_inst(compiler, ins | R4A(reg) | R0A(reg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_s32 src_lane_index)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src)
+ | ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 src, sljit_sw srcw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
+ struct addr addr;
+ sljit_ins ins;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
+
+ ADJUST_LOCAL_OFFSET(src, srcw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ if (src & SLJIT_MEM) {
+ FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
+ ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
+
+ switch (elem2_size - elem_size) {
+ case 1:
+ ins |= 0xe70000000002 /* vleg */;
+ break;
+ case 2:
+ ins |= 0xe70000000003 /* vlef */;
+ break;
+ default:
+ ins |= 0xe70000000001 /* vleh */;
+ break;
+ }
+
+ FAIL_IF(push_inst(compiler, ins));
+ src = freg;
+ }
+
+ if (type & SLJIT_SIMD_FLOAT) {
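+		/* Unpack the two high 32-bit floats into doubleword lanes, shift them into the even word of each lane, then lengthen them to doubles. */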
+ FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12)));
+ FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12)));
+ return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12));
+ }
+
+ ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg);
+
+ do {
+ FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
+ src = freg;
+ } while (++elem_size < elem2_size);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 freg,
+ sljit_s32 dst, sljit_sw dstw)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_gpr dst_r;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
+
+ ADJUST_LOCAL_OFFSET(dst, dstw);
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
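+	/* Build the bit-selector vector for vbperm: each byte of TMP_FREG1 is the bit index of one element's sign bit (0x80 selects no bit). */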
+ switch (elem_size) {
+ case 0:
+ push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);
+ push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);
+ FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
+ break;
+ case 1:
+ push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);
+ break;
+ case 2:
+ push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);
+ break;
+ default:
+ push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);
+ break;
+ }
+
+ if (elem_size != 0)
+ FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));
+
+ FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1)));
+
+ dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
+ FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
+ | (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));
+
+ if (dst_r == tmp0)
+ return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);
+
+ return SLJIT_SUCCESS;
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
+ sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
+{
+ sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
+ sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
+ sljit_ins ins = 0;
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
+
+ if (reg_size != 4)
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
+ return SLJIT_ERR_UNSUPPORTED;
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ switch (SLJIT_SIMD_GET_OPCODE(type)) {
+ case SLJIT_SIMD_OP2_AND:
+ ins = 0xe70000000068 /* vn */;
+ break;
+ case SLJIT_SIMD_OP2_OR:
+ ins = 0xe7000000006a /* vo */;
+ break;
+ case SLJIT_SIMD_OP2_XOR:
+ ins = 0xe7000000006d /* vx */;
+ break;
+ }
+
+ if (type & SLJIT_SIMD_TEST)
+ return SLJIT_SUCCESS;
+
+ return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg));
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 dst_reg,
+ sljit_s32 mem_reg)
+{
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
+
+ SLJIT_SKIP_CHECKS(compiler);
+ return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
+}
+
+SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
+ sljit_s32 src_reg,
+ sljit_s32 mem_reg,
+ sljit_s32 temp_reg)
+{
+ sljit_ins mask;
+ sljit_gpr tmp_r = gpr(temp_reg);
+ sljit_gpr mem_r = gpr(mem_reg);
+
+ CHECK_ERROR();
+ CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
+
+ switch (GET_OPCODE(op)) {
+ case SLJIT_MOV32:
+ case SLJIT_MOV_U32:
+ return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r));
+ case SLJIT_MOV_U8:
+ mask = 0xff;
+ break;
+ case SLJIT_MOV_U16:
+ mask = 0xffff;
+ break;
+ default:
+ return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r));
+ }
+
+ /* tmp0 = (src_reg ^ tmp_r) & mask */
+ FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask));
+ FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r)));
+ FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc));
+ FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1)));
+
+ /* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */
+ FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 0x18 : 0x10)));
+ FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r)));
+ FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8)));
+ FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1)));
+
+ /* Already computed: tmp_r = mem_r & ~0x3 */
+
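+	/* tmp1 = word at the aligned address; tmp0 ^ tmp1 swaps the expected old field value for src_reg; cs stores it back only if the word is unchanged. */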
+ FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r)));
+ FAIL_IF(push_inst(compiler, 0x1700 /* x */ | R4A(tmp0) | R0A(tmp1)));
+ return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r));
+}
+
/* --------------------------------------------------------------------- */
/* Other instructions */
/* --------------------------------------------------------------------- */
@@ -3678,9 +4459,9 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compi
dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
if (have_genext())
- PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
+ PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
else {
- PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
+ PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
}
@@ -3707,20 +4488,18 @@ SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_consta
sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
-SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
- struct sljit_compiler *compiler,
- sljit_s32 dst, sljit_sw dstw)
+SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
- struct sljit_put_label *put_label;
+ struct sljit_jump *jump;
sljit_gpr dst_r;
CHECK_ERROR_PTR();
- CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
+ CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
ADJUST_LOCAL_OFFSET(dst, dstw);
- put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
- PTR_FAIL_IF(!put_label);
- set_put_label(put_label, compiler, 0);
+ jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
+ PTR_FAIL_IF(!jump);
+ set_mov_addr(jump, compiler, 0);
dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
@@ -3734,7 +4513,7 @@ SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
if (dst & SLJIT_MEM)
PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
- return put_label;
+ return jump;
}
/* TODO(carenas): EVAL probably should move up or be refactored */