diff options
Diffstat (limited to 'src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c')
-rw-r--r-- | src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c | 582 |
1 files changed, 444 insertions, 138 deletions
diff --git a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c index 4e938ffcf3..1ab79293c7 100644 --- a/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c +++ b/src/3rdparty/pcre2/src/sljit/sljitNativeX86_64.c @@ -37,9 +37,9 @@ static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw)); FAIL_IF(!inst); INC_SIZE(2 + sizeof(sljit_sw)); - *inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); - *inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7)); - sljit_unaligned_store_sw(inst, imm); + inst[0] = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B); + inst[1] = U8(MOV_r_i32 | reg_lmap[reg]); + sljit_unaligned_store_sw(inst + 2, imm); return SLJIT_SUCCESS; } @@ -72,7 +72,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw sljit_uw inst_size; /* The immediate operand must be 32 bit. */ - SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma)); + SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma)); /* Both cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS)); /* Size flags not allowed for typed instructions. */ @@ -80,26 +80,24 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw /* Both size flags cannot be switched on. */ SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG)); /* SSE2 and immediate is not possible. */ - SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2)); - SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3) - && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66) - && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66)); + SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2)); + SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT); size &= 0xf; - inst_size = size; + /* The mod r/m byte is always present. */ + inst_size = size + 1; if (!compiler->mode32 && !(flags & EX86_NO_REXW)) rex |= REX_W; else if (flags & EX86_REX) rex |= REX; - if (flags & (EX86_PREF_F2 | EX86_PREF_F3)) - inst_size++; - if (flags & EX86_PREF_66) + if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) inst_size++; /* Calculate size of b. */ - inst_size += 1; /* mod r/m byte. */ if (b & SLJIT_MEM) { if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) { PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb)); @@ -119,8 +117,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw inst_size += sizeof(sljit_s8); else inst_size += sizeof(sljit_s32); - } - else if (reg_lmap[b & REG_MASK] == 5) { + } else if (reg_lmap[b & REG_MASK] == 5) { /* Swap registers if possible. */ if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5) b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK); @@ -140,23 +137,26 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw rex |= REX_X; } } - } - else if (!(flags & EX86_SSE2_OP2)) { + } else if (!(flags & EX86_SSE2_OP2)) { if (reg_map[b] >= 8) rex |= REX_B; - } - else if (freg_map[b] >= 8) + } else if (freg_map[b] >= 8) rex |= REX_B; - if (a & SLJIT_IMM) { + if ((flags & EX86_VEX_EXT) && (rex & 0x3)) { + SLJIT_ASSERT(size == 2); + size++; + inst_size++; + } + + if (a == SLJIT_IMM) { if (flags & EX86_BIN_INS) { if (imma <= 127 && imma >= -128) { inst_size += 1; flags |= EX86_BYTE_ARG; } else inst_size += 4; - } - else if (flags & EX86_SHIFT_INS) { + } else if (flags & EX86_SHIFT_INS) { SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f)); if (imma != 1) { inst_size++; @@ -168,8 +168,7 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw inst_size += sizeof(short); else inst_size += sizeof(sljit_s32); - } - else { + } else { SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG); /* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */ if (!(flags & EX86_SSE2_OP1)) { @@ -186,32 +185,34 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size); PTR_FAIL_IF(!inst); - /* Encoding the byte. */ + /* Encoding prefixes. */ INC_SIZE(inst_size); if (flags & EX86_PREF_F2) *inst++ = 0xf2; - if (flags & EX86_PREF_F3) + else if (flags & EX86_PREF_F3) *inst++ = 0xf3; - if (flags & EX86_PREF_66) + else if (flags & EX86_PREF_66) *inst++ = 0x66; + + /* Rex is always the last prefix. */ if (rex) *inst++ = rex; + buf_ptr = inst + size; /* Encode mod/rm byte. */ if (!(flags & EX86_SHIFT_INS)) { - if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM)) + if ((flags & EX86_BIN_INS) && a == SLJIT_IMM) *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81; - if (a & SLJIT_IMM) + if (a == SLJIT_IMM) *buf_ptr = 0; else if (!(flags & EX86_SSE2_OP1)) *buf_ptr = U8(reg_lmap[a] << 3); else *buf_ptr = U8(freg_lmap[a] << 3); - } - else { - if (a & SLJIT_IMM) { + } else { + if (a == SLJIT_IMM) { if (imma == 1) *inst = GROUP_SHIFT_1; else @@ -238,8 +239,9 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw if (!(b & OFFS_REG_MASK)) *buf_ptr++ |= reg_lmap_b; else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3)); + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3)); + buf_ptr += 2; } if (immb != 0 || reg_lmap_b == 5) { @@ -250,26 +252,26 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw buf_ptr += sizeof(sljit_s32); } } - } - else { + } else { if (reg_lmap_b == 5) *buf_ptr |= 0x40; - *buf_ptr++ |= 0x04; - *buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); + buf_ptr[0] |= 0x04; + buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6)); + buf_ptr += 2; if (reg_lmap_b == 5) *buf_ptr++ = 0; } - } - else { - *buf_ptr++ |= 0x04; - *buf_ptr++ = 0x25; + } else { + buf_ptr[0] |= 0x04; + buf_ptr[1] = 0x25; + buf_ptr += 2; sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */ buf_ptr += sizeof(sljit_s32); } - if (a & SLJIT_IMM) { + if (a == SLJIT_IMM) { if (flags & EX86_BYTE_ARG) *buf_ptr = U8(imma); else if (flags & EX86_HALF_ARG) @@ -278,33 +280,106 @@ static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma); } - return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1); + return inst; +} + +static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op, + /* The first and second register operand. */ + sljit_s32 a, sljit_s32 v, + /* The general operand (not immediate). */ + sljit_s32 b, sljit_sw immb) +{ + sljit_u8 *inst; + sljit_u8 vex = 0; + sljit_u8 vex_m = 0; + sljit_uw size; + + SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) + & ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0); + + op |= EX86_REX; + + if (op & VEX_OP_0F38) + vex_m = 0x2; + else if (op & VEX_OP_0F3A) + vex_m = 0x3; + + if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) { + if (vex_m == 0) + vex_m = 0x1; + + vex |= 0x80; + } + + if (op & EX86_PREF_66) + vex |= 0x1; + else if (op & EX86_PREF_F2) + vex |= 0x3; + else if (op & EX86_PREF_F3) + vex |= 0x2; + + op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3); + + if (op & VEX_256) + vex |= 0x4; + + vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3)); + + size = op & ~(sljit_uw)0xff; + size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3; + + inst = emit_x86_instruction(compiler, size, a, 0, b, immb); + FAIL_IF(!inst); + + SLJIT_ASSERT((inst[-1] & 0xf0) == REX); + + /* If X or B is present in REX prefix. */ + if (vex_m == 0 && inst[-1] & 0x3) + vex_m = 0x1; + + if (vex_m == 0) { + vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7); + + inst[-1] = 0xc5; + inst[0] = vex; + inst[1] = U8(op); + return SLJIT_SUCCESS; + } + + vex_m |= U8((inst[-1] ^ 0x7) << 5); + inst[-1] = 0xc4; + inst[0] = vex_m; + inst[1] = vex; + inst[2] = U8(op); + return SLJIT_SUCCESS; } /* --------------------------------------------------------------------- */ /* Enter / return */ /* --------------------------------------------------------------------- */ -static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr) +static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr) { sljit_uw type = jump->flags >> TYPE_SHIFT; - int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff); + int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && (jump->flags & JUMP_ADDR) && (jump->u.target <= 0xffffffff); /* The relative jump below specialized for this case. */ - SLJIT_ASSERT(reg_map[TMP_REG2] >= 8); + SLJIT_ASSERT(reg_map[TMP_REG2] >= 8 && TMP_REG2 != SLJIT_TMP_DEST_REG); if (type < SLJIT_JUMP) { /* Invert type. */ - *code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10); - *code_ptr++ = short_addr ? (6 + 3) : (10 + 3); + code_ptr[0] = U8(get_jump_code(type ^ 0x1) - 0x10); + code_ptr[1] = short_addr ? (6 + 3) : (10 + 3); + code_ptr += 2; } - *code_ptr++ = short_addr ? REX_B : (REX_W | REX_B); - *code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2]; + code_ptr[0] = short_addr ? REX_B : (REX_W | REX_B); + code_ptr[1] = MOV_r_i32 | reg_lmap[TMP_REG2]; + code_ptr += 2; jump->addr = (sljit_uw)code_ptr; - if (jump->flags & JUMP_LABEL) + if (!(jump->flags & JUMP_ADDR)) jump->flags |= PATCH_MD; else if (short_addr) sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target); @@ -313,63 +388,71 @@ static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw); - *code_ptr++ = REX_B; - *code_ptr++ = GROUP_FF; - *code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); + code_ptr[0] = REX_B; + code_ptr[1] = GROUP_FF; + code_ptr[2] = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]); - return code_ptr; + return code_ptr + 3; } -static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label) +static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset) { - if (max_label > HALFWORD_MAX) { - put_label->addr -= put_label->flags; - put_label->flags = PATCH_MD; - return code_ptr; - } + sljit_uw addr; + sljit_sw diff; + SLJIT_UNUSED_ARG(executable_offset); + + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) <= 10); + if (jump->flags & JUMP_ADDR) + addr = jump->u.target; + else + addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + jump->u.label->size; - if (put_label->flags == 0) { - /* Destination is register. */ - code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw); + if (addr > 0xffffffffl) { + diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset); - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); - SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); + if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 7); + code_ptr -= SSIZE_OF(s32) - 1; - if ((code_ptr[0] & 0x07) != 0) { - code_ptr[0] = U8(code_ptr[0] & ~0x08); - code_ptr += 2 + sizeof(sljit_s32); - } - else { - code_ptr[0] = code_ptr[1]; - code_ptr += 1 + sizeof(sljit_s32); + SLJIT_ASSERT((code_ptr[-3 - SSIZE_OF(s32)] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[-2 - SSIZE_OF(s32)] & 0xf8) == MOV_r_i32); + + code_ptr[-3 - SSIZE_OF(s32)] = U8(REX_W | ((code_ptr[-3 - SSIZE_OF(s32)] & 0x1) << 2)); + code_ptr[-1 - SSIZE_OF(s32)] = U8(((code_ptr[-2 - SSIZE_OF(s32)] & 0x7) << 3) | 0x5); + code_ptr[-2 - SSIZE_OF(s32)] = LEA_r_m; + + jump->flags |= PATCH_MW; + return code_ptr; } - put_label->addr = (sljit_uw)code_ptr; + jump->flags |= PATCH_MD; return code_ptr; } - code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); - SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); + code_ptr -= 2 + sizeof(sljit_uw); SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); - - if ((code_ptr[1] & 0xf8) == MOV_r_i32) { - code_ptr += 2 + sizeof(sljit_uw); - SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); + SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32); + + if ((code_ptr[0] & 0x07) != 0) { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 6); + code_ptr[0] = U8(code_ptr[0] & ~0x08); + code_ptr += 2 + sizeof(sljit_s32); + } else { + SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 5); + code_ptr[0] = code_ptr[1]; + code_ptr += 1 + sizeof(sljit_s32); } - SLJIT_ASSERT(code_ptr[1] == MOV_rm_r); - - code_ptr[0] = U8(code_ptr[0] & ~0x4); - code_ptr[1] = MOV_rm_i32; - code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3)); - - code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32)); - put_label->addr = (sljit_uw)code_ptr; - put_label->flags = 0; return code_ptr; } +#ifdef _WIN64 +typedef struct { + sljit_sw regs[2]; +} sljit_sse2_reg; +#endif /* _WIN64 */ + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler, sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds, sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size) @@ -423,7 +506,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi #ifdef _WIN64 local_size += SLJIT_LOCALS_OFFSET; - saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg); if (saved_float_regs_size > 0) { saved_float_regs_offset = ((local_size + 0xf) & ~0xf); @@ -532,16 +615,12 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compi tmp = SLJIT_FS0 - fsaveds; for (i = SLJIT_FS0; i > tmp; i--) { - inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); - *inst++ = GROUP_0F; - *inst = MOVAPS_xm_x; + FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); saved_float_regs_offset += 16; } for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { - inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); - *inst++ = GROUP_0F; - *inst = MOVAPS_xm_x; + FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); saved_float_regs_offset += 16; } } @@ -565,7 +644,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *comp #ifdef _WIN64 local_size += SLJIT_LOCALS_OFFSET; - saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg); if (saved_float_regs_size > 0) local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size; @@ -591,7 +670,7 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit #endif /* _WIN64 */ #ifdef _WIN64 - saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16); + saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg); if (saved_float_regs_offset > 0) { compiler->mode32 = 1; @@ -599,16 +678,12 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit tmp = SLJIT_FS0 - fsaveds; for (i = SLJIT_FS0; i > tmp; i--) { - inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); - *inst++ = GROUP_0F; - *inst = MOVAPS_x_xm; + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); saved_float_regs_offset += 16; } for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) { - inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset); - *inst++ = GROUP_0F; - *inst = MOVAPS_x_xm; + FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset)); saved_float_regs_offset += 16; } @@ -656,20 +731,13 @@ static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler) { - sljit_u8 *inst; - CHECK_ERROR(); CHECK(check_sljit_emit_return_void(compiler)); compiler->mode32 = 0; FAIL_IF(emit_stack_frame_release(compiler, 0)); - - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - RET(); - return SLJIT_SUCCESS; + return emit_byte(compiler, RET_near); } SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler, @@ -863,22 +931,13 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compi return sljit_emit_ijump(compiler, type, src, srcw); } -SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) +static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw) { sljit_u8 *inst; - CHECK_ERROR(); - CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw)); - ADJUST_LOCAL_OFFSET(dst, dstw); - if (FAST_IS_REG(dst)) { - if (reg_map[dst] < 8) { - inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); - FAIL_IF(!inst); - INC_SIZE(1); - POP_REG(reg_lmap[dst]); - return SLJIT_SUCCESS; - } + if (reg_map[dst] < 8) + return emit_byte(compiler, U8(POP_r + reg_lmap[dst])); inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); FAIL_IF(!inst); @@ -892,7 +951,7 @@ SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler * compiler->mode32 = 1; inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw); FAIL_IF(!inst); - *inst++ = POP_rm; + *inst = POP_rm; return SLJIT_SUCCESS; } @@ -922,8 +981,8 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src compiler->mode32 = 1; inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw); FAIL_IF(!inst); - *inst++ = GROUP_FF; - *inst |= PUSH_rm; + inst[0] = GROUP_FF; + inst[1] |= PUSH_rm; inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); FAIL_IF(!inst); @@ -934,10 +993,60 @@ static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src return SLJIT_SUCCESS; } +static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler, + sljit_s32 dst, sljit_sw dstw) +{ + sljit_s32 saved_regs_size; + + compiler->mode32 = 0; + saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0); + return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saved_regs_size); +} + /* --------------------------------------------------------------------- */ /* Other operations */ /* --------------------------------------------------------------------- */ +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type, + sljit_s32 dst_reg, + sljit_s32 src1, sljit_sw src1w, + sljit_s32 src2_reg) +{ + CHECK_ERROR(); + CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg)); + + ADJUST_LOCAL_OFFSET(src1, src1w); + + compiler->mode32 = type & SLJIT_32; + type &= ~SLJIT_32; + + if (dst_reg != src2_reg) { + if (dst_reg == src1) { + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) { + EMIT_MOV(compiler, dst_reg, 0, src1, src1w); + src1 = src2_reg; + src1w = 0; + type ^= 0x1; + } else + EMIT_MOV(compiler, dst_reg, 0, src2_reg, 0); + } + + if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) { + if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) { + EMIT_MOV(compiler, TMP_REG2, 0, src1, src1w); + src1 = TMP_REG2; + src1w = 0; + } + + return emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w); + } + + return emit_cmov_generic(compiler, type, dst_reg, src1, src1w); +} + SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 reg, sljit_s32 mem, sljit_sw memw) @@ -1027,15 +1136,15 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, compiler->mode32 = 0; - if (src & SLJIT_IMM) { + if (src == SLJIT_IMM) { if (FAST_IS_REG(dst)) { - if (sign || ((sljit_uw)srcw <= 0x7fffffff)) { - inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); - FAIL_IF(!inst); - *inst = MOV_rm_i32; - return SLJIT_SUCCESS; - } - return emit_load_imm64(compiler, dst, srcw); + if (!sign || ((sljit_u32)srcw <= 0x7fffffff)) + return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw); + + inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); + FAIL_IF(!inst); + *inst = MOV_rm_i32; + return SLJIT_SUCCESS; } compiler->mode32 = 1; inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw); @@ -1053,10 +1162,10 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, if (sign) { inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw); FAIL_IF(!inst); - *inst++ = MOVSXD_r_rm; + *inst = MOVSXD_r_rm; } else { compiler->mode32 = 1; - FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw)); + EMIT_MOV(compiler, dst_r, 0, src, srcw); compiler->mode32 = 0; } } @@ -1072,6 +1181,203 @@ static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign, return SLJIT_SUCCESS; } +static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 dst, sljit_sw dstw, + sljit_s32 src, sljit_sw srcw) +{ + sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG; + sljit_u8 *inst, *jump_inst1, *jump_inst2; + sljit_uw size1, size2; + + compiler->mode32 = 0; + + if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) { + if (src != SLJIT_IMM) { + compiler->mode32 = 1; + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + compiler->mode32 = 0; + } else + FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw)); + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + + compiler->mode32 = 1; + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; + } + + if (!FAST_IS_REG(src)) { + EMIT_MOV(compiler, TMP_REG1, 0, src, srcw); + src = TMP_REG1; + } + + BINARY_IMM32(CMP, 0, src, 0); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JL_i8; + jump_inst1 = inst; + + size1 = compiler->size; + + compiler->mode32 = 0; + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0)); + + inst = (sljit_u8*)ensure_buf(compiler, 1 + 2); + FAIL_IF(!inst); + INC_SIZE(2); + inst[0] = JMP_i8; + jump_inst2 = inst; + + size2 = compiler->size; + + jump_inst1[1] = U8(size2 - size1); + + if (src != TMP_REG1) + EMIT_MOV(compiler, TMP_REG1, 0, src, 0); + + EMIT_MOV(compiler, TMP_REG2, 0, src, 0); + + inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0); + FAIL_IF(!inst); + inst[1] |= SHR; + + compiler->mode32 = 1; + BINARY_IMM32(AND, 1, TMP_REG2, 0); + + compiler->mode32 = 0; + inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG2, 0); + FAIL_IF(!inst); + inst[0] = OR_r_rm; + + FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0)); + compiler->mode32 = 1; + FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0)); + + jump_inst2[1] = U8(compiler->size - size2); + + if (dst_r == TMP_FREG) + return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG); + return SLJIT_SUCCESS; +} + +static sljit_s32 sljit_emit_fset(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_u8 rex, sljit_s32 is_zero) +{ + sljit_u8 *inst; + sljit_u32 size; + + if (is_zero) { + rex = freg_map[freg] >= 8 ? (REX_R | REX_B) : 0; + } else { + if (freg_map[freg] >= 8) + rex |= REX_R; + if (reg_map[TMP_REG1] >= 8) + rex |= REX_B; + } + + size = (rex != 0) ? 5 : 4; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + + *inst++ = GROUP_66; + if (rex != 0) + *inst++ = rex; + inst[0] = GROUP_0F; + + if (is_zero) { + inst[1] = PXOR_x_xm; + inst[2] = U8(freg_lmap[freg] | (freg_lmap[freg] << 3) | MOD_REG); + } else { + inst[1] = MOVD_x_rm; + inst[2] = U8(reg_lmap[TMP_REG1] | (freg_lmap[freg] << 3) | MOD_REG); + } + + return SLJIT_SUCCESS; +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f32 value) +{ + union { + sljit_s32 imm; + sljit_f32 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset32(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) { + compiler->mode32 = 1; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm); + } + + return sljit_emit_fset(compiler, freg, 0, u.imm == 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler, + sljit_s32 freg, sljit_f64 value) +{ + union { + sljit_sw imm; + sljit_f64 value; + } u; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fset64(compiler, freg, value)); + + u.value = value; + + if (u.imm != 0) { + compiler->mode32 = 0; + EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm); + } + + return sljit_emit_fset(compiler, freg, REX_W, u.imm == 0); +} + +SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op, + sljit_s32 freg, sljit_s32 reg) +{ + sljit_u8 *inst; + sljit_u32 size; + sljit_u8 rex = 0; + + CHECK_ERROR(); + CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg)); + + if (!(op & SLJIT_32)) + rex = REX_W; + + if (freg_map[freg] >= 8) + rex |= REX_R; + + if (reg_map[reg] >= 8) + rex |= REX_B; + + size = (rex != 0) ? 5 : 4; + + inst = (sljit_u8*)ensure_buf(compiler, 1 + size); + FAIL_IF(!inst); + INC_SIZE(size); + + *inst++ = GROUP_66; + if (rex != 0) + *inst++ = rex; + inst[0] = GROUP_0F; + inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x; + inst[2] = U8(reg_lmap[reg] | (freg_lmap[freg] << 3) | MOD_REG); + + return SLJIT_SUCCESS; +} + static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler) { sljit_s32 tmp, size; |