diff options
Diffstat (limited to 'Source/JavaScriptCore/disassembler/udis86/udis86_decode.c')
-rw-r--r-- | Source/JavaScriptCore/disassembler/udis86/udis86_decode.c | 1149 |
1 files changed, 640 insertions, 509 deletions
diff --git a/Source/JavaScriptCore/disassembler/udis86/udis86_decode.c b/Source/JavaScriptCore/disassembler/udis86/udis86_decode.c index 579903642..c0ea28362 100644 --- a/Source/JavaScriptCore/disassembler/udis86/udis86_decode.c +++ b/Source/JavaScriptCore/disassembler/udis86/udis86_decode.c @@ -28,14 +28,10 @@ #if USE(UDIS86) -#include "udis86_extern.h" +#include "udis86_udint.h" #include "udis86_types.h" -#include "udis86_input.h" +#include "udis86_extern.h" #include "udis86_decode.h" -#include <wtf/Assertions.h> - -#define dbg(x, n...) -/* #define dbg printf */ #ifndef __UD_STANDALONE__ # include <string.h> @@ -44,15 +40,164 @@ /* The max number of prefixes to an instruction */ #define MAX_PREFIXES 15 -/* instruction aliases and special cases */ -static struct ud_itab_entry s_ie__invalid = - { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none }; +/* rex prefix bits */ +#define REX_W(r) ( ( 0xF & ( r ) ) >> 3 ) +#define REX_R(r) ( ( 0x7 & ( r ) ) >> 2 ) +#define REX_X(r) ( ( 0x3 & ( r ) ) >> 1 ) +#define REX_B(r) ( ( 0x1 & ( r ) ) >> 0 ) +#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \ + ( P_REXR(n) << 2 ) | \ + ( P_REXX(n) << 1 ) | \ + ( P_REXB(n) << 0 ) ) + +/* scable-index-base bits */ +#define SIB_S(b) ( ( b ) >> 6 ) +#define SIB_I(b) ( ( ( b ) >> 3 ) & 7 ) +#define SIB_B(b) ( ( b ) & 7 ) + +/* modrm bits */ +#define MODRM_REG(b) ( ( ( b ) >> 3 ) & 7 ) +#define MODRM_NNN(b) ( ( ( b ) >> 3 ) & 7 ) +#define MODRM_MOD(b) ( ( ( b ) >> 6 ) & 3 ) +#define MODRM_RM(b) ( ( b ) & 7 ) + +static int decode_ext(struct ud *u, uint16_t ptr); +static int decode_opcode(struct ud *u); + +enum reg_class { /* register classes */ + REGCLASS_GPR, + REGCLASS_MMX, + REGCLASS_CR, + REGCLASS_DB, + REGCLASS_SEG, + REGCLASS_XMM +}; + + /* + * inp_start + * Should be called before each de-code operation. + */ +static void +inp_start(struct ud *u) +{ + u->inp_ctr = 0; +} -static int -decode_ext(struct ud *u, uint16_t ptr); +static uint8_t +inp_peek(struct ud *u) +{ + if (u->inp_end == 0) { + if (u->inp_buf != NULL) { + if (u->inp_buf_index < u->inp_buf_size) { + return u->inp_buf[u->inp_buf_index]; + } + } else if (u->inp_peek != UD_EOI) { + return u->inp_peek; + } else { + int c; + if ((c = u->inp_hook(u)) != UD_EOI) { + u->inp_peek = c; + return u->inp_peek; + } + } + } + u->inp_end = 1; + UDERR(u, "byte expected, eoi received\n"); + return 0; +} + +static uint8_t +inp_next(struct ud *u) +{ + if (u->inp_end == 0) { + if (u->inp_buf != NULL) { + if (u->inp_buf_index < u->inp_buf_size) { + u->inp_ctr++; + return (u->inp_curr = u->inp_buf[u->inp_buf_index++]); + } + } else { + int c = u->inp_peek; + if (c != UD_EOI || (c = u->inp_hook(u)) != UD_EOI) { + u->inp_peek = UD_EOI; + u->inp_curr = c; + u->inp_sess[u->inp_ctr++] = u->inp_curr; + return u->inp_curr; + } + } + } + u->inp_end = 1; + UDERR(u, "byte expected, eoi received\n"); + return 0; +} +static uint8_t +inp_curr(struct ud *u) +{ + return u->inp_curr; +} + + +/* + * inp_uint8 + * int_uint16 + * int_uint32 + * int_uint64 + * Load little-endian values from input + */ +static uint8_t +inp_uint8(struct ud* u) +{ + return inp_next(u); +} + +static uint16_t +inp_uint16(struct ud* u) +{ + uint16_t r, ret; -static inline int + ret = inp_next(u); + r = inp_next(u); + return ret | (r << 8); +} + +static uint32_t +inp_uint32(struct ud* u) +{ + uint32_t r, ret; + + ret = inp_next(u); + r = inp_next(u); + ret = ret | (r << 8); + r = inp_next(u); + ret = ret | (r << 16); + r = inp_next(u); + return ret | (r << 24); +} + +static uint64_t +inp_uint64(struct ud* u) +{ + uint64_t r, ret; + + ret = inp_next(u); + r = inp_next(u); + ret = ret | (r << 8); + r = inp_next(u); + ret = ret | (r << 16); + r = inp_next(u); + ret = ret | (r << 24); + r = inp_next(u); + ret = ret | (r << 32); + r = inp_next(u); + ret = ret | (r << 40); + r = inp_next(u); + ret = ret | (r << 48); + r = inp_next(u); + return ret | (r << 56); +} + + +static UD_INLINE int eff_opr_mode(int dis_mode, int rex_w, int pfx_opr) { if (dis_mode == 64) { @@ -60,13 +205,13 @@ eff_opr_mode(int dis_mode, int rex_w, int pfx_opr) } else if (dis_mode == 32) { return pfx_opr ? 16 : 32; } else { - ASSERT(dis_mode == 16); + UD_ASSERT(dis_mode == 16); return pfx_opr ? 32 : 16; } } -static inline int +static UD_INLINE int eff_adr_mode(int dis_mode, int pfx_adr) { if (dis_mode == 64) { @@ -74,21 +219,12 @@ eff_adr_mode(int dis_mode, int pfx_adr) } else if (dis_mode == 32) { return pfx_adr ? 16 : 32; } else { - ASSERT(dis_mode == 16); + UD_ASSERT(dis_mode == 16); return pfx_adr ? 32 : 16; } } -/* Looks up mnemonic code in the mnemonic string table - * Returns NULL if the mnemonic code is invalid - */ -const char * ud_lookup_mnemonic( enum ud_mnemonic_code c ) -{ - return ud_mnemonics_str[ c ]; -} - - /* * decode_prefixes * @@ -97,164 +233,128 @@ const char * ud_lookup_mnemonic( enum ud_mnemonic_code c ) static int decode_prefixes(struct ud *u) { - unsigned int have_pfx = 1; - unsigned int i; - uint8_t curr; - - /* if in error state, bail out */ - if ( u->error ) - return -1; - - /* keep going as long as there are prefixes available */ - for ( i = 0; have_pfx ; ++i ) { - - /* Get next byte. */ - ud_inp_next(u); - if ( u->error ) - return -1; - curr = ud_inp_curr( u ); - - /* rex prefixes in 64bit mode */ - if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) { - u->pfx_rex = curr; - } else { - switch ( curr ) - { - case 0x2E : - u->pfx_seg = UD_R_CS; - u->pfx_rex = 0; - break; - case 0x36 : - u->pfx_seg = UD_R_SS; - u->pfx_rex = 0; - break; - case 0x3E : - u->pfx_seg = UD_R_DS; - u->pfx_rex = 0; - break; - case 0x26 : - u->pfx_seg = UD_R_ES; - u->pfx_rex = 0; - break; - case 0x64 : - u->pfx_seg = UD_R_FS; - u->pfx_rex = 0; - break; - case 0x65 : - u->pfx_seg = UD_R_GS; - u->pfx_rex = 0; - break; - case 0x67 : /* adress-size override prefix */ - u->pfx_adr = 0x67; - u->pfx_rex = 0; - break; - case 0xF0 : - u->pfx_lock = 0xF0; - u->pfx_rex = 0; - break; - case 0x66: - /* the 0x66 sse prefix is only effective if no other sse prefix - * has already been specified. - */ - if ( !u->pfx_insn ) u->pfx_insn = 0x66; - u->pfx_opr = 0x66; - u->pfx_rex = 0; - break; - case 0xF2: - u->pfx_insn = 0xF2; - u->pfx_repne = 0xF2; - u->pfx_rex = 0; - break; - case 0xF3: - u->pfx_insn = 0xF3; - u->pfx_rep = 0xF3; - u->pfx_repe = 0xF3; - u->pfx_rex = 0; - break; - default : - /* No more prefixes */ - have_pfx = 0; - break; - } - } - - /* check if we reached max instruction length */ - if ( i + 1 == MAX_INSN_LENGTH ) { - u->error = 1; - break; - } + int done = 0; + uint8_t curr = 0, last = 0; + UD_RETURN_ON_ERROR(u); + + do { + last = curr; + curr = inp_next(u); + UD_RETURN_ON_ERROR(u); + if (u->inp_ctr == MAX_INSN_LENGTH) { + UD_RETURN_WITH_ERROR(u, "max instruction length"); + } + + switch (curr) + { + case 0x2E: + u->pfx_seg = UD_R_CS; + break; + case 0x36: + u->pfx_seg = UD_R_SS; + break; + case 0x3E: + u->pfx_seg = UD_R_DS; + break; + case 0x26: + u->pfx_seg = UD_R_ES; + break; + case 0x64: + u->pfx_seg = UD_R_FS; + break; + case 0x65: + u->pfx_seg = UD_R_GS; + break; + case 0x67: /* adress-size override prefix */ + u->pfx_adr = 0x67; + break; + case 0xF0: + u->pfx_lock = 0xF0; + break; + case 0x66: + u->pfx_opr = 0x66; + break; + case 0xF2: + u->pfx_str = 0xf2; + break; + case 0xF3: + u->pfx_str = 0xf3; + break; + default: + /* consume if rex */ + done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1; + break; } + } while (!done); + /* rex prefixes in 64bit mode, must be the last prefix */ + if (u->dis_mode == 64 && (last & 0xF0) == 0x40) { + u->pfx_rex = last; + } + return 0; +} - /* return status */ - if ( u->error ) - return -1; - /* rewind back one byte in stream, since the above loop - * stops with a non-prefix byte. - */ - ud_inp_back(u); - return 0; +/* + * vex_l, vex_w + * Return the vex.L and vex.W bits + */ +static UD_INLINE uint8_t +vex_l(const struct ud *u) +{ + UD_ASSERT(u->vex_op != 0); + return ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 2) & 1; } +static UD_INLINE uint8_t +vex_w(const struct ud *u) +{ + UD_ASSERT(u->vex_op != 0); + return u->vex_op == 0xc4 ? ((u->vex_b2 >> 7) & 1) : 0; +} -static inline unsigned int modrm( struct ud * u ) + +static UD_INLINE uint8_t +modrm(struct ud * u) { if ( !u->have_modrm ) { - u->modrm = ud_inp_next( u ); + u->modrm = inp_next( u ); + u->modrm_offset = (uint8_t) (u->inp_ctr - 1); u->have_modrm = 1; } return u->modrm; } -static unsigned int resolve_operand_size( const struct ud * u, unsigned int s ) +static unsigned int +resolve_operand_size(const struct ud* u, ud_operand_size_t osize) { - switch ( s ) - { - case SZ_V: - return ( u->opr_mode ); - case SZ_Z: - return ( u->opr_mode == 16 ) ? 16 : 32; - case SZ_P: - return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP; - case SZ_MDQ: - return ( u->opr_mode == 16 ) ? 32 : u->opr_mode; - case SZ_RDQ: - return ( u->dis_mode == 64 ) ? 64 : 32; - default: - return s; - } + switch (osize) { + case SZ_V: + return u->opr_mode; + case SZ_Z: + return u->opr_mode == 16 ? 16 : 32; + case SZ_Y: + return u->opr_mode == 16 ? 32 : u->opr_mode; + case SZ_RDQ: + return u->dis_mode == 64 ? 64 : 32; + case SZ_X: + UD_ASSERT(u->vex_op != 0); + return (P_VEXL(u->itab_entry->prefix) && vex_l(u)) ? SZ_QQ : SZ_DQ; + default: + return osize; + } } static int resolve_mnemonic( struct ud* u ) { - /* far/near flags */ - u->br_far = 0; - u->br_near = 0; - /* readjust operand sizes for call/jmp instrcutions */ - if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) { - /* WP: 16:16 pointer */ - if ( u->operand[ 0 ].size == SZ_WP ) { - u->operand[ 0 ].size = 16; - u->br_far = 1; - u->br_near= 0; - /* DP: 32:32 pointer */ - } else if ( u->operand[ 0 ].size == SZ_DP ) { - u->operand[ 0 ].size = 32; - u->br_far = 1; - u->br_near= 0; - } else { - u->br_far = 0; - u->br_near= 1; - } /* resolve 3dnow weirdness. */ - } else if ( u->mnemonic == UD_I3dnow ) { - u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u ) ] ].mnemonic; + if ( u->mnemonic == UD_I3dnow ) { + u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic; } /* SWAPGS is only valid in 64bits mode */ if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { - u->error = 1; + UDERR(u, "swapgs invalid in 64bits mode\n"); return -1; } @@ -269,8 +369,8 @@ static int resolve_mnemonic( struct ud* u ) } } - if (u->mnemonic == UD_Inop && u->pfx_rep) { - u->pfx_rep = 0; + if (u->mnemonic == UD_Inop && u->pfx_repe) { + u->pfx_repe = 0; u->mnemonic = UD_Ipause; } return 0; @@ -288,14 +388,14 @@ decode_a(struct ud* u, struct ud_operand *op) /* seg16:off16 */ op->type = UD_OP_PTR; op->size = 32; - op->lval.ptr.off = ud_inp_uint16(u); - op->lval.ptr.seg = ud_inp_uint16(u); + op->lval.ptr.off = inp_uint16(u); + op->lval.ptr.seg = inp_uint16(u); } else { /* seg16:off32 */ op->type = UD_OP_PTR; op->size = 48; - op->lval.ptr.off = ud_inp_uint32(u); - op->lval.ptr.seg = ud_inp_uint16(u); + op->lval.ptr.off = inp_uint32(u); + op->lval.ptr.seg = inp_uint16(u); } } @@ -306,15 +406,11 @@ decode_a(struct ud* u, struct ud_operand *op) static enum ud_type decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) { - s = resolve_operand_size(u, s); - switch (s) { case 64: return UD_R_RAX + rm; - case SZ_DP: case 32: return UD_R_EAX + rm; - case SZ_WP: case 16: return UD_R_AX + rm; case 8: @@ -323,94 +419,103 @@ decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) return UD_R_SPL + (rm-4); return UD_R_AL + rm; } else return UD_R_AL + rm; + case 0: + /* invalid size in case of a decode error */ + UD_ASSERT(u->error); + return UD_NONE; default: - return 0; + UD_ASSERT(!"invalid operand size"); + return UD_NONE; } } -/* ----------------------------------------------------------------------------- - * resolve_gpr64() - 64bit General Purpose Register-Selection. - * ----------------------------------------------------------------------------- - */ -static enum ud_type -resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size) -{ - if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15) - gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3); - else gpr_op = (gpr_op - OP_rAX); - - if (u->opr_mode == 16) { - *size = 16; - return gpr_op + UD_R_AX; - } - if (u->dis_mode == 32 || - (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) { - *size = 32; - return gpr_op + UD_R_EAX; - } - - *size = 64; - return gpr_op + UD_R_RAX; -} - -/* ----------------------------------------------------------------------------- - * resolve_gpr32 () - 32bit General Purpose Register-Selection. - * ----------------------------------------------------------------------------- - */ -static enum ud_type -resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op) -{ - gpr_op = gpr_op - OP_eAX; - - if (u->opr_mode == 16) - return gpr_op + UD_R_AX; - - return gpr_op + UD_R_EAX; -} - -/* ----------------------------------------------------------------------------- - * resolve_reg() - Resolves the register type - * ----------------------------------------------------------------------------- - */ -static enum ud_type -resolve_reg(struct ud* u, unsigned int type, unsigned char i) +static void +decode_reg(struct ud *u, + struct ud_operand *opr, + int type, + int num, + int size) { + int reg; + size = resolve_operand_size(u, size); switch (type) { - case T_MMX : return UD_R_MM0 + (i & 7); - case T_XMM : return UD_R_XMM0 + i; - case T_CRG : return UD_R_CR0 + i; - case T_DBG : return UD_R_DR0 + i; - case T_SEG : { + case REGCLASS_GPR : reg = decode_gpr(u, size, num); break; + case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break; + case REGCLASS_XMM : + reg = num + (size == SZ_QQ ? UD_R_YMM0 : UD_R_XMM0); + break; + case REGCLASS_CR : reg = UD_R_CR0 + num; break; + case REGCLASS_DB : reg = UD_R_DR0 + num; break; + case REGCLASS_SEG : { /* * Only 6 segment registers, anything else is an error. */ - if ((i & 7) > 5) { - u->error = 1; + if ((num & 7) > 5) { + UDERR(u, "invalid segment register value\n"); + return; } else { - return UD_R_ES + (i & 7); + reg = UD_R_ES + (num & 7); } + break; } - case T_NONE: - default: return UD_NONE; + default: + UD_ASSERT(!"invalid register type"); + return; } + opr->type = UD_OP_REG; + opr->base = reg; + opr->size = size; } -/* ----------------------------------------------------------------------------- - * decode_imm() - Decodes Immediate values. - * ----------------------------------------------------------------------------- + +/* + * decode_imm + * + * Decode Immediate values. */ static void -decode_imm(struct ud* u, unsigned int s, struct ud_operand *op) +decode_imm(struct ud* u, unsigned int size, struct ud_operand *op) { - op->size = resolve_operand_size(u, s); + op->size = resolve_operand_size(u, size); op->type = UD_OP_IMM; switch (op->size) { - case 8: op->lval.sbyte = ud_inp_uint8(u); break; - case 16: op->lval.uword = ud_inp_uint16(u); break; - case 32: op->lval.udword = ud_inp_uint32(u); break; - case 64: op->lval.uqword = ud_inp_uint64(u); break; - default: return; + case 8: op->lval.sbyte = inp_uint8(u); break; + case 16: op->lval.uword = inp_uint16(u); break; + case 32: op->lval.udword = inp_uint32(u); break; + case 64: op->lval.uqword = inp_uint64(u); break; + default: return; + } +} + + +/* + * decode_mem_disp + * + * Decode mem address displacement. + */ +static void +decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op) +{ + switch (size) { + case 8: + op->offset = 8; + op->lval.ubyte = inp_uint8(u); + break; + case 16: + op->offset = 16; + op->lval.uword = inp_uint16(u); + break; + case 32: + op->offset = 32; + op->lval.udword = inp_uint32(u); + break; + case 64: + op->offset = 64; + op->lval.uqword = inp_uint64(u); + break; + default: + return; } } @@ -421,21 +526,14 @@ decode_imm(struct ud* u, unsigned int s, struct ud_operand *op) * Decodes reg field of mod/rm byte * */ -static void +static UD_INLINE void decode_modrm_reg(struct ud *u, struct ud_operand *operand, unsigned int type, unsigned int size) { - uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); - operand->type = UD_OP_REG; - operand->size = resolve_operand_size(u, size); - - if (type == T_GPR) { - operand->base = decode_gpr(u, operand->size, reg); - } else { - operand->base = resolve_reg(u, type, reg); - } + uint8_t reg = (REX_R(u->_rex) << 3) | MODRM_REG(modrm(u)); + decode_reg(u, operand, type, reg, size); } @@ -448,67 +546,58 @@ decode_modrm_reg(struct ud *u, static void decode_modrm_rm(struct ud *u, struct ud_operand *op, - unsigned char type, - unsigned int size) + unsigned char type, /* register type */ + unsigned int size) /* operand size */ { - unsigned char mod, rm, reg; + size_t offset = 0; + unsigned char mod, rm; /* get mod, r/m and reg fields */ mod = MODRM_MOD(modrm(u)); - rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u)); - reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); - - UNUSED_PARAM(reg); - - op->size = resolve_operand_size(u, size); + rm = (REX_B(u->_rex) << 3) | MODRM_RM(modrm(u)); /* * If mod is 11b, then the modrm.rm specifies a register. * */ if (mod == 3) { - op->type = UD_OP_REG; - if (type == T_GPR) { - op->base = decode_gpr(u, op->size, rm); - } else { - op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7)); - } + decode_reg(u, op, type, rm, size); return; - } - + } /* - * !11 => Memory Address + * !11b => Memory Address */ op->type = UD_OP_MEM; + op->size = resolve_operand_size(u, size); if (u->adr_mode == 64) { op->base = UD_R_RAX + rm; if (mod == 1) { - op->offset = 8; + offset = 8; } else if (mod == 2) { - op->offset = 32; + offset = 32; } else if (mod == 0 && (rm & 7) == 5) { op->base = UD_R_RIP; - op->offset = 32; + offset = 32; } else { - op->offset = 0; + offset = 0; } /* * Scale-Index-Base (SIB) */ if ((rm & 7) == 4) { - ud_inp_next(u); + inp_next(u); - op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; - op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); - op->base = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); - + op->base = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->_rex) << 3)); + op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->_rex) << 3)); /* special conditions for base reference */ if (op->index == UD_R_RSP) { op->index = UD_NONE; op->scale = UD_NONE; + } else { + op->scale = (1 << SIB_S(inp_curr(u))) & ~1; } if (op->base == UD_R_RBP || op->base == UD_R_R13) { @@ -516,32 +605,35 @@ decode_modrm_rm(struct ud *u, op->base = UD_NONE; } if (mod == 1) { - op->offset = 8; + offset = 8; } else { - op->offset = 32; + offset = 32; } } + } else { + op->scale = UD_NONE; + op->index = UD_NONE; } } else if (u->adr_mode == 32) { op->base = UD_R_EAX + rm; if (mod == 1) { - op->offset = 8; + offset = 8; } else if (mod == 2) { - op->offset = 32; + offset = 32; } else if (mod == 0 && rm == 5) { op->base = UD_NONE; - op->offset = 32; + offset = 32; } else { - op->offset = 0; + offset = 0; } /* Scale-Index-Base (SIB) */ if ((rm & 7) == 4) { - ud_inp_next(u); + inp_next(u); - op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; - op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); - op->base = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); + op->scale = (1 << SIB_S(inp_curr(u))) & ~1; + op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); + op->base = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); if (op->index == UD_R_ESP) { op->index = UD_NONE; @@ -554,11 +646,14 @@ decode_modrm_rm(struct ud *u, op->base = UD_NONE; } if (mod == 1) { - op->offset = 8; + offset = 8; } else { - op->offset = 32; + offset = 32; } } + } else { + op->scale = UD_NONE; + op->index = UD_NONE; } } else { const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP, @@ -567,58 +662,72 @@ decode_modrm_rm(struct ud *u, UD_NONE, UD_NONE, UD_NONE, UD_NONE }; op->base = bases[rm & 7]; op->index = indices[rm & 7]; + op->scale = UD_NONE; if (mod == 0 && rm == 6) { - op->offset= 16; + offset = 16; op->base = UD_NONE; } else if (mod == 1) { - op->offset = 8; + offset = 8; } else if (mod == 2) { - op->offset = 16; + offset = 16; } } - /* - * extract offset, if any - */ - switch (op->offset) { - case 8 : op->lval.ubyte = ud_inp_uint8(u); break; - case 16: op->lval.uword = ud_inp_uint16(u); break; - case 32: op->lval.udword = ud_inp_uint32(u); break; - case 64: op->lval.uqword = ud_inp_uint64(u); break; - default: break; + if (offset) { + decode_mem_disp(u, offset, op); + } else { + op->offset = 0; } } -/* ----------------------------------------------------------------------------- - * decode_o() - Decodes offset - * ----------------------------------------------------------------------------- + +/* + * decode_moffset + * Decode offset-only memory operand */ -static void -decode_o(struct ud* u, unsigned int s, struct ud_operand *op) +static void +decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr) { - switch (u->adr_mode) { - case 64: - op->offset = 64; - op->lval.uqword = ud_inp_uint64(u); - break; - case 32: - op->offset = 32; - op->lval.udword = ud_inp_uint32(u); - break; - case 16: - op->offset = 16; - op->lval.uword = ud_inp_uint16(u); - break; - default: - return; - } - op->type = UD_OP_MEM; - op->size = resolve_operand_size(u, s); + opr->type = UD_OP_MEM; + opr->base = UD_NONE; + opr->index = UD_NONE; + opr->scale = UD_NONE; + opr->size = resolve_operand_size(u, size); + decode_mem_disp(u, u->adr_mode, opr); } -/* ----------------------------------------------------------------------------- - * decode_operands() - Disassembles Operands. - * ----------------------------------------------------------------------------- + +static void +decode_vex_vvvv(struct ud *u, struct ud_operand *opr, unsigned size) +{ + uint8_t vvvv; + UD_ASSERT(u->vex_op != 0); + vvvv = ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 3) & 0xf; + decode_reg(u, opr, REGCLASS_XMM, (0xf & ~vvvv), size); +} + + +/* + * decode_vex_immreg + * Decode source operand encoded in immediate byte [7:4] + */ +static int +decode_vex_immreg(struct ud *u, struct ud_operand *opr, unsigned size) +{ + uint8_t imm = inp_next(u); + uint8_t mask = u->dis_mode == 64 ? 0xf : 0x7; + UD_RETURN_ON_ERROR(u); + UD_ASSERT(u->vex_op != 0); + decode_reg(u, opr, REGCLASS_XMM, mask & (imm >> 4), size); + return 0; +} + + +/* + * decode_operand + * + * Decodes a single operand. + * Returns the type of the operand (UD_NONE if none) */ static int decode_operand(struct ud *u, @@ -626,35 +735,33 @@ decode_operand(struct ud *u, enum ud_operand_code type, unsigned int size) { + operand->type = UD_NONE; + operand->_oprcode = type; + switch (type) { case OP_A : decode_a(u, operand); break; case OP_MR: - if (MODRM_MOD(modrm(u)) == 3) { - decode_modrm_rm(u, operand, T_GPR, - size == SZ_DY ? SZ_MDQ : SZ_V); - } else if (size == SZ_WV) { - decode_modrm_rm( u, operand, T_GPR, SZ_W); - } else if (size == SZ_BV) { - decode_modrm_rm( u, operand, T_GPR, SZ_B); - } else if (size == SZ_DY) { - decode_modrm_rm( u, operand, T_GPR, SZ_D); - } else { - ASSERT(!"unexpected size"); - } + decode_modrm_rm(u, operand, REGCLASS_GPR, + MODRM_MOD(modrm(u)) == 3 ? + Mx_reg_size(size) : Mx_mem_size(size)); break; + case OP_F: + u->br_far = 1; + /* intended fall through */ case OP_M: if (MODRM_MOD(modrm(u)) == 3) { - u->error = 1; + UDERR(u, "expected modrm.mod != 3\n"); } /* intended fall through */ case OP_E: - decode_modrm_rm(u, operand, T_GPR, size); + decode_modrm_rm(u, operand, REGCLASS_GPR, size); break; case OP_G: - decode_modrm_reg(u, operand, T_GPR, size); + decode_modrm_reg(u, operand, REGCLASS_GPR, size); break; + case OP_sI: case OP_I: decode_imm(u, size, operand); break; @@ -662,96 +769,68 @@ decode_operand(struct ud *u, operand->type = UD_OP_CONST; operand->lval.udword = 1; break; - case OP_PR: + case OP_N: if (MODRM_MOD(modrm(u)) != 3) { - u->error = 1; + UDERR(u, "expected modrm.mod == 3\n"); } - decode_modrm_rm(u, operand, T_MMX, size); + /* intended fall through */ + case OP_Q: + decode_modrm_rm(u, operand, REGCLASS_MMX, size); break; case OP_P: - decode_modrm_reg(u, operand, T_MMX, size); + decode_modrm_reg(u, operand, REGCLASS_MMX, size); break; - case OP_VR: + case OP_U: if (MODRM_MOD(modrm(u)) != 3) { - u->error = 1; + UDERR(u, "expected modrm.mod == 3\n"); } /* intended fall through */ case OP_W: - decode_modrm_rm(u, operand, T_XMM, size); + decode_modrm_rm(u, operand, REGCLASS_XMM, size); break; case OP_V: - decode_modrm_reg(u, operand, T_XMM, size); + decode_modrm_reg(u, operand, REGCLASS_XMM, size); break; - case OP_S: - decode_modrm_reg(u, operand, T_SEG, size); + case OP_H: + decode_vex_vvvv(u, operand, size); break; - case OP_AL: - case OP_CL: - case OP_DL: - case OP_BL: - case OP_AH: - case OP_CH: - case OP_DH: - case OP_BH: - operand->type = UD_OP_REG; - operand->base = UD_R_AL + (type - OP_AL); - operand->size = 8; + case OP_MU: + decode_modrm_rm(u, operand, REGCLASS_XMM, + MODRM_MOD(modrm(u)) == 3 ? + Mx_reg_size(size) : Mx_mem_size(size)); break; - case OP_DX: - operand->type = UD_OP_REG; - operand->base = UD_R_DX; - operand->size = 16; + case OP_S: + decode_modrm_reg(u, operand, REGCLASS_SEG, size); break; case OP_O: - decode_o(u, size, operand); + decode_moffset(u, size, operand); break; - case OP_rAXr8: - case OP_rCXr9: - case OP_rDXr10: - case OP_rBXr11: - case OP_rSPr12: - case OP_rBPr13: - case OP_rSIr14: - case OP_rDIr15: - case OP_rAX: - case OP_rCX: - case OP_rDX: - case OP_rBX: - case OP_rSP: - case OP_rBP: - case OP_rSI: - case OP_rDI: - operand->type = UD_OP_REG; - operand->base = resolve_gpr64(u, type, &operand->size); + case OP_R0: + case OP_R1: + case OP_R2: + case OP_R3: + case OP_R4: + case OP_R5: + case OP_R6: + case OP_R7: + decode_reg(u, operand, REGCLASS_GPR, + (REX_B(u->_rex) << 3) | (type - OP_R0), size); break; - case OP_ALr8b: - case OP_CLr9b: - case OP_DLr10b: - case OP_BLr11b: - case OP_AHr12b: - case OP_CHr13b: - case OP_DHr14b: - case OP_BHr15b: { - ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL - + (REX_B(u->pfx_rex) << 3); - if (UD_R_AH <= gpr && u->pfx_rex) { - gpr = gpr + 4; - } - operand->type = UD_OP_REG; - operand->base = gpr; + case OP_AL: + case OP_AX: + case OP_eAX: + case OP_rAX: + decode_reg(u, operand, REGCLASS_GPR, 0, size); break; - } - case OP_eAX: - case OP_eCX: - case OP_eDX: - case OP_eBX: - case OP_eSP: - case OP_eBP: - case OP_eSI: - case OP_eDI: - operand->type = UD_OP_REG; - operand->base = resolve_gpr32(u, type); - operand->size = u->opr_mode == 16 ? 16 : 32; + case OP_CL: + case OP_CX: + case OP_eCX: + decode_reg(u, operand, REGCLASS_GPR, 1, size); + break; + case OP_DL: + case OP_DX: + case OP_eDX: + decode_reg(u, operand, REGCLASS_GPR, 2, size); break; case OP_ES: case OP_CS: @@ -762,7 +841,7 @@ decode_operand(struct ud *u, /* in 64bits mode, only fs and gs are allowed */ if (u->dis_mode == 64) { if (type != OP_FS && type != OP_GS) { - u->error= 1; + UDERR(u, "invalid segment register in 64bits\n"); } } operand->type = UD_OP_REG; @@ -773,17 +852,17 @@ decode_operand(struct ud *u, decode_imm(u, size, operand); operand->type = UD_OP_JIMM; break ; - case OP_Q: - decode_modrm_rm(u, operand, T_MMX, size); - break; case OP_R : - decode_modrm_rm(u, operand, T_GPR, size); + if (MODRM_MOD(modrm(u)) != 3) { + UDERR(u, "expected modrm.mod == 3\n"); + } + decode_modrm_rm(u, operand, REGCLASS_GPR, size); break; case OP_C: - decode_modrm_reg(u, operand, T_CRG, size); + decode_modrm_reg(u, operand, REGCLASS_CR, size); break; case OP_D: - decode_modrm_reg(u, operand, T_DBG, size); + decode_modrm_reg(u, operand, REGCLASS_DB, size); break; case OP_I3 : operand->type = UD_OP_CONST; @@ -799,18 +878,16 @@ decode_operand(struct ud *u, case OP_ST7: operand->type = UD_OP_REG; operand->base = (type - OP_ST0) + UD_R_ST0; - operand->size = 0; + operand->size = 80; break; - case OP_AX: - operand->type = UD_OP_REG; - operand->base = UD_R_AX; - operand->size = 16; + case OP_L: + decode_vex_immreg(u, operand, size); break; default : operand->type = UD_NONE; break; } - return 0; + return operand->type; } @@ -827,12 +904,21 @@ decode_operands(struct ud* u) decode_operand(u, &u->operand[0], u->itab_entry->operand1.type, u->itab_entry->operand1.size); - decode_operand(u, &u->operand[1], - u->itab_entry->operand2.type, - u->itab_entry->operand2.size); - decode_operand(u, &u->operand[2], - u->itab_entry->operand3.type, - u->itab_entry->operand3.size); + if (u->operand[0].type != UD_NONE) { + decode_operand(u, &u->operand[1], + u->itab_entry->operand2.type, + u->itab_entry->operand2.size); + } + if (u->operand[1].type != UD_NONE) { + decode_operand(u, &u->operand[2], + u->itab_entry->operand3.type, + u->itab_entry->operand3.size); + } + if (u->operand[2].type != UD_NONE) { + decode_operand(u, &u->operand[3], + u->itab_entry->operand4.type, + u->itab_entry->operand4.size); + } return 0; } @@ -852,19 +938,40 @@ clear_insn(register struct ud* u) u->pfx_rep = 0; u->pfx_repe = 0; u->pfx_rex = 0; - u->pfx_insn = 0; + u->pfx_str = 0; u->mnemonic = UD_Inone; u->itab_entry = NULL; u->have_modrm = 0; + u->br_far = 0; + u->vex_op = 0; + u->_rex = 0; + u->operand[0].type = UD_NONE; + u->operand[1].type = UD_NONE; + u->operand[2].type = UD_NONE; + u->operand[3].type = UD_NONE; +} - memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) ); - memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) ); - memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) ); + +static UD_INLINE int +resolve_pfx_str(struct ud* u) +{ + if (u->pfx_str == 0xf3) { + if (P_STR(u->itab_entry->prefix)) { + u->pfx_rep = 0xf3; + } else { + u->pfx_repe = 0xf3; + } + } else if (u->pfx_str == 0xf2) { + u->pfx_repne = 0xf3; + } + return 0; } + static int resolve_mode( struct ud* u ) { + int default64; /* if in error state, bail out */ if ( u->error ) return -1; @@ -873,22 +980,34 @@ resolve_mode( struct ud* u ) /* Check validity of instruction m64 */ if ( P_INV64( u->itab_entry->prefix ) ) { - u->error = 1; - return -1; + UDERR(u, "instruction invalid in 64bits\n"); + return -1; } - /* effective rex prefix is the effective mask for the - * instruction hard-coded in the opcode map. + /* compute effective rex based on, + * - vex prefix (if any) + * - rex prefix (if any, and not vex) + * - allowed prefixes specified by the opcode map */ - u->pfx_rex = ( u->pfx_rex & 0x40 ) | - ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) ); + if (u->vex_op == 0xc4) { + /* vex has rex.rxb in 1's complement */ + u->_rex = ((~(u->vex_b1 >> 5) & 0x7) /* rex.0rxb */ | + ((u->vex_b2 >> 4) & 0x8) /* rex.w000 */); + } else if (u->vex_op == 0xc5) { + /* vex has rex.r in 1's complement */ + u->_rex = (~(u->vex_b1 >> 5)) & 4; + } else { + UD_ASSERT(u->vex_op == 0); + u->_rex = u->pfx_rex; + } + u->_rex &= REX_PFX_MASK(u->itab_entry->prefix); /* whether this instruction has a default operand size of * 64bit, also hardcoded into the opcode map. */ - u->default64 = P_DEF64( u->itab_entry->prefix ); + default64 = P_DEF64( u->itab_entry->prefix ); /* calculate effective operand size */ - if ( REX_W( u->pfx_rex ) ) { + if (REX_W(u->_rex)) { u->opr_mode = 64; } else if ( u->pfx_opr ) { u->opr_mode = 16; @@ -897,7 +1016,7 @@ resolve_mode( struct ud* u ) * the effective operand size in the absence of rex.w * prefix is 32. */ - u->opr_mode = ( u->default64 ) ? 64 : 32; + u->opr_mode = default64 ? 64 : 32; } /* calculate effective address size */ @@ -910,45 +1029,18 @@ resolve_mode( struct ud* u ) u->adr_mode = ( u->pfx_adr ) ? 32 : 16; } - /* These flags determine which operand to apply the operand size - * cast to. - */ - u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0; - u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0; - u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0; - - /* set flags for implicit addressing */ - u->implicit_addr = P_IMPADDR( u->itab_entry->prefix ); - - return 0; -} - -static int gen_hex( struct ud *u ) -{ - unsigned int i; - unsigned char *src_ptr = ud_inp_sess( u ); - char* src_hex; - - /* bail out if in error stat. */ - if ( u->error ) return -1; - /* output buffer pointe */ - src_hex = ( char* ) u->insn_hexcode; - /* for each byte used to decode instruction */ - for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) { - sprintf( src_hex, "%02x", *src_ptr & 0xFF ); - src_hex += 2; - } return 0; } -static inline int +static UD_INLINE int decode_insn(struct ud *u, uint16_t ptr) { - ASSERT((ptr & 0x8000) == 0); + UD_ASSERT((ptr & 0x8000) == 0); u->itab_entry = &ud_itab[ ptr ]; u->mnemonic = u->itab_entry->mnemonic; - return (resolve_mode(u) == 0 && + return (resolve_pfx_str(u) == 0 && + resolve_mode(u) == 0 && decode_operands(u) == 0 && resolve_mnemonic(u) == 0) ? 0 : -1; } @@ -965,19 +1057,19 @@ decode_insn(struct ud *u, uint16_t ptr) * valid entry in the table, decode the operands, and read the final * byte to resolve the menmonic. */ -static inline int +static UD_INLINE int decode_3dnow(struct ud* u) { uint16_t ptr; - ASSERT(u->le->type == UD_TAB__OPC_3DNOW); - ASSERT(u->le->table[0xc] != 0); + UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW); + UD_ASSERT(u->le->table[0xc] != 0); decode_insn(u, u->le->table[0xc]); - ud_inp_next(u); + inp_next(u); if (u->error) { return -1; } - ptr = u->le->table[ud_inp_curr(u)]; - ASSERT((ptr & 0x8000) == 0); + ptr = u->le->table[inp_curr(u)]; + UD_ASSERT((ptr & 0x8000) == 0); u->mnemonic = ud_itab[ptr].mnemonic; return 0; } @@ -986,7 +1078,18 @@ decode_3dnow(struct ud* u) static int decode_ssepfx(struct ud *u) { - uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2; + uint8_t idx; + uint8_t pfx; + + /* + * String prefixes (f2, f3) take precedence over operand + * size prefix (66). + */ + pfx = u->pfx_str; + if (pfx == 0) { + pfx = u->pfx_opr; + } + idx = ((pfx & 0xf) + 1) / 2; if (u->le->table[idx] == 0) { idx = 0; } @@ -995,23 +1098,50 @@ decode_ssepfx(struct ud *u) * "Consume" the prefix as a part of the opcode, so it is no * longer exported as an instruction prefix. */ - switch (u->pfx_insn) { - case 0xf2: - u->pfx_repne = 0; - break; - case 0xf3: - u->pfx_rep = 0; - u->pfx_repe = 0; - break; - case 0x66: + u->pfx_str = 0; + if (pfx == 0x66) { + /* + * consume "66" only if it was used for decoding, leaving + * it to be used as an operands size override for some + * simd instructions. + */ u->pfx_opr = 0; - break; } } return decode_ext(u, u->le->table[idx]); } +static int +decode_vex(struct ud *u) +{ + uint8_t index; + if (u->dis_mode != 64 && MODRM_MOD(inp_peek(u)) != 0x3) { + index = 0; + } else { + u->vex_op = inp_curr(u); + u->vex_b1 = inp_next(u); + if (u->vex_op == 0xc4) { + uint8_t pp, m; + /* 3-byte vex */ + u->vex_b2 = inp_next(u); + UD_RETURN_ON_ERROR(u); + m = u->vex_b1 & 0x1f; + if (m == 0 || m > 3) { + UD_RETURN_WITH_ERROR(u, "reserved vex.m-mmmm value"); + } + pp = u->vex_b2 & 0x3; + index = (pp << 2) | m; + } else { + /* 2-byte vex */ + UD_ASSERT(u->vex_op == 0xc5); + index = 0x1 | ((u->vex_b1 & 0x3) << 2); + } + } + return decode_ext(u, u->le->table[index]); +} + + /* * decode_ext() * @@ -1038,7 +1168,7 @@ decode_ext(struct ud *u, uint16_t ptr) * 16 = 0,, 32 = 1, 64 = 2 */ case UD_TAB__OPC_MODE: - idx = u->dis_mode / 32; + idx = u->dis_mode != 64 ? 0 : 1; break; case UD_TAB__OPC_OSIZE: idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32; @@ -1067,8 +1197,19 @@ decode_ext(struct ud *u, uint16_t ptr) break; case UD_TAB__OPC_SSE: return decode_ssepfx(u); + case UD_TAB__OPC_VEX: + return decode_vex(u); + case UD_TAB__OPC_VEX_W: + idx = vex_w(u); + break; + case UD_TAB__OPC_VEX_L: + idx = vex_l(u); + break; + case UD_TAB__OPC_TABLE: + inp_next(u); + return decode_opcode(u); default: - ASSERT(!"not reached"); + UD_ASSERT(!"not reached"); break; } @@ -1076,22 +1217,13 @@ decode_ext(struct ud *u, uint16_t ptr) } -static inline int +static int decode_opcode(struct ud *u) { uint16_t ptr; - ASSERT(u->le->type == UD_TAB__OPC_TABLE); - ud_inp_next(u); - if (u->error) { - return -1; - } - ptr = u->le->table[ud_inp_curr(u)]; - if (ptr & 0x8000) { - u->le = &ud_lookup_table_list[ptr & ~0x8000]; - if (u->le->type == UD_TAB__OPC_TABLE) { - return decode_opcode(u); - } - } + UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE); + UD_RETURN_ON_ERROR(u); + ptr = u->le->table[inp_curr(u)]; return decode_ext(u, ptr); } @@ -1103,7 +1235,7 @@ decode_opcode(struct ud *u) unsigned int ud_decode(struct ud *u) { - ud_inp_start(u); + inp_start(u); clear_insn(u); u->le = &ud_lookup_table_list[0]; u->error = decode_prefixes(u) == -1 || @@ -1114,7 +1246,7 @@ ud_decode(struct ud *u) /* clear out the decode data. */ clear_insn(u); /* mark the sequence of bytes as invalid. */ - u->itab_entry = & s_ie__invalid; + u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */ u->mnemonic = u->itab_entry->mnemonic; } @@ -1127,16 +1259,15 @@ ud_decode(struct ud *u) u->pfx_seg = 0; u->insn_offset = u->pc; /* set offset of instruction */ - u->insn_fill = 0; /* set translation buffer index to 0 */ + u->asm_buf_fill = 0; /* set translation buffer index to 0 */ u->pc += u->inp_ctr; /* move program counter by bytes decoded */ - gen_hex( u ); /* generate hex code */ /* return number of bytes disassembled. */ return u->inp_ctr; } +#endif // USE(UDIS86) + /* vim: set ts=2 sw=2 expandtab */ - -#endif // USE(UDIS86) |