/* udis86 - libudis86/decode.c * * Copyright (c) 2002-2009 Vivek Thampi * All rights reserved. * * Redistribution and use in source and binary forms, with or without modification, * are permitted provided that the following conditions are met: * * * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #if USE(UDIS86) #include "udis86_extern.h" #include "udis86_types.h" #include "udis86_input.h" #include "udis86_decode.h" #include #define dbg(x, n...) /* #define dbg printf */ #ifndef __UD_STANDALONE__ # include #endif /* __UD_STANDALONE__ */ /* The max number of prefixes to an instruction */ #define MAX_PREFIXES 15 /* instruction aliases and special cases */ static struct ud_itab_entry s_ie__invalid = { UD_Iinvalid, O_NONE, O_NONE, O_NONE, P_none }; static int decode_ext(struct ud *u, uint16_t ptr); static inline int eff_opr_mode(int dis_mode, int rex_w, int pfx_opr) { if (dis_mode == 64) { return rex_w ? 64 : (pfx_opr ? 16 : 32); } else if (dis_mode == 32) { return pfx_opr ? 16 : 32; } else { ASSERT(dis_mode == 16); return pfx_opr ? 32 : 16; } } static inline int eff_adr_mode(int dis_mode, int pfx_adr) { if (dis_mode == 64) { return pfx_adr ? 32 : 64; } else if (dis_mode == 32) { return pfx_adr ? 16 : 32; } else { ASSERT(dis_mode == 16); return pfx_adr ? 32 : 16; } } /* Looks up mnemonic code in the mnemonic string table * Returns NULL if the mnemonic code is invalid */ const char * ud_lookup_mnemonic( enum ud_mnemonic_code c ) { return ud_mnemonics_str[ c ]; } /* * decode_prefixes * * Extracts instruction prefixes. */ static int decode_prefixes(struct ud *u) { unsigned int have_pfx = 1; unsigned int i; uint8_t curr; /* if in error state, bail out */ if ( u->error ) return -1; /* keep going as long as there are prefixes available */ for ( i = 0; have_pfx ; ++i ) { /* Get next byte. */ ud_inp_next(u); if ( u->error ) return -1; curr = ud_inp_curr( u ); /* rex prefixes in 64bit mode */ if ( u->dis_mode == 64 && ( curr & 0xF0 ) == 0x40 ) { u->pfx_rex = curr; } else { switch ( curr ) { case 0x2E : u->pfx_seg = UD_R_CS; u->pfx_rex = 0; break; case 0x36 : u->pfx_seg = UD_R_SS; u->pfx_rex = 0; break; case 0x3E : u->pfx_seg = UD_R_DS; u->pfx_rex = 0; break; case 0x26 : u->pfx_seg = UD_R_ES; u->pfx_rex = 0; break; case 0x64 : u->pfx_seg = UD_R_FS; u->pfx_rex = 0; break; case 0x65 : u->pfx_seg = UD_R_GS; u->pfx_rex = 0; break; case 0x67 : /* adress-size override prefix */ u->pfx_adr = 0x67; u->pfx_rex = 0; break; case 0xF0 : u->pfx_lock = 0xF0; u->pfx_rex = 0; break; case 0x66: /* the 0x66 sse prefix is only effective if no other sse prefix * has already been specified. */ if ( !u->pfx_insn ) u->pfx_insn = 0x66; u->pfx_opr = 0x66; u->pfx_rex = 0; break; case 0xF2: u->pfx_insn = 0xF2; u->pfx_repne = 0xF2; u->pfx_rex = 0; break; case 0xF3: u->pfx_insn = 0xF3; u->pfx_rep = 0xF3; u->pfx_repe = 0xF3; u->pfx_rex = 0; break; default : /* No more prefixes */ have_pfx = 0; break; } } /* check if we reached max instruction length */ if ( i + 1 == MAX_INSN_LENGTH ) { u->error = 1; break; } } /* return status */ if ( u->error ) return -1; /* rewind back one byte in stream, since the above loop * stops with a non-prefix byte. */ ud_inp_back(u); return 0; } static inline unsigned int modrm( struct ud * u ) { if ( !u->have_modrm ) { u->modrm = ud_inp_next( u ); u->have_modrm = 1; } return u->modrm; } static unsigned int resolve_operand_size( const struct ud * u, unsigned int s ) { switch ( s ) { case SZ_V: return ( u->opr_mode ); case SZ_Z: return ( u->opr_mode == 16 ) ? 16 : 32; case SZ_P: return ( u->opr_mode == 16 ) ? SZ_WP : SZ_DP; case SZ_MDQ: return ( u->opr_mode == 16 ) ? 32 : u->opr_mode; case SZ_RDQ: return ( u->dis_mode == 64 ) ? 64 : 32; default: return s; } } static int resolve_mnemonic( struct ud* u ) { /* far/near flags */ u->br_far = 0; u->br_near = 0; /* readjust operand sizes for call/jmp instrcutions */ if ( u->mnemonic == UD_Icall || u->mnemonic == UD_Ijmp ) { /* WP: 16:16 pointer */ if ( u->operand[ 0 ].size == SZ_WP ) { u->operand[ 0 ].size = 16; u->br_far = 1; u->br_near= 0; /* DP: 32:32 pointer */ } else if ( u->operand[ 0 ].size == SZ_DP ) { u->operand[ 0 ].size = 32; u->br_far = 1; u->br_near= 0; } else { u->br_far = 0; u->br_near= 1; } /* resolve 3dnow weirdness. */ } else if ( u->mnemonic == UD_I3dnow ) { u->mnemonic = ud_itab[ u->le->table[ ud_inp_curr( u ) ] ].mnemonic; } /* SWAPGS is only valid in 64bits mode */ if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) { u->error = 1; return -1; } if (u->mnemonic == UD_Ixchg) { if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX && u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) || (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX && u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) { u->operand[0].type = UD_NONE; u->operand[1].type = UD_NONE; u->mnemonic = UD_Inop; } } if (u->mnemonic == UD_Inop && u->pfx_rep) { u->pfx_rep = 0; u->mnemonic = UD_Ipause; } return 0; } /* ----------------------------------------------------------------------------- * decode_a()- Decodes operands of the type seg:offset * ----------------------------------------------------------------------------- */ static void decode_a(struct ud* u, struct ud_operand *op) { if (u->opr_mode == 16) { /* seg16:off16 */ op->type = UD_OP_PTR; op->size = 32; op->lval.ptr.off = ud_inp_uint16(u); op->lval.ptr.seg = ud_inp_uint16(u); } else { /* seg16:off32 */ op->type = UD_OP_PTR; op->size = 48; op->lval.ptr.off = ud_inp_uint32(u); op->lval.ptr.seg = ud_inp_uint16(u); } } /* ----------------------------------------------------------------------------- * decode_gpr() - Returns decoded General Purpose Register * ----------------------------------------------------------------------------- */ static enum ud_type decode_gpr(register struct ud* u, unsigned int s, unsigned char rm) { s = resolve_operand_size(u, s); switch (s) { case 64: return UD_R_RAX + rm; case SZ_DP: case 32: return UD_R_EAX + rm; case SZ_WP: case 16: return UD_R_AX + rm; case 8: if (u->dis_mode == 64 && u->pfx_rex) { if (rm >= 4) return UD_R_SPL + (rm-4); return UD_R_AL + rm; } else return UD_R_AL + rm; default: return 0; } } /* ----------------------------------------------------------------------------- * resolve_gpr64() - 64bit General Purpose Register-Selection. * ----------------------------------------------------------------------------- */ static enum ud_type resolve_gpr64(struct ud* u, enum ud_operand_code gpr_op, enum ud_operand_size * size) { if (gpr_op >= OP_rAXr8 && gpr_op <= OP_rDIr15) gpr_op = (gpr_op - OP_rAXr8) | (REX_B(u->pfx_rex) << 3); else gpr_op = (gpr_op - OP_rAX); if (u->opr_mode == 16) { *size = 16; return gpr_op + UD_R_AX; } if (u->dis_mode == 32 || (u->opr_mode == 32 && ! (REX_W(u->pfx_rex) || u->default64))) { *size = 32; return gpr_op + UD_R_EAX; } *size = 64; return gpr_op + UD_R_RAX; } /* ----------------------------------------------------------------------------- * resolve_gpr32 () - 32bit General Purpose Register-Selection. * ----------------------------------------------------------------------------- */ static enum ud_type resolve_gpr32(struct ud* u, enum ud_operand_code gpr_op) { gpr_op = gpr_op - OP_eAX; if (u->opr_mode == 16) return gpr_op + UD_R_AX; return gpr_op + UD_R_EAX; } /* ----------------------------------------------------------------------------- * resolve_reg() - Resolves the register type * ----------------------------------------------------------------------------- */ static enum ud_type resolve_reg(struct ud* u, unsigned int type, unsigned char i) { switch (type) { case T_MMX : return UD_R_MM0 + (i & 7); case T_XMM : return UD_R_XMM0 + i; case T_CRG : return UD_R_CR0 + i; case T_DBG : return UD_R_DR0 + i; case T_SEG : { /* * Only 6 segment registers, anything else is an error. */ if ((i & 7) > 5) { u->error = 1; } else { return UD_R_ES + (i & 7); } } case T_NONE: default: return UD_NONE; } } /* ----------------------------------------------------------------------------- * decode_imm() - Decodes Immediate values. * ----------------------------------------------------------------------------- */ static void decode_imm(struct ud* u, unsigned int s, struct ud_operand *op) { op->size = resolve_operand_size(u, s); op->type = UD_OP_IMM; switch (op->size) { case 8: op->lval.sbyte = ud_inp_uint8(u); break; case 16: op->lval.uword = ud_inp_uint16(u); break; case 32: op->lval.udword = ud_inp_uint32(u); break; case 64: op->lval.uqword = ud_inp_uint64(u); break; default: return; } } /* * decode_modrm_reg * * Decodes reg field of mod/rm byte * */ static void decode_modrm_reg(struct ud *u, struct ud_operand *operand, unsigned int type, unsigned int size) { uint8_t reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); operand->type = UD_OP_REG; operand->size = resolve_operand_size(u, size); if (type == T_GPR) { operand->base = decode_gpr(u, operand->size, reg); } else { operand->base = resolve_reg(u, type, reg); } } /* * decode_modrm_rm * * Decodes rm field of mod/rm byte * */ static void decode_modrm_rm(struct ud *u, struct ud_operand *op, unsigned char type, unsigned int size) { unsigned char mod, rm, reg; /* get mod, r/m and reg fields */ mod = MODRM_MOD(modrm(u)); rm = (REX_B(u->pfx_rex) << 3) | MODRM_RM(modrm(u)); reg = (REX_R(u->pfx_rex) << 3) | MODRM_REG(modrm(u)); UNUSED_PARAM(reg); op->size = resolve_operand_size(u, size); /* * If mod is 11b, then the modrm.rm specifies a register. * */ if (mod == 3) { op->type = UD_OP_REG; if (type == T_GPR) { op->base = decode_gpr(u, op->size, rm); } else { op->base = resolve_reg(u, type, (REX_B(u->pfx_rex) << 3) | (rm & 7)); } return; } /* * !11 => Memory Address */ op->type = UD_OP_MEM; if (u->adr_mode == 64) { op->base = UD_R_RAX + rm; if (mod == 1) { op->offset = 8; } else if (mod == 2) { op->offset = 32; } else if (mod == 0 && (rm & 7) == 5) { op->base = UD_R_RIP; op->offset = 32; } else { op->offset = 0; } /* * Scale-Index-Base (SIB) */ if ((rm & 7) == 4) { ud_inp_next(u); op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; op->index = UD_R_RAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); op->base = UD_R_RAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); /* special conditions for base reference */ if (op->index == UD_R_RSP) { op->index = UD_NONE; op->scale = UD_NONE; } if (op->base == UD_R_RBP || op->base == UD_R_R13) { if (mod == 0) { op->base = UD_NONE; } if (mod == 1) { op->offset = 8; } else { op->offset = 32; } } } } else if (u->adr_mode == 32) { op->base = UD_R_EAX + rm; if (mod == 1) { op->offset = 8; } else if (mod == 2) { op->offset = 32; } else if (mod == 0 && rm == 5) { op->base = UD_NONE; op->offset = 32; } else { op->offset = 0; } /* Scale-Index-Base (SIB) */ if ((rm & 7) == 4) { ud_inp_next(u); op->scale = (1 << SIB_S(ud_inp_curr(u))) & ~1; op->index = UD_R_EAX + (SIB_I(ud_inp_curr(u)) | (REX_X(u->pfx_rex) << 3)); op->base = UD_R_EAX + (SIB_B(ud_inp_curr(u)) | (REX_B(u->pfx_rex) << 3)); if (op->index == UD_R_ESP) { op->index = UD_NONE; op->scale = UD_NONE; } /* special condition for base reference */ if (op->base == UD_R_EBP) { if (mod == 0) { op->base = UD_NONE; } if (mod == 1) { op->offset = 8; } else { op->offset = 32; } } } } else { const unsigned int bases[] = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP, UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX }; const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI, UD_NONE, UD_NONE, UD_NONE, UD_NONE }; op->base = bases[rm & 7]; op->index = indices[rm & 7]; if (mod == 0 && rm == 6) { op->offset= 16; op->base = UD_NONE; } else if (mod == 1) { op->offset = 8; } else if (mod == 2) { op->offset = 16; } } /* * extract offset, if any */ switch (op->offset) { case 8 : op->lval.ubyte = ud_inp_uint8(u); break; case 16: op->lval.uword = ud_inp_uint16(u); break; case 32: op->lval.udword = ud_inp_uint32(u); break; case 64: op->lval.uqword = ud_inp_uint64(u); break; default: break; } } /* ----------------------------------------------------------------------------- * decode_o() - Decodes offset * ----------------------------------------------------------------------------- */ static void decode_o(struct ud* u, unsigned int s, struct ud_operand *op) { switch (u->adr_mode) { case 64: op->offset = 64; op->lval.uqword = ud_inp_uint64(u); break; case 32: op->offset = 32; op->lval.udword = ud_inp_uint32(u); break; case 16: op->offset = 16; op->lval.uword = ud_inp_uint16(u); break; default: return; } op->type = UD_OP_MEM; op->size = resolve_operand_size(u, s); } /* ----------------------------------------------------------------------------- * decode_operands() - Disassembles Operands. * ----------------------------------------------------------------------------- */ static int decode_operand(struct ud *u, struct ud_operand *operand, enum ud_operand_code type, unsigned int size) { switch (type) { case OP_A : decode_a(u, operand); break; case OP_MR: if (MODRM_MOD(modrm(u)) == 3) { decode_modrm_rm(u, operand, T_GPR, size == SZ_DY ? SZ_MDQ : SZ_V); } else if (size == SZ_WV) { decode_modrm_rm( u, operand, T_GPR, SZ_W); } else if (size == SZ_BV) { decode_modrm_rm( u, operand, T_GPR, SZ_B); } else if (size == SZ_DY) { decode_modrm_rm( u, operand, T_GPR, SZ_D); } else { ASSERT(!"unexpected size"); } break; case OP_M: if (MODRM_MOD(modrm(u)) == 3) { u->error = 1; } /* intended fall through */ case OP_E: decode_modrm_rm(u, operand, T_GPR, size); break; case OP_G: decode_modrm_reg(u, operand, T_GPR, size); break; case OP_I: decode_imm(u, size, operand); break; case OP_I1: operand->type = UD_OP_CONST; operand->lval.udword = 1; break; case OP_PR: if (MODRM_MOD(modrm(u)) != 3) { u->error = 1; } decode_modrm_rm(u, operand, T_MMX, size); break; case OP_P: decode_modrm_reg(u, operand, T_MMX, size); break; case OP_VR: if (MODRM_MOD(modrm(u)) != 3) { u->error = 1; } /* intended fall through */ case OP_W: decode_modrm_rm(u, operand, T_XMM, size); break; case OP_V: decode_modrm_reg(u, operand, T_XMM, size); break; case OP_S: decode_modrm_reg(u, operand, T_SEG, size); break; case OP_AL: case OP_CL: case OP_DL: case OP_BL: case OP_AH: case OP_CH: case OP_DH: case OP_BH: operand->type = UD_OP_REG; operand->base = UD_R_AL + (type - OP_AL); operand->size = 8; break; case OP_DX: operand->type = UD_OP_REG; operand->base = UD_R_DX; operand->size = 16; break; case OP_O: decode_o(u, size, operand); break; case OP_rAXr8: case OP_rCXr9: case OP_rDXr10: case OP_rBXr11: case OP_rSPr12: case OP_rBPr13: case OP_rSIr14: case OP_rDIr15: case OP_rAX: case OP_rCX: case OP_rDX: case OP_rBX: case OP_rSP: case OP_rBP: case OP_rSI: case OP_rDI: operand->type = UD_OP_REG; operand->base = resolve_gpr64(u, type, &operand->size); break; case OP_ALr8b: case OP_CLr9b: case OP_DLr10b: case OP_BLr11b: case OP_AHr12b: case OP_CHr13b: case OP_DHr14b: case OP_BHr15b: { ud_type_t gpr = (type - OP_ALr8b) + UD_R_AL + (REX_B(u->pfx_rex) << 3); if (UD_R_AH <= gpr && u->pfx_rex) { gpr = gpr + 4; } operand->type = UD_OP_REG; operand->base = gpr; break; } case OP_eAX: case OP_eCX: case OP_eDX: case OP_eBX: case OP_eSP: case OP_eBP: case OP_eSI: case OP_eDI: operand->type = UD_OP_REG; operand->base = resolve_gpr32(u, type); operand->size = u->opr_mode == 16 ? 16 : 32; break; case OP_ES: case OP_CS: case OP_DS: case OP_SS: case OP_FS: case OP_GS: /* in 64bits mode, only fs and gs are allowed */ if (u->dis_mode == 64) { if (type != OP_FS && type != OP_GS) { u->error= 1; } } operand->type = UD_OP_REG; operand->base = (type - OP_ES) + UD_R_ES; operand->size = 16; break; case OP_J : decode_imm(u, size, operand); operand->type = UD_OP_JIMM; break ; case OP_Q: decode_modrm_rm(u, operand, T_MMX, size); break; case OP_R : decode_modrm_rm(u, operand, T_GPR, size); break; case OP_C: decode_modrm_reg(u, operand, T_CRG, size); break; case OP_D: decode_modrm_reg(u, operand, T_DBG, size); break; case OP_I3 : operand->type = UD_OP_CONST; operand->lval.sbyte = 3; break; case OP_ST0: case OP_ST1: case OP_ST2: case OP_ST3: case OP_ST4: case OP_ST5: case OP_ST6: case OP_ST7: operand->type = UD_OP_REG; operand->base = (type - OP_ST0) + UD_R_ST0; operand->size = 0; break; case OP_AX: operand->type = UD_OP_REG; operand->base = UD_R_AX; operand->size = 16; break; default : operand->type = UD_NONE; break; } return 0; } /* * decode_operands * * Disassemble upto 3 operands of the current instruction being * disassembled. By the end of the function, the operand fields * of the ud structure will have been filled. */ static int decode_operands(struct ud* u) { decode_operand(u, &u->operand[0], u->itab_entry->operand1.type, u->itab_entry->operand1.size); decode_operand(u, &u->operand[1], u->itab_entry->operand2.type, u->itab_entry->operand2.size); decode_operand(u, &u->operand[2], u->itab_entry->operand3.type, u->itab_entry->operand3.size); return 0; } /* ----------------------------------------------------------------------------- * clear_insn() - clear instruction structure * ----------------------------------------------------------------------------- */ static void clear_insn(register struct ud* u) { u->error = 0; u->pfx_seg = 0; u->pfx_opr = 0; u->pfx_adr = 0; u->pfx_lock = 0; u->pfx_repne = 0; u->pfx_rep = 0; u->pfx_repe = 0; u->pfx_rex = 0; u->pfx_insn = 0; u->mnemonic = UD_Inone; u->itab_entry = NULL; u->have_modrm = 0; memset( &u->operand[ 0 ], 0, sizeof( struct ud_operand ) ); memset( &u->operand[ 1 ], 0, sizeof( struct ud_operand ) ); memset( &u->operand[ 2 ], 0, sizeof( struct ud_operand ) ); } static int resolve_mode( struct ud* u ) { /* if in error state, bail out */ if ( u->error ) return -1; /* propagate prefix effects */ if ( u->dis_mode == 64 ) { /* set 64bit-mode flags */ /* Check validity of instruction m64 */ if ( P_INV64( u->itab_entry->prefix ) ) { u->error = 1; return -1; } /* effective rex prefix is the effective mask for the * instruction hard-coded in the opcode map. */ u->pfx_rex = ( u->pfx_rex & 0x40 ) | ( u->pfx_rex & REX_PFX_MASK( u->itab_entry->prefix ) ); /* whether this instruction has a default operand size of * 64bit, also hardcoded into the opcode map. */ u->default64 = P_DEF64( u->itab_entry->prefix ); /* calculate effective operand size */ if ( REX_W( u->pfx_rex ) ) { u->opr_mode = 64; } else if ( u->pfx_opr ) { u->opr_mode = 16; } else { /* unless the default opr size of instruction is 64, * the effective operand size in the absence of rex.w * prefix is 32. */ u->opr_mode = ( u->default64 ) ? 64 : 32; } /* calculate effective address size */ u->adr_mode = (u->pfx_adr) ? 32 : 64; } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */ u->opr_mode = ( u->pfx_opr ) ? 16 : 32; u->adr_mode = ( u->pfx_adr ) ? 16 : 32; } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */ u->opr_mode = ( u->pfx_opr ) ? 32 : 16; u->adr_mode = ( u->pfx_adr ) ? 32 : 16; } /* These flags determine which operand to apply the operand size * cast to. */ u->c1 = ( P_C1( u->itab_entry->prefix ) ) ? 1 : 0; u->c2 = ( P_C2( u->itab_entry->prefix ) ) ? 1 : 0; u->c3 = ( P_C3( u->itab_entry->prefix ) ) ? 1 : 0; /* set flags for implicit addressing */ u->implicit_addr = P_IMPADDR( u->itab_entry->prefix ); return 0; } static int gen_hex( struct ud *u ) { unsigned int i; unsigned char *src_ptr = ud_inp_sess( u ); char* src_hex; /* bail out if in error stat. */ if ( u->error ) return -1; /* output buffer pointe */ src_hex = ( char* ) u->insn_hexcode; /* for each byte used to decode instruction */ for ( i = 0; i < u->inp_ctr; ++i, ++src_ptr) { sprintf( src_hex, "%02x", *src_ptr & 0xFF ); src_hex += 2; } return 0; } static inline int decode_insn(struct ud *u, uint16_t ptr) { ASSERT((ptr & 0x8000) == 0); u->itab_entry = &ud_itab[ ptr ]; u->mnemonic = u->itab_entry->mnemonic; return (resolve_mode(u) == 0 && decode_operands(u) == 0 && resolve_mnemonic(u) == 0) ? 0 : -1; } /* * decode_3dnow() * * Decoding 3dnow is a little tricky because of its strange opcode * structure. The final opcode disambiguation depends on the last * byte that comes after the operands have been decoded. Fortunately, * all 3dnow instructions have the same set of operand types. So we * go ahead and decode the instruction by picking an arbitrarily chosen * valid entry in the table, decode the operands, and read the final * byte to resolve the menmonic. */ static inline int decode_3dnow(struct ud* u) { uint16_t ptr; ASSERT(u->le->type == UD_TAB__OPC_3DNOW); ASSERT(u->le->table[0xc] != 0); decode_insn(u, u->le->table[0xc]); ud_inp_next(u); if (u->error) { return -1; } ptr = u->le->table[ud_inp_curr(u)]; ASSERT((ptr & 0x8000) == 0); u->mnemonic = ud_itab[ptr].mnemonic; return 0; } static int decode_ssepfx(struct ud *u) { uint8_t idx = ((u->pfx_insn & 0xf) + 1) / 2; if (u->le->table[idx] == 0) { idx = 0; } if (idx && u->le->table[idx] != 0) { /* * "Consume" the prefix as a part of the opcode, so it is no * longer exported as an instruction prefix. */ switch (u->pfx_insn) { case 0xf2: u->pfx_repne = 0; break; case 0xf3: u->pfx_rep = 0; u->pfx_repe = 0; break; case 0x66: u->pfx_opr = 0; break; } } return decode_ext(u, u->le->table[idx]); } /* * decode_ext() * * Decode opcode extensions (if any) */ static int decode_ext(struct ud *u, uint16_t ptr) { uint8_t idx = 0; if ((ptr & 0x8000) == 0) { return decode_insn(u, ptr); } u->le = &ud_lookup_table_list[(~0x8000 & ptr)]; if (u->le->type == UD_TAB__OPC_3DNOW) { return decode_3dnow(u); } switch (u->le->type) { case UD_TAB__OPC_MOD: /* !11 = 0, 11 = 1 */ idx = (MODRM_MOD(modrm(u)) + 1) / 4; break; /* disassembly mode/operand size/address size based tables. * 16 = 0,, 32 = 1, 64 = 2 */ case UD_TAB__OPC_MODE: idx = u->dis_mode / 32; break; case UD_TAB__OPC_OSIZE: idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32; break; case UD_TAB__OPC_ASIZE: idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32; break; case UD_TAB__OPC_X87: idx = modrm(u) - 0xC0; break; case UD_TAB__OPC_VENDOR: if (u->vendor == UD_VENDOR_ANY) { /* choose a valid entry */ idx = (u->le->table[idx] != 0) ? 0 : 1; } else if (u->vendor == UD_VENDOR_AMD) { idx = 0; } else { idx = 1; } break; case UD_TAB__OPC_RM: idx = MODRM_RM(modrm(u)); break; case UD_TAB__OPC_REG: idx = MODRM_REG(modrm(u)); break; case UD_TAB__OPC_SSE: return decode_ssepfx(u); default: ASSERT(!"not reached"); break; } return decode_ext(u, u->le->table[idx]); } static inline int decode_opcode(struct ud *u) { uint16_t ptr; ASSERT(u->le->type == UD_TAB__OPC_TABLE); ud_inp_next(u); if (u->error) { return -1; } ptr = u->le->table[ud_inp_curr(u)]; if (ptr & 0x8000) { u->le = &ud_lookup_table_list[ptr & ~0x8000]; if (u->le->type == UD_TAB__OPC_TABLE) { return decode_opcode(u); } } return decode_ext(u, ptr); } /* ============================================================================= * ud_decode() - Instruction decoder. Returns the number of bytes decoded. * ============================================================================= */ unsigned int ud_decode(struct ud *u) { ud_inp_start(u); clear_insn(u); u->le = &ud_lookup_table_list[0]; u->error = decode_prefixes(u) == -1 || decode_opcode(u) == -1 || u->error; /* Handle decode error. */ if (u->error) { /* clear out the decode data. */ clear_insn(u); /* mark the sequence of bytes as invalid. */ u->itab_entry = & s_ie__invalid; u->mnemonic = u->itab_entry->mnemonic; } /* maybe this stray segment override byte * should be spewed out? */ if ( !P_SEG( u->itab_entry->prefix ) && u->operand[0].type != UD_OP_MEM && u->operand[1].type != UD_OP_MEM ) u->pfx_seg = 0; u->insn_offset = u->pc; /* set offset of instruction */ u->insn_fill = 0; /* set translation buffer index to 0 */ u->pc += u->inp_ctr; /* move program counter by bytes decoded */ gen_hex( u ); /* generate hex code */ /* return number of bytes disassembled. */ return u->inp_ctr; } /* vim: set ts=2 sw=2 expandtab */ #endif // USE(UDIS86)