diff options
Diffstat (limited to 'Source/JavaScriptCore/disassembler/udis86/ud_opcode.py')
-rw-r--r-- | Source/JavaScriptCore/disassembler/udis86/ud_opcode.py | 771 |
1 files changed, 579 insertions, 192 deletions
# udis86 - scripts/ud_opcode.py
#
# Copyright (c) 2009, 2013 Vivek Thampi
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without modification,
# [... remainder of the license text is not visible in this excerpt ...]
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import os


# Some compatibility stuff for supporting python 2.x as well as python 3.x
def itemslist(dict):
    """Return the (key, value) pairs of `dict`.

    Uses `iteritems()` when the mapping provides it (python 2.x) and falls
    back to a list copy of `items()` otherwise (python 3.x).
    """
    try:
        return dict.iteritems()  # python 2.x
    except AttributeError:
        return list(dict.items())  # python 3.x


class UdInsnDef:
    """An x86 instruction definition.

    Constructed from the keyword arguments `mnemonic`, `prefixes`,
    `opcodes`, `operands` and `cpuid`. Every opcode that starts with '/'
    is treated as an extension of the form '/name=value' and recorded in
    a private name -> value map.
    """

    def __init__(self, **insnDef):
        self.mnemonic = insnDef['mnemonic']
        self.prefixes = insnDef['prefixes']
        self.opcodes = insnDef['opcodes']
        self.operands = insnDef['operands']
        self._cpuid = insnDef['cpuid']
        self._opcexts = {}

        for opc in self.opcodes:
            if opc.startswith('/'):
                e, v = opc.split('=')
                self._opcexts[e] = v

    def lookupPrefix(self, pfx):
        """Lookup prefix (if any, None otherwise), by name"""
        return True if pfx in self.prefixes else None

    @property
    def vendor(self):
        """Value of the '/vendor' opcode extension, or None."""
        return self._opcexts.get('/vendor', None)

    @property
    def mode(self):
        """Value of the '/m' opcode extension, or None."""
        return self._opcexts.get('/m', None)

    @property
    def osize(self):
        """Value of the '/o' opcode extension, or None."""
        return self._opcexts.get('/o', None)

    def isDef64(self):
        """Return True if 'def64' is among the prefixes."""
        return 'def64' in self.prefixes

    def __str__(self):
        return self.mnemonic + " " + ', '.join(self.operands) + \
            " " + ' '.join(self.opcodes)


class UdOpcodeTable:
    """A single table of instruction definitions, indexed by
    a decode field.
    """

    class CollisionError(Exception):
        pass

    class IndexError(Exception):
        """Invalid Index Error"""
        pass

    # Index of each '/vex=' value in a /vex table. Built once instead of
    # on every vex2idx() call.
    _VexOpcExtMap = {
        'none':    0x0,
        '0f':      0x1,
        '0f38':    0x2,
        '0f3a':    0x3,
        '66':      0x4,
        '66_0f':   0x5,
        '66_0f38': 0x6,
        '66_0f3a': 0x7,
        'f3':      0x8,
        'f3_0f':   0x9,
        'f3_0f38': 0xa,
        'f3_0f3a': 0xb,
        'f2':      0xc,
        'f2_0f':   0xd,
        'f2_0f38': 0xe,
        'f2_0f3a': 0xf,
    }

    @classmethod
    def vendor2idx(cls, v):
        """Map a vendor name to its table index: amd=0, intel=1, else 2."""
        return (0 if v == 'amd'
                else (1 if v == 'intel'
                      else 2))

    @classmethod
    def vex2idx(cls, v):
        """Map a '/vex=' value to its table index.

        A leading "none_" is dropped first, so 'none_0f' is looked up as
        '0f'. Raises KeyError for a value that is not in the map.
        """
        if v.startswith("none_"):
            v = v[5:]
        return cls._VexOpcExtMap[v]

    # A mapping of opcode extensions to their representational
    # values used in the opcode map.
    #
    # Floor division (//) is required for '/o', '/a' and '/sse': with '/'
    # python 3.x yields floats (16 / 32 == 0.5), which are not valid
    # table indices.
    OpcExtMap = {
        '/rm':    lambda v: int(v, 16),
        '/x87':   lambda v: int(v, 16),
        '/3dnow': lambda v: int(v, 16),
        '/reg':   lambda v: int(v, 16),
        # modrm.mod
        # (!11, 11) => (00b, 01b)
        '/mod':   lambda v: 0 if v == '!11' else 1,
        # Mode extensions:
        # (16, 32, 64) => (00, 01, 02)
        '/o':     lambda v: (int(v) // 32),
        '/a':     lambda v: (int(v) // 32),
        # Disassembly mode
        # (!64, 64) => (00b, 01b)
        '/m':     lambda v: 1 if v == '64' else 0,
        # SSE
        # none => 0
        # f2   => 1
        # f3   => 2
        # 66   => 3
        '/sse':   lambda v: (0 if v == 'none'
                             else (((int(v, 16) & 0xf) + 1) // 2)),
        # AVX
        '/vex':   lambda v: UdOpcodeTable.vex2idx(v),
        '/vexw':  lambda v: 0 if v == '0' else 1,
        '/vexl':  lambda v: 0 if v == '0' else 1,
        # Vendor
        '/vendor': lambda v: UdOpcodeTable.vendor2idx(v)
    }

    _TableInfo = {
        'opctbl':  {'label': 'UD_TAB__OPC_TABLE',  'size': 256},
        '/sse':    {'label': 'UD_TAB__OPC_SSE',    'size': 4},
        '/reg':    {'label': 'UD_TAB__OPC_REG',    'size': 8},
        '/rm':     {'label': 'UD_TAB__OPC_RM',     'size': 8},
        '/mod':    {'label': 'UD_TAB__OPC_MOD',    'size': 2},
        '/m':      {'label': 'UD_TAB__OPC_MODE',   'size': 2},
        '/x87':    {'label': 'UD_TAB__OPC_X87',    'size': 64},
        '/a':      {'label': 'UD_TAB__OPC_ASIZE',  'size': 3},
        '/o':      {'label': 'UD_TAB__OPC_OSIZE',  'size': 3},
        '/3dnow':  {'label': 'UD_TAB__OPC_3DNOW',  'size': 256},
        '/vendor': {'label': 'UD_TAB__OPC_VENDOR', 'size': 3},
        '/vex':    {'label': 'UD_TAB__OPC_VEX',    'size': 16},
        '/vexw':   {'label': 'UD_TAB__OPC_VEX_W',  'size': 2},
        '/vexl':   {'label': 'UD_TAB__OPC_VEX_L',  'size': 2},
    }

    def __init__(self, typ):
        assert typ in self._TableInfo
        self._typ = typ
        self._entries = {}

    def size(self):
        """Return the declared number of slots for this table type."""
        return self._TableInfo[self._typ]['size']

    def entries(self):
        """Return the (index, entry) pairs currently in the table."""
        return itemslist(self._entries)

    def numEntries(self):
        """Return the number of populated slots."""
        return len(self._entries)

    def label(self):
        """Return the label string registered for this table type."""
        return self._TableInfo[self._typ]['label']

    def typ(self):
        """Return the table type ('opctbl' or an extension such as '/reg')."""
        return self._typ

    def meta(self):
        """Return the table type (same value as typ())."""
        return self._typ

    def __str__(self):
        return "table-%s" % self._typ

    def add(self, opc, obj):
        """Store `obj` in the slot selected by opcode string `opc`.

        Raises UdOpcodeTable.CollisionError if `opc` belongs to a
        different table type or the slot is already occupied.
        """
        typ = UdOpcodeTable.getOpcodeTyp(opc)
        idx = UdOpcodeTable.getOpcodeIdx(opc)
        if self._typ != typ or idx in self._entries:
            # The exception class is a class attribute, so it has to be
            # referenced through the class; a bare name is not in scope.
            raise UdOpcodeTable.CollisionError(
                "%s <-> %s (opcode %s)" % (self._typ, typ, opc))
        self._entries[idx] = obj

    def lookup(self, opc):
        """Return the entry selected by opcode string `opc`, or None.

        Raises UdOpcodeTable.CollisionError if `opc` belongs to a
        different table type.
        """
        typ = UdOpcodeTable.getOpcodeTyp(opc)
        idx = UdOpcodeTable.getOpcodeIdx(opc)
        if self._typ != typ:
            raise UdOpcodeTable.CollisionError("%s <-> %s" % (self._typ, typ))
        return self._entries.get(idx, None)

    def entryAt(self, index):
        """Returns the entry at a given index of the table,
        None if there is none. Raises an exception if the
        index is out of bounds.
        """
        if 0 <= index < self.size():
            return self._entries.get(index, None)
        raise self.IndexError("index out of bounds: %s" % index)

    def setEntryAt(self, index, obj):
        """Store `obj` at `index`, replacing any existing entry.

        Raises UdOpcodeTable.IndexError if the index is out of bounds.
        """
        if 0 <= index < self.size():
            self._entries[index] = obj
        else:
            raise self.IndexError("index out of bounds: %s" % index)

    @classmethod
    def getOpcodeTyp(cls, opc):
        """Return the table type an opcode string indexes into: the
        extension name for '/name=value', 'opctbl' otherwise.
        """
        if opc.startswith('/'):
            return opc.split('=')[0]
        else:
            return 'opctbl'

    @classmethod
    def getOpcodeIdx(cls, opc):
        """Return the integer table index encoded by an opcode string."""
        if opc.startswith('/'):
            typ, v = opc.split('=')
            return cls.OpcExtMap[typ](v)
        else:
            # plain opctbl opcode
            return int(opc, 16)

    @classmethod
    def getLabels(cls):
        """Returns a list of all labels"""
        return [cls._TableInfo[k]['label'] for k in cls._TableInfo.keys()]


class UdOpcodeTables(object):
    """Collection of opcode tables
    """

    class CollisionError(Exception):
        """Two objects were mapped to the same opcode string.

        `obj1` is what the trie already held, `obj2` is what was being
        added.
        """
        def __init__(self, obj1, obj2):
            Exception.__init__(self, "opcode collision: %s <-> %s"
                               % (obj1, obj2))
            self.obj1, self.obj2 = obj1, obj2

    def newTable(self, typ):
        """Create a new opcode table of a given type `typ`. """
        tbl = UdOpcodeTable(typ)
        self._tables.append(tbl)
        return tbl

    def mkTrie(self, opcodes, obj):
        """Recursively construct a trie entry mapping a string of
        opcodes to an object.
        """
        if len(opcodes) == 0:
            return obj
        opc = opcodes[0]
        tbl = self.newTable(UdOpcodeTable.getOpcodeTyp(opc))
        tbl.add(opc, self.mkTrie(opcodes[1:], obj))
        return tbl

    def walk(self, tbl, opcodes):
        """Walk down the opcode trie, starting at a given opcode
        table, given a string of opcodes. Return None if unable
        to walk, the object at the leaf otherwise.
        """
        opc = opcodes[0]
        e = tbl.lookup(opc)
        if e is None:
            return None
        elif isinstance(e, UdOpcodeTable) and len(opcodes[1:]):
            return self.walk(e, opcodes[1:])
        return e

    def map(self, tbl, opcodes, obj):
        """Create a mapping from a given string of opcodes to an
        object in the opcode trie. Constructs trie branches as
        needed.

        Raises CollisionError if the opcode string is already mapped, or
        if an existing leaf sits on the path of a longer opcode string.
        """
        opc = opcodes[0]
        e = tbl.lookup(opc)
        if e is None:
            tbl.add(opc, self.mkTrie(opcodes[1:], obj))
        else:
            # A non-table entry is a leaf and cannot be descended into;
            # report it as a collision rather than failing on e.lookup.
            if len(opcodes[1:]) == 0 or not isinstance(e, UdOpcodeTable):
                raise self.CollisionError(e, obj)
            self.map(e, opcodes[1:], obj)

    def __init__(self, xml):
        """Build the opcode tables from a udis86 optable.xml.

        `xml` is passed straight to xml.dom.minidom.parse. If the
        UD_OPCODE_DEBUG environment variable is set, a log is written to
        "opcodeTables.log" in the current directory.
        """
        self._tables = []
        self._insns = []
        self._mnemonics = {}
        # Always defined, so log() never depends on the environment
        # being unchanged since construction.
        self._logFh = None

        # The root table is always a 256 entry opctbl, indexed
        # by a plain opcode byte
        self.root = self.newTable('opctbl')

        if os.getenv("UD_OPCODE_DEBUG"):
            self._logFh = open("opcodeTables.log", "w")

        # add an invalid instruction entry without any mapping
        # in the opcode tables.
        self.invalidInsn = UdInsnDef(mnemonic="invalid", opcodes=[], cpuid=[],
                                     operands=[], prefixes=[])
        self._insns.append(self.invalidInsn)

        # Construct UdOpcodeTables object from the given
        # udis86 optable.xml
        for insn in self.__class__.parseOptableXML(xml):
            self.addInsnDef(insn)
        self.patchAvx2byte()
        self.mergeSSENONE()
        self.printStats()

    def log(self, s):
        """Append `s` to the debug log; no-op when logging is disabled."""
        if self._logFh is not None:
            self._logFh.write(s + "\n")

    def mergeSSENONE(self):
        """Merge sse tables with only one entry for /sse=none
        """
        for table in self._tables:
            for k, e in table.entries():
                if isinstance(e, UdOpcodeTable) and e.typ() == '/sse':
                    if e.numEntries() == 1:
                        sse = e.lookup("/sse=none")
                        if sse is not None:
                            table.setEntryAt(k, sse)

        # Rebuild the table list from the root so that tables which were
        # just bypassed are no longer listed.
        uniqTables = set()

        def genTableList(tbl):
            if tbl not in uniqTables:
                self._tables.append(tbl)
                uniqTables.add(tbl)
                for k, e in tbl.entries():
                    if isinstance(e, UdOpcodeTable):
                        genTableList(e)

        self._tables = []
        genTableList(self.root)

    def patchAvx2byte(self):
        """Map every 'c4' /vex entry under 'c5' as well."""
        # create avx tables
        for pp in (None, 'f2', 'f3', '66'):
            for m in (None, '0f', '0f38', '0f3a'):
                if pp is None and m is None:
                    continue
                if pp is None:
                    vex = m
                elif m is None:
                    vex = pp
                else:
                    vex = pp + '_' + m
                table = self.walk(self.root, ('c4', '/vex=' + vex))
                self.map(self.root, ('c5', '/vex=' + vex), table)

    def addInsn(self, **insnDef):
        """Add one instruction definition to the trie and the lookup lists.

        Expects the keywords `mnemonic`, `prefixes`, `opcodes`, `opcexts`,
        `operands` and `cpuid`. On any failure while mapping, the trie is
        dumped through pprint() and the exception is re-raised.
        """
        # Canonicalize opcode list
        opcexts = insnDef['opcexts']
        opcodes = list(insnDef['opcodes'])

        # Re-order vex
        if '/vex' in opcexts:
            assert opcodes[0] == 'c4' or opcodes[0] == 'c5'
            opcodes.insert(1, '/vex=' + opcexts['/vex'])

        # Add extensions. The order is important, and determines how
        # well the opcode table is packed. Also note, /sse must be
        # before /o, because /sse may consume operand size prefix
        # affect the outcome of /o.
        for ext in ('/mod', '/x87', '/reg', '/rm', '/sse', '/o', '/a', '/m',
                    '/vexw', '/vexl', '/3dnow', '/vendor'):
            if ext in opcexts:
                opcodes.append(ext + '=' + opcexts[ext])

        insn = UdInsnDef(mnemonic=insnDef['mnemonic'],
                         prefixes=insnDef['prefixes'],
                         operands=insnDef['operands'],
                         opcodes=opcodes,
                         cpuid=insnDef['cpuid'])
        try:
            self.map(self.root, opcodes, insn)
        except self.CollisionError as e:
            self.pprint()
            print(opcodes, insn, str(e.obj1), str(e.obj2))
            raise
        except Exception:
            self.pprint()
            raise
        self._insns.append(insn)
        # add to lookup by mnemonic structure
        if insn.mnemonic not in self._mnemonics:
            self._mnemonics[insn.mnemonic] = [insn]
        else:
            self._mnemonics[insn.mnemonic].append(insn)

    def addInsnDef(self, insnDef):
        """Normalise a parsed definition and add it to the tables.

        `insnDef` is a dict with the keys produced by parseOptableXML.
        """
        opcodes = []
        opcexts = {}

        # pack plain opcodes first, and collect opcode
        # extensions
        for opc in insnDef['opcodes']:
            if not opc.startswith('/'):
                opcodes.append(opc)
            else:
                e, v = opc.split('=')
                opcexts[e] = v

        # treat vendor as an opcode extension
        if len(insnDef['vendor']):
            opcexts['/vendor'] = insnDef['vendor'][0]

        if insnDef['mnemonic'] in ('lds', 'les'):
            #
            # Massage lds and les, which share the same prefix as AVX
            # instructions, to work well with the opcode tree.
            #
            opcexts['/vex'] = 'none'
        elif '/vex' in opcexts:
            # A proper avx instruction definition; make sure there are
            # no legacy opcode extensions
            # NOTE(review): `opcodes` only ever holds plain opcodes here,
            # so this check cannot fail; `opcexts` was probably intended.
            # Left unchanged -- confirm against optable.xml before
            # tightening it.
            assert '/sse' not in opcodes

            # make sure the opcode definitions don't already include
            # the avx prefixes.
            assert opcodes[0] not in ('c4', 'c5')

            # An avx only instruction is defined by the /vex= opcode
            # extension. They do not include the c4 (long form) or
            # c5 (short form) prefix. As part of opcode table generate,
            # here we create the long form definition, and then patch
            # the table for c5 in a later stage.
            # Construct a long-form definition of the avx instruction
            opcodes.insert(0, 'c4')
        elif (opcodes[0] == '0f' and opcodes[1] != '0f' and
              '/sse' not in opcexts):
            # Make all 2-byte opcode form instructions play nice with sse
            # opcode maps.
            opcexts['/sse'] = 'none'

        # legacy sse defs that get promoted to avx
        fn = self.addInsn
        if 'avx' in insnDef['cpuid'] and '/sse' in opcexts:
            fn = self.addSSE2AVXInsn

        fn(mnemonic=insnDef['mnemonic'],
           prefixes=insnDef['prefixes'],
           opcodes=opcodes,
           opcexts=opcexts,
           operands=insnDef['operands'],
           cpuid=insnDef['cpuid'])

    def addSSE2AVXInsn(self, **insnDef):
        """Add an instruction definition containing an avx cpuid bit, but
        declared in its legacy SSE form. The function splits the
        definition to create two new definitions, one for SSE and one
        promoted to an AVX form.
        """

        # SSE
        ssemnemonic = insnDef['mnemonic']
        sseopcodes = insnDef['opcodes']
        # remove vex opcode extensions
        sseopcexts = dict([(e, v) for e, v in itemslist(insnDef['opcexts'])
                           if not e.startswith('/vex')])
        # strip out avx operands, preserving relative ordering
        # of remaining operands
        sseoperands = [opr for opr in insnDef['operands']
                       if opr not in ('H', 'L')]
        # strip out avx prefixes
        sseprefixes = [pfx for pfx in insnDef['prefixes']
                       if not pfx.startswith('vex')]
        # strip out avx bits from cpuid
        ssecpuid = [flag for flag in insnDef['cpuid']
                    if not flag.startswith('avx')]

        self.addInsn(mnemonic=ssemnemonic,
                     prefixes=sseprefixes,
                     opcodes=sseopcodes,
                     opcexts=sseopcexts,
                     operands=sseoperands,
                     cpuid=ssecpuid)

        # AVX
        vexmnemonic = 'v' + insnDef['mnemonic']
        vexprefixes = insnDef['prefixes']
        vexopcodes = ['c4']
        vexopcexts = dict([(e, insnDef['opcexts'][e])
                           for e in insnDef['opcexts'] if e != '/sse'])
        vexopcexts['/vex'] = insnDef['opcexts']['/sse'] + '_' + '0f'
        if insnDef['opcodes'][1] == '38' or insnDef['opcodes'][1] == '3a':
            vexopcexts['/vex'] += insnDef['opcodes'][1]
            vexopcodes.extend(insnDef['opcodes'][2:])
        else:
            vexopcodes.extend(insnDef['opcodes'][1:])
        vexoperands = []
        for o in insnDef['operands']:
            # make the operand size explicit: x
            if o in ('V', 'W', 'H', 'U'):
                o = o + 'x'
            vexoperands.append(o)
        vexcpuid = [flag for flag in insnDef['cpuid']
                    if not flag.startswith('sse')]

        self.addInsn(mnemonic=vexmnemonic,
                     prefixes=vexprefixes,
                     opcodes=vexopcodes,
                     opcexts=vexopcexts,
                     operands=vexoperands,
                     cpuid=vexcpuid)

    def getInsnList(self):
        """Returns a list of all instructions in the collection"""
        return self._insns

    def getTableList(self):
        """Returns a list of all tables in the collection"""
        return self._tables

    def getMnemonicsList(self):
        """Returns a sorted list of mnemonics"""
        return sorted(self._mnemonics.keys())

    def pprint(self):
        """Write an indented dump of the trie to the debug log."""
        def printWalk(tbl, indent=""):
            entries = tbl.entries()
            for k, e in entries:
                if isinstance(e, UdOpcodeTable):
                    self.log("%s    |-<%02x> %s" % (indent, k, e))
                    printWalk(e, indent + "    |")
                elif isinstance(e, UdInsnDef):
                    self.log("%s    |-<%02x> %s" % (indent, k, e))
        printWalk(self.root)

    def printStats(self):
        """Write table statistics, then the full trie, to the debug log."""
        tables = self.getTableList()
        self.log("stats: ")
        self.log("  Num tables    = %d" % len(tables))
        self.log("  Num insnDefs  = %d" % len(self.getInsnList()))
        self.log("  Num insns     = %d" % len(self.getMnemonicsList()))

        totalSize = 0
        totalEntries = 0
        for table in tables:
            totalSize += table.size()
            totalEntries += table.numEntries()
        self.log("  Packing Ratio = %d%%" % ((totalEntries * 100) // totalSize))
        self.log("--------------------")

        self.pprint()

    @staticmethod
    def parseOptableXML(xml):
        """Parse udis86 optable.xml file and return list of
        instruction definitions.

        Each definition is a dict with the keys 'prefixes', 'mnemonic',
        'opcodes', 'operands', 'vendor' and 'cpuid'. Raises Exception if
        there is no top-level <x86optable> element or if it contains an
        element other than <instruction>.
        """
        from xml.dom import minidom

        xmlDoc = minidom.parse(xml)
        tlNode = xmlDoc.firstChild
        insns = []

        while tlNode is not None and tlNode.localName != "x86optable":
            tlNode = tlNode.nextSibling
        if tlNode is None:
            raise Exception("error: no <x86optable> element found")

        for insnNode in tlNode.childNodes:
            if not insnNode.localName:
                continue
            if insnNode.localName != "instruction":
                raise Exception("warning: invalid insn node - %s" % insnNode.localName)
            mnemonic = insnNode.getElementsByTagName('mnemonic')[0].firstChild.data
            vendor, cpuid = '', []

            # instruction-wide defaults, used when a <def> has none
            for node in insnNode.childNodes:
                if node.localName == 'vendor':
                    vendor = node.firstChild.data.split()
                elif node.localName == 'cpuid':
                    cpuid = node.firstChild.data.split()

            for node in insnNode.childNodes:
                if node.localName == 'def':
                    insnDef = {'pfx': []}
                    for child in node.childNodes:
                        if not child.localName:
                            continue
                        if child.localName in ('pfx', 'mode'):
                            # <pfx> and <mode> both feed the prefix list;
                            # extending for both keeps every value
                            # regardless of element order.
                            insnDef['pfx'].extend(child.firstChild.data.split())
                        elif child.localName in ('opc', 'opr', 'vendor', 'cpuid'):
                            insnDef[child.localName] = child.firstChild.data.split()
                    insns.append({'prefixes': insnDef.get('pfx', []),
                                  'mnemonic': mnemonic,
                                  'opcodes': insnDef.get('opc', []),
                                  'operands': insnDef.get('opr', []),
                                  'vendor': insnDef.get('vendor', vendor),
                                  'cpuid': insnDef.get('cpuid', cpuid)})
        return insns