# ----------------------------------------------------------------------
# EFI bytecode processor module
# (c) Hex-Rays
# Please send fixes or improvements to support@hex-rays.com

import sys
import idaapi
from idaapi import *

# ----------------------------------------------------------------------
# This function must be present and should return the list of
# short processor names similar to the one in ph.psnames
def get_idp_desc():
    """Return the first long processor name followed by every short name,
    all joined with ':' (built from the dictionary of get_idp_def())."""
    idpdef = get_idp_def()
    return idpdef['plnames'][0] + ':' + ':'.join(idpdef['psnames'])

# ----------------------------------------------------------------------
# This function returns the processor module definition
# The return value is a dictionary or object mirroring the ph_t structure
def get_idp_def():
    """Return the processor module definition as a dictionary.

    The register and instruction tables (proc_Registers, proc_Instructions)
    and the ireg_XXX / itype_XXX constants are module globals that are
    looked up when this function is called, not when it is defined.
    """
    return {
        # Expected kernel version, should be IDP_INTERFACE_VERSION
        'version': IDP_INTERFACE_VERSION,

        # IDP id ( Numbers above 0x8000 are reserved for the third-party modules)
        'id' : 0x8000 + 1,

        # Processor features
        'flag' : PR_ASSEMBLE | PR_SEGS | PR_DEFSEG32 | PR_USE32 | PRN_HEX | PR_RNAMESOK | PR_NO_SEGMOVE,

        # Number of bits in a byte for code segments (usually 8)
        # IDA supports values up to 32 bits
        'cnbits': 8,

        # Number of bits in a byte for non-code segments (usually 8)
        # IDA supports values up to 32 bits
        'dnbits': 8,

        # short processor names (NULL terminated)
        # Each name should be shorter than 9 characters
        'psnames': ['ebc'],

        # long processor names (NULL terminated)
        # No restriction on name lengths.
        'plnames': ['EFI Byte code'],

        # number of registers
        'regsNum': len(proc_Registers),

        # register names
        'regNames': proc_Registers,

        # Segment register information (use virtual CS and DS registers if your
        # processor doesn't have segment registers):
        'regFirstSreg': ireg_CS,
        'regLastSreg': ireg_DS,

        # size of a segment register in bytes
        'segreg_size': 0,

        # You should define 2 virtual segment registers for CS and DS.
        # Let's call them rVcs and rVds.

        # number of CS register
        'regCodeSreg' : ireg_CS,

        # number of DS register
        'regDataSreg' : ireg_DS,

        # icode of the first instruction
        'instruc_start' : 0,

        # icode of the last instruction + 1
        'instruc_end': len(proc_Instructions) + 1,

        # Array of instructions
        'instruc': proc_Instructions,

        #
        #      Size of long double (tbyte) for this processor
        #      (meaningful only if ash.a_tbyte != NULL)
        #
        'tbyte_size' : 0,

        # Icode of return instruction. It is ok to give any of possible return
        # instructions
        'icode_return': itype_RET,

        # only one assembler is supported
        'assembler': \
        {
            # flag
            'flag' : ASH_HEXF3 | AS_UNEQU | AS_COLON | ASB_BINF4 | AS_N2CHR,

            # user defined flags (local only for IDP)
            # you may define and use your own bits
            'uflag' : 0,

            # Assembler name (displayed in menus)
            'name': "EFI bytecode assembler",

            # org directive
            'origin': "org",

            # end directive
            'end': "end",

            # comment string (see also cmnt2)
            'cmnt': ";",

            # ASCII string delimiter
            'ascsep': "\"",

            # ASCII char constant delimiter
            'accsep': "'",

            # ASCII special chars (they can't appear in character and ascii constants)
            'esccodes': "\"'",

            #
            #      Data representation (db,dw,...):
            #
            # ASCII string directive
            'a_ascii': "db",

            # byte directive
            'a_byte': "db",

            # word directive
            'a_word': "dw",

            # remove if not allowed
            'a_dword': "dd",

            # remove if not allowed
            'a_qword': "dq",

            # remove if not allowed
            'a_oword': "xmmword",

            # float;  4bytes; remove if not allowed
            'a_float': "dd",

            # double; 8bytes; NULL if not allowed
            'a_double': "dq",

            # long double;    NULL if not allowed
            'a_tbyte': "dt",

            # array keyword. the following
            # sequences may appear:
            #      #h - header
            #      #d - size
            #      #v - value
            #      #s(b,w,l,q,f,d,o) - size specifiers
            #                          for byte,word,
            #                              dword,qword,
            #                              float,double,oword
            'a_dups': "#d dup(#v)",

            # uninitialized data directive (should include '%s' for the size of data)
            'a_bss': "%s dup ?",

            # 'seg ' prefix (example: push seg seg001)
            'a_seg': "seg",

            # current IP (instruction pointer) symbol in assembler
            'a_curip': "$",

            # "public" name keyword. NULL-gen default, ""-do not generate
            'a_public': "public",

            # "weak"   name keyword. NULL-gen default, ""-do not generate
            'a_weak': "weak",

            # "extrn"  name keyword
            'a_extrn': "extrn",

            # "comm" (communal variable)
            'a_comdef': "",

            # "align" keyword
            'a_align': "align",

            # Left and right braces used in complex expressions
            'lbrace': "(",
            'rbrace': ")",

            # %  mod     assembler time operation
            'a_mod': "%",

            # &  bit and assembler time operation
            'a_band': "&",

            # |  bit or  assembler time operation
            'a_bor': "|",

            # ^  bit xor assembler time operation
            'a_xor': "^",

            # ~  bit not assembler time operation
            'a_bnot': "~",

            # << shift left assembler time operation
            'a_shl': "<<",

            # >> shift right assembler time operation
            'a_shr': ">>",

            # size of type (format string)
            'a_sizeof_fmt': "size %s",
        } # Assembler
    }

# ----------------------------------------------------------------------
# Some internal flags used by the decoder, emulator and output
#
FL_B           = 0x000000001 # 8 bits
FL_W           = 0x000000002 # 16 bits
FL_D           = 0x000000004 # 32 bits
FL_Q           = 0x000000008 # 64 bits
FL_OP1         = 0x000000010 # check operand 1
FL_32          = 0x000000020 # Is 32
FL_64          = 0x000000040 # Is 64
FL_NATIVE      = 0x000000080 # native call (not EbcCal)
FL_REL         = 0x000000100 # relative address
FL_CS          = 0x000000200 # Condition flag is set
FL_NCS         = 0x000000400 # Condition flag is not set
FL_INDIRECT    = 0x000000800 # This is an indirect access (not immediate value)
FL_SIGNED      = 0x000001000 # This is a signed operand

# ----------------------------------------------------------------------
# Utility functions
#
def get_data_width_fl(sz):
    """Returns a flag given the data width number

    sz 0, 1, 2, 3 map to FL_B, FL_W, FL_D, FL_Q respectively.
    Any other value falls through every branch and yields None.
    """
    if sz == 0:
        return FL_B
    elif sz == 1:
        return FL_W
    elif sz == 2:
        return FL_D
    elif sz == 3:
        return FL_Q

def next_data_value(sz):
    """Returns a value depending on the data width number

    Reads the next byte / word / long / qword of the instruction being
    decoded for sz 0 / 1 / 2 / 3.
    Raises Exception("Invalid width!") for any other sz.
    """
    if sz == 0:
        return ua_next_byte()
    elif sz == 1:
        return ua_next_word()
    elif sz == 2:
        return ua_next_long()
    elif sz == 3:
        return ua_next_qword()
    else:
        # Call form of raise: valid on both Python 2 and Python 3
        raise Exception("Invalid width!")
def get_data_dt(sz):
    """Returns a dt_xxx on the data width number

    sz 0, 1, 2, 3 map to dt_byte, dt_word, dt_dword, dt_qword.
    Raises Exception("Invalid width!") for any other sz.
    """
    if sz == 0:
        return dt_byte
    elif sz == 1:
        return dt_word
    elif sz == 2:
        return dt_dword
    elif sz == 3:
        return dt_qword
    else:
        # Call form of raise: valid on both Python 2 and Python 3
        raise Exception("Invalid width!")

def get_sz_to_bits(sz):
    """Returns size in bits of the data width number

    1 -> 16, 2 -> 32, 3 -> 64; every other value (including 0) -> 8.
    """
    if sz == 1:
        return 16
    elif sz == 2:
        return 32
    elif sz == 3:
        return 64
    else:
        return 8

def dt_to_bits(dt):
    """Returns the size in bits given a dt_xxx

    Only dt_byte/dt_word/dt_dword/dt_qword are handled; anything else
    yields None.
    """
    if dt == dt_byte:
        return 8
    elif dt == dt_word:
        return 16
    elif dt == dt_dword:
        return 32
    elif dt == dt_qword:
        return 64

def fl_to_str(fl):
    """Given a flag, it returns a string. (used during output)

    The first width bit found, tested in the order FL_B, FL_W, FL_D, FL_Q,
    selects "B", "W", "D" or "Q"; None is returned when none is set.
    """
    if fl & FL_B != 0:
        return "B"
    elif fl & FL_W != 0:
        return "W"
    elif fl & FL_D != 0:
        return "D"
    elif fl & FL_Q != 0:
        return "Q"

# ----------------------------------------------------------------------
# Decodes an index and returns all its components in a dictionary
# Refer to "Index Encoding" section
def decode_index(index, sz):
    """Split an encoded index into its fields.

    index: raw index value read from the instruction stream
    sz:    size of the index in bits; must be 16, 32 or 64

    Returns a dictionary with the keys:
      's'  sign multiplier (-1 or 1)   'w'  width field
      'a'  actual width (w * 2/4/8)    'c'  constant number
      'n'  natural number              'o'  offset without sign
      'so' signed final offset (o * s)
    Raises ValueError when sz is not one of the supported sizes.
    """
    # Validate first: previously an unsupported size left 't' unbound and
    # surfaced as an UnboundLocalError further down.
    if sz == 16:
        t = 2
    elif sz == 32:
        t = 4
    elif sz == 64:
        t = 8
    else:
        raise ValueError("Invalid index size: %r" % (sz,))

    bn = sz - 1
    s = -1 if copy_bits(index, bn) == 1 else 1
    w = copy_bits(index, bn-3, bn-1)
    a = w * t                        # actual width
    c = copy_bits(index, a, bn-4)    # constant number
    # NOTE(review): when a == 0 this becomes copy_bits(index, 0, -1);
    # confirm the helper returns 0 for that empty range.
    n = copy_bits(index, 0, a-1)     # natural number
    o = (c + (n * proc_PTRSZ))       # offset w/o sign
    so = o * s                       # signed final offset

    # return everything
    return {'s': s, 'w': w, 'a': a, 'c': c, 'n': n, 'o': o, 'so': so}

# ----------------------------------------------------------------------
# Returns the "uFlag" value from idaapi.cvar
def get_uFlag():
    """Return idaapi.cvar.uFlag."""
    return idaapi.cvar.uFlag

# ----------------------------------------------------------------------
def decode_RET(opbyte, cmd):
    """Decode RET: no operands; the byte after the opcode is consumed."""
    # No operands
    cmd.Op1.type = o_void
    # Consume the next byte, and it should be zero
    ua_next_byte()
    return True

# ----------------------------------------------------------------------
def decode_STORESP(opbyte, cmd):
    """Decode STORESP: Op1 is the general purpose register (bits 0-2 of
    byte 1), Op2 the VM register (bits 4-6 of byte 1)."""
    # opbyte (byte0) has nothing meaningful (but the opcode itself)

    # get next byte
    opbyte = ua_next_byte()
    vm_reg = (opbyte & 0x70) >> 4
    gp_reg = (opbyte & 0x07)

    cmd.Op1.type = cmd.Op2.type = o_reg
    cmd.Op1.dtyp = cmd.Op2.dtyp = dt_qword
    cmd.Op1.reg = gp_reg
    # VM registers follow the general purpose ones in the register table
    cmd.Op2.reg = ireg_FLAGS + vm_reg
    return True

# ----------------------------------------------------------------------
def decode_LOADSP(opbyte, cmd):
    """Decode LOADSP: Op1 is the VM register (bits 0-2 of byte 1), Op2 the
    general purpose register (bits 4-6 of byte 1)."""
    # opbyte (byte0) has nothing meaningful (but the opcode itself)

    # get next byte
    opbyte = ua_next_byte()
    gp_reg = (opbyte & 0x70) >> 4
    vm_reg = (opbyte & 0x07)

    cmd.Op1.type = cmd.Op2.type = o_reg
    cmd.Op1.dtyp = cmd.Op2.dtyp = dt_qword
    # VM registers follow the general purpose ones in the register table
    cmd.Op1.reg = ireg_FLAGS + vm_reg
    cmd.Op2.reg = gp_reg
    return True

# ----------------------------------------------------------------------
def decode_BREAK(opbyte, cmd):
    """Decode BREAK: the byte after the opcode becomes a byte-sized
    immediate in Op1."""
    cmd.Op1.type = o_imm
    cmd.Op1.dtyp = dt_byte
    cmd.Op1.value = ua_next_byte()
    return True

# ----------------------------------------------------------------------
def decode_PUSH(opbyte, cmd):
    """Decode PUSH/POP and their natural-width forms (opcodes 0x35/0x36).

    Bit 7 of the opcode byte says a 16-bit index/immediate follows, bit 6
    selects 64-bit (ignored for the natural forms). Bit 3 of byte 1 marks
    operand 1 as indirect; bits 0-2 are the register number.
    """
    have_data = (opbyte & 0x80) != 0
    is_n = (opbyte & ~0xC0) in [0x35, 0x36]
    op_32 = False if is_n else (opbyte & 0x40) == 0

    opbyte = ua_next_byte()
    op1_direct = (opbyte & 0x08) == 0
    r1 = (opbyte & 0x07)

    cmd.Op1.dtyp = dt_dword if op_32 else dt_qword
    fl = 0
    if have_data:
        d = ua_next_word()
        if not op1_direct:
            # indirect: the word is an encoded index
            fl |= FL_INDIRECT
            d = decode_index(d, 16)['so']
        else:
            # direct: the word is a signed 16-bit immediate
            d = as_signed(d, 16)
        cmd.Op1.type = o_displ
        cmd.Op1.phrase = r1
        cmd.Op1.addr = d
        cmd.Op1.specval = fl
    else:
        cmd.Op1.type = o_reg
        cmd.Op1.reg = r1

    # the natural forms carry no 32/64 postfix
    cmd.auxpref = 0 if is_n else FL_32 if op_32 else FL_64
    return True

# ----------------------------------------------------------------------
def decode_JMP(opbyte, cmd):
    """Decode JMP (32-bit and 64-bit forms).

    Opcode byte: bit 7 = immediate/index data present, bit 6 = 64-bit form.
    Byte 1: bit 7 = conditional, bit 6 = condition-set variant,
    bit 4 = relative, bit 3 = operand 1 indirect, bits 0-2 = register.
    Returns False for the 32-bit form when operand 1 is indirect but no
    data follows.
    """
    have_data = (opbyte & 0x80) != 0
    jmp_32 = (opbyte & 0x40) == 0

    opbyte = ua_next_byte()
    conditional = (opbyte & 0x80) != 0
    cs = (opbyte & 0x40) != 0
    abs_jmp = (opbyte & 0x10) == 0
    op1_direct = (opbyte & 0x08) == 0
    r1 = (opbyte & 0x07)

    fl = 0 if abs_jmp else FL_REL
    if jmp_32:
        # Indirect and no data specified?
        if not op1_direct and not have_data:
            return False
        if have_data:
            d = next_data_value(2) # 32-bits
            if r1 == 0:
                cmd.Op1.type = o_near
            else:
                cmd.Op1.type = o_displ
                cmd.Op1.phrase = r1
            cmd.Op1.dtyp = dt_dword

            if not op1_direct:
                d = decode_index(d, 32)['so']
                fl |= FL_INDIRECT
            else:
                d = as_signed(d, 32)

            if not abs_jmp:
                # relative to the end of this instruction
                d += cmd.ea + cmd.size

            cmd.Op1.addr = d
            cmd.Op1.specval = fl
        else:
            cmd.Op1.type = o_reg
            cmd.Op1.reg = r1
    else:
        cmd.Op1.type = o_near
        cmd.Op1.dtyp = dt_qword
        cmd.Op1.addr = ua_next_qword()
        cmd.Op1.specval = fl

    # instruction-level flags drive the mnemonic postfix
    fl = FL_32 if jmp_32 else FL_64
    if conditional:
        fl |= FL_CS if cs else FL_NCS
    cmd.auxpref = fl
    return True

# ----------------------------------------------------------------------
def decode_JMP8(opbyte, cmd):
    """Decode JMP8: a signed 8-bit offset, counted in words, relative to
    the end of the instruction.

    Opcode byte: bit 7 = conditional, bit 6 = condition-set variant.
    """
    conditional = (opbyte & 0x80) != 0
    cs = (opbyte & 0x40) != 0

    cmd.Op1.type = o_near
    cmd.Op1.dtyp = dt_byte
    addr = ua_next_byte()
    cmd.Op1.addr = (as_signed(addr, 8) * 2) + cmd.size + cmd.ea

    if conditional:
        cmd.auxpref = FL_CS if cs else FL_NCS
    return True

# ----------------------------------------------------------------------
def decode_MOVI(opbyte, cmd):
    """Decode MOVI and MOVIn (opcode 0x38).

    The first postfix character is the width of the move and is taken
    from byte 1; the second is the immediate data size and is taken from
    the opcode byte. Returns False for reserved encodings.
    """
    imm_sz = (opbyte & 0xC0) >> 6
    opcode = (opbyte & ~0xC0)
    is_MOVIn = opcode == 0x38

    # Reserved and should not be 0
    if imm_sz == 0:
        return False

    # take byte 1
    opbyte = ua_next_byte()

    # Bit 7 is reserved and should be 0
    if opbyte & 0x80 != 0:
        return False

    have_idx = (opbyte & 0x40) != 0
    move_sz = (opbyte & 0x30) >> 4
    direct = (opbyte & 0x08) == 0
    r1 = (opbyte & 0x07)

    # Cannot have an index with a direct register
    if have_idx and direct:
        return False

    if is_MOVIn:
        cmd.Op1.dtyp = get_data_dt(imm_sz)
    else:
        cmd.Op1.specval = get_data_width_fl(move_sz)
        cmd.Op1.dtyp = get_data_dt(move_sz)

    if have_idx:
        cmd.Op1.type = o_displ
        cmd.Op1.phrase = r1
        cmd.Op1.addr = decode_index(ua_next_word(), 16)['so']
    else:
        cmd.Op1.type = o_reg
        cmd.Op1.reg = r1

    cmd.Op2.type = o_imm
    cmd.Op2.dtyp = get_data_dt(imm_sz)
    d = next_data_value(imm_sz)
    if is_MOVIn:
        # for MOVIn the immediate is itself an encoded index
        d = decode_index(d, get_sz_to_bits(imm_sz))['so']
    cmd.Op2.value = d

    # save imm size and signal that op1 is defined in first operand
    cmd.auxpref = get_data_width_fl(imm_sz) | (0 if is_MOVIn else FL_OP1)
    return True

# ----------------------------------------------------------------------
def decode_MOVREL(opbyte, cmd):
    """Decode MOVREL: operand 2 is a memory address relative to the end
    of the instruction. Returns False for reserved encodings."""
    imm_sz = (opbyte & 0xC0) >> 6

    # Reserved and should not be 0
    if imm_sz == 0:
        return False

    # take byte 1
    opbyte = ua_next_byte()

    # Bit 7 is reserved and should be 0
    if opbyte & 0x80 != 0:
        return False

    have_idx = (opbyte & 0x40) != 0
    direct = (opbyte & 0x08) == 0
    r1 = (opbyte & 0x07)

    # Cannot have an index with a direct register
    if have_idx and direct:
        return False

    cmd.Op1.specval = get_data_width_fl(imm_sz)
    if have_idx:
        cmd.Op1.type = o_displ
        cmd.Op1.phrase = r1
        cmd.Op1.addr = decode_index(ua_next_word(), 16)['so']
    else:
        cmd.Op1.type = o_reg
        cmd.Op1.reg = r1

    cmd.Op2.type = o_mem
    cmd.Op2.dtyp = get_data_dt(imm_sz)
    # the data is read first so that cmd.size already covers it
    cmd.Op2.addr = next_data_value(imm_sz) + cmd.size + cmd.ea

    # save imm size
    cmd.auxpref = get_data_width_fl(imm_sz)
    return True

# ----------------------------------------------------------------------
def decode_MOV(opbyte, cmd):
    """Decode the MOVxw / MOVxd / MOVqq / MOVnw / MOVnd family.

    Opcode byte: bit 7 = operand 1 index present, bit 6 = operand 2 index
    present. Byte 1: bit 7 = operand 2 indirect, bits 4-6 = register 2,
    bit 3 = operand 1 indirect, bits 0-2 = register 1.
    Returns False when the opcode is not one of the handled MOV forms.
    """
    have_idx1 = (opbyte & 0x80) != 0
    have_idx2 = (opbyte & 0x40) != 0
    opcode = (opbyte & ~0xC0)

    # MOVxW
    if 0x1D <= opcode <= 0x20:
        idx_sz = 1 # word
        data_sz = opcode - 0x1D
    # MOVxD
    elif 0x21 <= opcode <= 0x24:
        idx_sz = 2 # dword
        data_sz = opcode - 0x21
    # MOVqq
    elif opcode == 0x28:
        idx_sz = 3 # qword
        data_sz = 3
    # MOVnw
    elif opcode == 0x32:
        idx_sz = 1 # word
        data_sz = 3
    # MOVnd
    elif opcode == 0x33:
        idx_sz = 2 # dword
        data_sz = 3
    else:
        # Not a MOV form: reject instead of failing later on unbound sizes
        return False

    # get byte1
    opbyte = ua_next_byte()
    op2_direct = (opbyte & 0x80) == 0
    op1_direct = (opbyte & 0x08) == 0
    r1 = (opbyte & 0x07)
    r2 = (opbyte & 0x70) >> 4

    # operand 1
    fl = FL_SIGNED
    if have_idx1:
        d = decode_index(next_data_value(idx_sz), get_sz_to_bits(idx_sz))['so']
        if not op1_direct:
            fl |= FL_INDIRECT
        cmd.Op1.type = o_displ
        cmd.Op1.phrase = r1
        cmd.Op1.addr = d
        cmd.Op1.dtyp = get_data_dt(idx_sz)
    else:
        cmd.Op1.type = o_reg
        cmd.Op1.reg = r1
        cmd.Op1.dtyp = get_data_dt(data_sz)
    cmd.Op1.specval = fl

    # operand 2
    fl = FL_SIGNED
    if have_idx2:
        d = decode_index(next_data_value(idx_sz), get_sz_to_bits(idx_sz))['so']
        if not op2_direct:
            fl |= FL_INDIRECT
        cmd.Op2.type = o_displ
        cmd.Op2.phrase = r2
        cmd.Op2.addr = d
        cmd.Op2.dtyp = get_data_dt(idx_sz)
    else:
        cmd.Op2.type = o_reg
        cmd.Op2.reg = r2
        cmd.Op2.dtyp = get_data_dt(data_sz)
    cmd.Op2.specval = fl
    return True

# ----------------------------------------------------------------------
def decode_CMP(opbyte, cmd):
    """Decode the register/register-or-memory compare instructions.

    Opcode byte: bit 7 = 16-bit index/immediate present, bit 6 = 64-bit.
    Byte 1: bit 7 = operand 2 indirect, bits 4-6 = register 2,
    bits 0-2 = register 1. Operand 1 is always a register.
    """
    have_data = (opbyte & 0x80) != 0
    cmp_32 = (opbyte & 0x40) == 0

    opbyte = ua_next_byte()
    op2_direct = (opbyte & 0x80) == 0
    r1 = (opbyte & 0x07)
    r2 = (opbyte & 0x70) >> 4

    dt = dt_dword if cmp_32 else dt_qword
    cmd.Op1.type = o_reg
    cmd.Op1.reg = r1
    cmd.Op1.dtyp = dt

    if have_data:
        cmd.Op2.type = o_displ
        cmd.Op2.phrase = r2
        cmd.Op2.dtyp = dt
        addr = next_data_value(1) # get 16bit value
        if not op2_direct:
            addr = decode_index(addr, 16)['so']
            cmd.Op2.specval = FL_INDIRECT
        cmd.Op2.addr = addr
    else:
        cmd.Op2.type = o_reg
        cmd.Op2.reg = r2

    cmd.auxpref = FL_32 if cmp_32 else FL_64
    return True

# ----------------------------------------------------------------------
def decode_CMPI(opbyte, cmd):
    """Decode the compare-with-immediate instructions.

    Opcode byte: bit 7 selects a 32-bit immediate (16-bit otherwise),
    bit 6 = 64-bit compare. Byte 1: bit 4 = 16-bit index present,
    bit 3 = operand 1 indirect, bits 0-2 = register.
    """
    imm_sz = 1 if (opbyte & 0x80) == 0 else 2
    cmp_32 = (opbyte & 0x40) == 0

    opbyte = ua_next_byte()
    have_idx = (opbyte & 0x10) != 0
    op1_direct = (opbyte & 0x08) == 0
    r1 = (opbyte & 0x07)

    dt = get_data_dt(imm_sz)
    if op1_direct:
        cmd.Op1.type = o_reg
        cmd.Op1.reg = r1
        cmd.Op1.dtyp = dt
    else:
        if have_idx:
            d = decode_index(ua_next_word(), 16)['so']
        else:
            d = 0
        cmd.Op1.type = o_displ
        cmd.Op1.phrase = r1
        cmd.Op1.addr = d

    cmd.Op2.type = o_imm
    cmd.Op2.value = next_data_value(imm_sz)
    cmd.Op2.dtyp = dt

    cmd.auxpref = (FL_32 if cmp_32 else FL_64) | get_data_width_fl(imm_sz)
    return True
# ----------------------------------------------------------------------
def decode_CALL(opbyte, cmd):
    """Decode CALL (32-bit and 64-bit forms).

    Opcode byte: bit 7 = immediate/index data present, bit 6 = 64-bit form.
    Byte 1: bit 5 = native call, bit 4 = relative address,
    bit 3 = operand 1 indirect, bits 0-2 = register.
    Always returns True.
    """
    has_data = (opbyte & 0x80) != 0
    is_32 = (opbyte & 0x40) == 0

    modifiers = ua_next_byte()
    native = (modifiers & 0x20) != 0
    relative = (modifiers & 0x10) != 0
    indirect = (modifiers & 0x08) != 0
    reg = modifiers & 0x07

    op_flags = 0
    if not is_32:
        # 64-bit form: a 64-bit value follows
        cmd.Op1.type = o_mem
        cmd.Op1.dtyp = dt_qword
        cmd.Op1.addr = next_data_value(3)
    elif not has_data:
        # 32-bit form without data: plain register operand
        cmd.Op1.type = o_reg
        cmd.Op1.reg = reg
    else:
        # 32-bit form followed by a 32-bit immediate or index
        target = next_data_value(2)
        if indirect:
            target = decode_index(target, 32)['so']
            op_flags |= FL_INDIRECT

        cmd.Op1.dtyp = dt_dword
        if reg == 0:
            cmd.Op1.type = o_near
        else:
            cmd.Op1.type = o_displ
            cmd.Op1.phrase = reg

        if relative:
            # relative to the end of this instruction
            target = cmd.ea + as_signed(target, 32) + cmd.size
        cmd.Op1.addr = target

    # operand-level flags
    if native:
        op_flags |= FL_NATIVE
    if relative:
        op_flags |= FL_REL
    cmd.Op1.specval = op_flags

    # instruction-level flags (mnemonic postfix)
    insn_flags = FL_32 if is_32 else FL_64
    if native:
        insn_flags |= FL_NATIVE
    cmd.auxpref = insn_flags
    return True

# ----------------------------------------------------------------------
def decode_BINOP_FORM1(opbyte, cmd):
    """Decode the two-operand arithmetic/logic instructions.

    Opcode byte: bit 7 = 16-bit index/immediate present, bit 6 = 64-bit.
    Byte 1: bit 7 = operand 2 indirect, bits 4-6 = register 2,
    bit 3 = operand 1 indirect, bits 0-2 = register 1.
    Always returns True.
    """
    has_data = (opbyte & 0x80) != 0
    is_32 = (opbyte & 0x40) == 0

    regs = ua_next_byte()
    src_indirect = (regs & 0x80) != 0
    dst_indirect = (regs & 0x08) != 0
    dst_reg = regs & 0x07
    src_reg = (regs & 0x70) >> 4

    if is_32:
        width = dt_dword
    else:
        width = dt_qword

    # operand 1 (no extra bytes are read for it)
    if dst_indirect:
        cmd.Op1.type = o_displ
        cmd.Op1.phrase = dst_reg
        cmd.Op1.addr = 0
        cmd.Op1.specval = FL_INDIRECT
    else:
        cmd.Op1.type = o_reg
        cmd.Op1.reg = dst_reg
        cmd.Op1.dtyp = width

    # operand 2
    if not has_data:
        cmd.Op2.type = o_reg
        cmd.Op2.reg = src_reg
    else:
        cmd.Op2.dtyp = width
        value = next_data_value(1) # 16-bit immediate or index
        if src_indirect:
            value = decode_index(value, 16)['so']
            cmd.Op2.specval = FL_INDIRECT
        cmd.Op2.type = o_displ
        cmd.Op2.phrase = src_reg
        cmd.Op2.addr = value

    if is_32:
        cmd.auxpref = FL_32
    else:
        cmd.auxpref = FL_64
    return True
# ----------------------------------------------------------------------
def decode_MOVSN(opbyte, cmd):
    """Decode MOVSNW (opcode 0x25) and MOVSND (opcode 0x26).

    Opcode byte: bit 7 = operand 1 index present, bit 6 = operand 2
    index/immediate present. Byte 1: bit 7 = operand 2 indirect,
    bits 4-6 = register 2, bits 0-2 = register 1.
    Returns False for any other opcode, before reading further bytes.
    """
    index_sizes = {0x25: 1, 0x26: 2}   # word / dword index
    base_opcode = opbyte & ~0xC0
    if base_opcode not in index_sizes:
        return False
    idx_sz = index_sizes[base_opcode]
    idx_bits = get_sz_to_bits(idx_sz)

    op1_has_index = (opbyte & 0x80) != 0
    op2_has_index = (opbyte & 0x40) != 0

    regs = ua_next_byte()
    op2_indirect = (regs & 0x80) != 0
    reg1 = regs & 0x07
    reg2 = (regs & 0x70) >> 4

    # operand 1
    if not op1_has_index:
        cmd.Op1.type = o_reg
        cmd.Op1.reg = reg1
    else:
        cmd.Op1.type = o_displ
        cmd.Op1.phrase = reg1
        cmd.Op1.addr = decode_index(next_data_value(idx_sz), idx_bits)['so']

    # operand 2
    if not op2_has_index:
        cmd.Op2.type = o_reg
        cmd.Op2.reg = reg2
    else:
        value = next_data_value(idx_sz)
        cmd.Op2.type = o_displ
        cmd.Op2.phrase = reg2
        if op2_indirect:
            # only an indirect operand carries an encoded index
            value = decode_index(value, idx_bits)['so']
            cmd.Op2.specval = FL_INDIRECT
        cmd.Op2.addr = value

    return True

# ----------------------------------------------------------------------
# Is the instruction created only for alignment purposes?
# Returns: number of bytes in the instruction
def ph_is_align_insn(ea):
    """Return 2 when the word at ea is zero, otherwise 0."""
    return 2 if get_word(ea) == 0 else 0

def set_ptr_size():
    """Set proc_PTRSZ from the magic field of the stored PE header.

    The "$ PE header" netnode value is read; when it is missing or too
    short to contain the field at offset 0x18, proc_PTRSZ is left alone.
    Magic 0x20B selects 8-byte pointers, anything else 4-byte pointers.
    """
    global proc_PTRSZ
    # Imported here so the function does not rely on 'struct' being
    # re-exported by 'from idaapi import *'.
    import struct

    n = netnode("$ PE header")
    s = n.valobj()
    # No header, or a blob too short to hold the 2-byte magic field
    if not s or len(s) < 0x18 + 2:
        return

    # Extract magic field (PE headers are little-endian)
    t = struct.unpack("<H", s[0x18:0x18+2])[0]
    # Reset to 4 as well, so a value of 8 from a previous file cannot leak
    proc_PTRSZ = 8 if t == 0x20B else 4

def ph_notify_newfile(filename):
    """New file loaded: refresh the pointer size."""
    set_ptr_size()

def ph_notify_oldfile(filename):
    """Existing database opened: refresh the pointer size."""
    set_ptr_size()

# ----------------------------------------------------------------------
def handle_operand(op, isRead):
    """Create the cross-references for one operand of the current instruction.

    op:     the operand to process
    isRead: true when the operand is read, false when it is written
    """
    cmd = idaapi.cmd
    uFlag = get_uFlag()
    is_offs = isOff(uFlag, op.n)
    dref_flag = dr_R if isRead else dr_W
    optype = op.type

    # immediate marked as an offset: offset data xref
    if optype == o_imm:
        if is_offs:
            ua_add_off_drefs(op, dr_O)
    # displacement marked as an offset: read/write data xref
    elif optype == o_displ:
        if is_offs:
            ua_add_off_drefs(op, dref_flag)
    # direct memory reference: read/write data xref
    elif optype == o_mem:
        ua_add_dref(op.offb, op.addr, dref_flag)
    # code reference: near call for CALL, near jump otherwise
    elif optype == o_near:
        if cmd.itype == itype_CALL:
            fl = fl_CN
        else:
            fl = fl_JN
        ua_add_cref(op.offb, op.addr, fl)

# Emulate instruction, create cross-references, plan to analyze
# subsequent instructions, modify flags etc. Upon entrance to this function
# all information about the instruction is in 'cmd' structure.
# If zero is returned, the kernel will delete the instruction.
def ph_emu():
    """Emulate the instruction in idaapi.cmd; always returns 1."""
    cmd = idaapi.cmd
    aux = cmd.auxpref
    Feature = cmd.get_canon_feature()

    if Feature & CF_USE1:
        handle_operand(cmd.Op1, 1)
    if Feature & CF_CHG1:
        handle_operand(cmd.Op1, 0)
    if Feature & CF_USE2:
        handle_operand(cmd.Op2, 1)
    if Feature & CF_CHG2:
        handle_operand(cmd.Op2, 0)
    if Feature & CF_JUMP:
        QueueMark(Q_jumps, cmd.ea)

    # is it an unconditional jump?
    # (a JMP/JMP8 carrying neither condition flag)
    uncond_jmp = cmd.itype in [itype_JMP8, itype_JMP] and (aux & (FL_NCS|FL_CS)) == 0

    # add flow
    if (Feature & CF_STOP == 0) and not uncond_jmp:
        ua_add_cref(0, cmd.ea + cmd.size, fl_F)

    return 1

# ----------------------------------------------------------------------
# Generate text representation of an instruction operand.
# This function shouldn't change the database, flags or anything else.
# All these actions should be performed only by u_emu() function.
# The output text is placed in the output buffer initialized with init_output_buffer()
# This function uses out_...() functions from ua.hpp to generate the operand text
# Returns: 1-ok, 0-operand is hidden.
def ph_outop(op):
    """Output one operand; returns True when something was printed and
    False for operand types this module does not print."""
    kind = op.type
    spec = op.specval

    sign_flag = 0
    if (spec & FL_SIGNED) != 0:
        sign_flag = OOF_SIGNED

    # plain register
    if kind == o_reg:
        out_register(proc_Registers[op.reg])
        return True

    # immediate value
    if kind == o_imm:
        OutValue(op, OOFW_IMM | sign_flag)
        return True

    # code or data address: print its name, or the raw address in the
    # error colour when no name expression could be produced
    if kind == o_near or kind == o_mem:
        if not out_name_expr(op, op.addr, BADADDR):
            out_tagon(COLOR_ERROR)
            OutLong(op.addr, 16)
            out_tagoff(COLOR_ERROR)
            QueueMark(Q_noName, idaapi.cmd.ea)
        return True

    # register with optional displacement, bracketed when indirect
    if kind == o_displ:
        bracketed = (spec & FL_INDIRECT) != 0
        if bracketed:
            out_symbol('[')
        out_register(proc_Registers[op.reg])
        if op.addr != 0:
            OutValue(op, OOF_ADDR | OOFW_16 | sign_flag | OOFS_NEEDSIGN)
        if bracketed:
            out_symbol(']')
        return True

    return False

# ----------------------------------------------------------------------
# Generate text representation of an instruction in 'cmd' structure.
# This function shouldn't change the database, flags or anything else.
# All these actions should be performed only by u_emu() function.
def ph_out():
    """Output the current instruction: mnemonic, postfix and operands."""
    # Init output buffer
    buf = idaapi.init_output_buffer(1024)

    cmd = idaapi.cmd
    postfix = ""

    # First display size of first operand if it exists
    if cmd.auxpref & FL_OP1 != 0:
        postfix += fl_to_str(cmd.Op1.specval)

    # Display operation size
    if cmd.auxpref & FL_32:
        postfix += "32"
    elif cmd.auxpref & FL_64:
        postfix += "64"

    # Display if native or not native (for CALL)
    if cmd.auxpref & FL_NATIVE:
        postfix += "EX"

    # Display size of instruction
    if cmd.auxpref & (FL_B | FL_W | FL_D | FL_Q) != 0:
        postfix += fl_to_str(cmd.auxpref)

    # Display the condition of a conditional jump
    if cmd.auxpref & FL_CS:
        postfix += "CS"
    elif cmd.auxpref & FL_NCS:
        postfix += "CC"

    OutMnem(15, postfix)

    out_one_operand(0)

    # remaining operands, stopping at the first void one
    for i in range(1, 3):
        op = cmd[i]
        if op.type == o_void:
            break
        out_symbol(',')
        OutChar(' ')
        out_one_operand(i)

    term_output_buffer()
    cvar.gl_comm = 1
    MakeLine(buf)

# ----------------------------------------------------------------------
def ph_ana():
    """
    Decodes an instruction into the C global variable 'cmd'

    Returns the instruction size, or 0 when the opcode is not in
    proc_itable or its decoder rejects the encoding.
    """
    cmd = idaapi.cmd

    # take opcode byte
    b = ua_next_byte()

    # the 6bit opcode
    opcode = b & 0x3F

    # opcode supported?
    if opcode not in proc_itable:
        return 0
    ins = proc_itable[opcode]

    # set default itype
    cmd.itype = getattr(proc_module, 'itype_' + ins.name)

    # call the decoder
    return cmd.size if ins.d(b, cmd) else 0

# ----------------------------------------------------------------------
# Instruction definition
class idef:
    """One instruction table entry: mnemonic, feature flags and decoder."""
    def __init__(self, name, cf, d):
        self.name = name   # mnemonic; also used to build the itype_XXX name
        self.cf = cf       # canonical feature flags (CF_xxx)
        self.d = d         # decoder, called as d(opbyte, cmd)

#
# Instructions table
#
# Keyed by the 6-bit opcode. Every name must be unique: it is used to build
# the itype_XXX constant and is shown as the mnemonic.
proc_itable = \
{
    0x00: idef(name='BREAK',    d=decode_BREAK,       cf = CF_USE1),
    0x01: idef(name='JMP',      d=decode_JMP,         cf = CF_USE1 | CF_JUMP),
    0x02: idef(name='JMP8',     d=decode_JMP8,        cf = CF_USE1 | CF_JUMP),
    0x03: idef(name='CALL',     d=decode_CALL,        cf = CF_USE1 | CF_CALL),
    0x04: idef(name='RET',      d=decode_RET,         cf = CF_STOP),

    0x05: idef(name='CMPEQ',    d=decode_CMP,         cf = CF_USE1 | CF_USE2),
    0x06: idef(name='CMPLTE',   d=decode_CMP,         cf = CF_USE1 | CF_USE2),
    0x07: idef(name='CMPGTE',   d=decode_CMP,         cf = CF_USE1 | CF_USE2),
    0x08: idef(name='CMPULTE',  d=decode_CMP,         cf = CF_USE1 | CF_USE2),
    # unsigned compare; was misnamed CMPGTE, colliding with opcode 0x07
    0x09: idef(name='CMPUGTE',  d=decode_CMP,         cf = CF_USE1 | CF_USE2),

    0x0A: idef(name='NOT',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x0B: idef(name='NEG',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x0C: idef(name='ADD',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x0D: idef(name='SUB',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x0E: idef(name='MUL',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x0F: idef(name='MULU',     d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x10: idef(name='DIV',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x11: idef(name='DIVU',     d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x12: idef(name='MOD',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x13: idef(name='MODU',     d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x14: idef(name='AND',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x15: idef(name='OR',       d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x16: idef(name='XOR',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x17: idef(name='SHL',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT),
    0x18: idef(name='SHR',      d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT),
    0x19: idef(name='ASHR',     d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1 | CF_SHFT),

    0x1A: idef(name='EXTNDB',   d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x1B: idef(name='EXTNDW',   d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x1C: idef(name='EXTNDD',   d=decode_BINOP_FORM1, cf = CF_USE1 | CF_USE2 | CF_CHG1),

    0x1D: idef(name='MOVBW',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x1E: idef(name='MOVWW',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x1F: idef(name='MOVDW',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x20: idef(name='MOVQW',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x21: idef(name='MOVBD',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x22: idef(name='MOVWD',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x23: idef(name='MOVDD',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x24: idef(name='MOVQD',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),

    0x25: idef(name='MOVSNW',   d=decode_MOVSN,       cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x26: idef(name='MOVSND',   d=decode_MOVSN,       cf = CF_USE1 | CF_USE2 | CF_CHG1),
    # 0x27: reserved

    0x28: idef(name='MOVQQ',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),

    0x29: idef(name='LOADSP',   d=decode_LOADSP,      cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x2A: idef(name='STORESP',  d=decode_STORESP,     cf = CF_USE1 | CF_USE2 | CF_CHG1),

    # PUSH/POP
    0x2B: idef(name='PUSH',     d=decode_PUSH,        cf = CF_USE1),
    0x2C: idef(name='POP',      d=decode_PUSH,        cf = CF_USE1),

    # CMPI
    0x2D: idef(name='CMPIEQ',   d=decode_CMPI,        cf = CF_USE1 | CF_USE2),
    0x2E: idef(name='CMPILTE',  d=decode_CMPI,        cf = CF_USE1 | CF_USE2),
    0x2F: idef(name='CMPIGTE',  d=decode_CMPI,        cf = CF_USE1 | CF_USE2),
    0x30: idef(name='CMPIULTE', d=decode_CMPI,        cf = CF_USE1 | CF_USE2),
    0x31: idef(name='CMPIUGTE', d=decode_CMPI,        cf = CF_USE1 | CF_USE2),

    0x32: idef(name='MOVNW',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x33: idef(name='MOVND',    d=decode_MOV,         cf = CF_USE1 | CF_USE2 | CF_CHG1),
    # 0x34: reserved

    # PUSHn/POPn
    0x35: idef(name='PUSHN',    d=decode_PUSH,        cf = CF_USE1),
    # was misnamed PUSHN, colliding with opcode 0x35
    0x36: idef(name='POPN',     d=decode_PUSH,        cf = CF_USE1),

    0x37: idef(name='MOVI',     d=decode_MOVI,        cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x38: idef(name='MOVIN',    d=decode_MOVI,        cf = CF_USE1 | CF_USE2 | CF_CHG1),
    0x39: idef(name='MOVREL',   d=decode_MOVREL,      cf = CF_USE1 | CF_USE2 | CF_CHG1)
    # 0x3A: reserved
    # 0x3B: reserved
    # 0x3C: reserved
    # 0x3D: reserved
    # 0x3E: reserved
    # 0x3F: reserved
}

# ----------------------------------------------------------------------
# Registers definition
proc_Registers = [
    # General purpose registers
    "SP", # aka R0
    "R1",
    "R2",
    "R3",
    "R4",
    "R5",
    "R6",
    "R7",
    # VM registers
    "FLAGS", # 0
    "IP",    # 1
    "VM2",
    "VM3",
    "VM4",
    "VM5",
    "VM6",
    "VM7",
    # Fake segment registers
    "CS",
    "DS"
]

# ----------------------------------------------------------------------
def init_instructions():
    """
    This function creates the Instruction array and the corresponding
    itype_XXX constants that are needed by the processor module

    Returns the list of instruction descriptors; entry i belongs to the
    instruction whose itype_XXX constant equals i.
    """
    class insn(object):
        def __init__(self, name, feature):
            self.name = name
            self.feature = feature

    Instructions = []
    # Walk the table in opcode order so the itype numbering does not
    # depend on dictionary iteration order.
    for i, opcode in enumerate(sorted(proc_itable)):
        x = proc_itable[opcode]
        Instructions.append(insn(x.name, x.cf))
        setattr(proc_module, 'itype_' + x.name, i)

    return Instructions

# ----------------------------------------------------------------------
def init_registers(Registers):
    """
    This function parses the register table and creates corresponding
    ireg_XXX constants
    """
    for i, reg_name in enumerate(Registers):
        setattr(proc_module, 'ireg_' + reg_name, i)

# ----------------------------------------------------------------------
# Initialization
proc_module = sys.modules[__name__]
proc_Instructions = init_instructions()
init_registers(proc_Registers)
proc_PTRSZ = 4 # Assume PTRSZ = 4 by default