diff --git a/json2idc.py b/json2idc.py index 48404d8c86..1f0a6c5095 100755 --- a/json2idc.py +++ b/json2idc.py @@ -7,11 +7,6 @@ image_size = 939072 - 0xa40 # 0x22b90 - 0x200 # .exe size - exe header dosbox_load_seg = 0x1a2 # para ida_load_seg = 0x1000 - -# zone? or sab2 -# image_size = 262288+0x100 #0x22b90 - 0x200 # .exe size - exe header -# dosbox_load_seg = 0x192 # para -# ida_load_seg = 0xff0 all_segs = set() @@ -24,18 +19,13 @@ def seg_dbx2ida(seg: int) -> int: def read_segments_map(file_name): - """It reads a .map file and returns a dictionary of segments. - - :param file_name: The name of the .map file - :return: A dictionary of segments and their values. - """ + """Reads a .map file and returns a dictionary of segments.""" symbols = dict() with open(file_name) as f: lines = f.read().splitlines() for line in lines: m = re.match( - r'^\s*MakeName\s*\(\s*(?P
[0-9A-Fa-fXx]+)\s*,\s*"(?P\S+)"\s*\)\s*;', - line) + r'^\s*MakeName\s*\(\s*(?P
def mark_code(daddr, instr, outfile, code_segs):
    """Emit the IDC statements for one recorded instruction.

    Every segment value listed in ``instr`` is added to the module-level
    ``all_segs`` set. If the instruction has at least one ``cs`` value, a
    ``MakeCode`` line and the segment-register hints are written and the
    per-segment IP range in ``code_segs`` is updated.

    :param daddr: Instruction address as a hexadecimal string (it is parsed
        with ``int(daddr, 16)``).
    :param instr: Mapping with per-register value lists (``cs``, ``ds``,
        ``es``, ``gs``, ``fs``, ``ss``) and a ``Video`` flag.
    :param outfile: Writable text stream receiving the IDC script.
    :param code_segs: Dict ``{cs: [min_ip, max_ip]}`` updated in place.
    """
    addr = addr_dbx2ida(int(daddr, 16))

    # Missing register keys are treated as "no values seen" so that this
    # function agrees with set_segment_registers_values, which also tolerates
    # absent keys.
    for seg in ('cs', 'ds', 'es', 'gs', 'fs', 'ss'):
        all_segs.update(instr.get(seg, ()))

    cs_values = instr.get('cs')
    if not cs_values:
        return

    outfile.write(f'MakeCode(0x{addr:x}); // {daddr}\n')
    cs = cs_values[0]
    cseg = seg_dbx2ida(cs)
    eip = addr - cseg * 0x10  # offset of the instruction inside its segment

    if instr.get('Video'):  # Identify instructions accessing video memory
        print(f'Video acc instr: {cseg:x}:{eip:x}')

    set_segment_registers_values(outfile, addr, daddr, instr)
    collect_code_segs_and_ip_ranges(code_segs, cs, eip)


def set_segment_registers_values(outfile, addr, daddr, instr):
    """Write a ``split_sreg_range`` hint for each unambiguous data segment.

    A hint is emitted only for registers that were observed with exactly one
    value at this instruction; registers that are absent or have several
    values are skipped.

    :param outfile: Writable text stream receiving the IDC script.
    :param addr: Converted (integer) address of the instruction.
    :param daddr: Original address string, echoed in the trailing comment.
    :param instr: Mapping of register name to the list of observed values.
    """
    for seg in ('ds', 'es', 'gs', 'fs', 'ss'):
        values = instr.get(seg, ())
        if len(values) != 1:
            continue
        value = values[0]
        # NOTE(review): the trailing 2 is presumably IDA's SR_user tag - confirm.
        outfile.write(
            f'split_sreg_range(0x{addr:x},"{seg}",0x{seg_dbx2ida(value):x},2);'
            f' // 0x{daddr} 0x{value:x}\n')


def collect_code_segs_and_ip_ranges(code_segs, cs, eip):
    """Widen the ``[min_ip, max_ip]`` range stored for code segment ``cs``.

    :param code_segs: Dict ``{cs: [min_ip, max_ip]}`` updated in place.
    :param cs: Code segment value used as the dict key.
    :param eip: Instruction offset to fold into the range.
    """
    bounds = code_segs.get(cs)
    if bounds is None:
        code_segs[cs] = [eip, eip]
        return
    bounds[0] = min(bounds[0], eip)
    bounds[1] = max(bounds[1], eip)


def mark_data_access(j, outfile):
    """Write ``MakeByte``/``MakeWord``/``MakeDword`` lines for data accesses.

    Entries flagged as arrays, entries accessed with more than one size, and
    entries whose single size is not 1, 2 or 4 are skipped.

    :param j: Decoded run-time data; ``j['Data']`` maps a hexadecimal address
        string to a dict with ``Array`` and ``Sizes`` entries.
    :param outfile: Writable text stream receiving the IDC script.
    """
    size_names = {1: 'Byte', 2: 'Word', 4: 'Dword'}

    for daddr, data in j['Data'].items():
        addr = addr_dbx2ida(int(daddr, 16))

        sizes = data['Sizes']
        if data['Array'] or len(sizes) != 1:  # Don't know yet how to handle arrays
            continue

        text = size_names.get(sizes[0])  # Only set if it was single size
        if text is None:
            # No Make<Type> call is emitted by this script for other widths;
            # skip the entry instead of aborting the whole export.
            continue
        outfile.write(f'Make{text}(0x{addr:x}); // 0x{daddr}\n')


def process_jumps(j, outfile):
    """Write an ``add_func`` line for every recorded jump target.

    Targets are written in descending address order. A missing ``Jumps``
    entry is treated as "no targets".

    :param j: Decoded run-time data; ``j['Jumps']`` is an iterable of integer
        addresses.
    :param outfile: Writable text stream receiving the IDC script.
    """
    for daddr in sorted(j.get('Jumps', ()), reverse=True):
        addr = addr_dbx2ida(daddr)
        outfile.write(f'add_func(0x{addr:x}); // 0x{daddr:x}\n')


def write_idc_header(outfile):
    """Write the IDC prologue: include line, ``main`` opening, CPU setup.

    :param outfile: Writable text stream receiving the IDC script.
    """
    # The include needs an explicit file name; a bare "#include" is not valid.
    outfile.write('''#include <idc.idc>
static main(){
set_inf_attr(INF_PROCNAME, "80386r");
set_target_assembler("Generic for intel 80x86");
''')


def write_idc_footer(outfile):
    """Write the IDC epilogue: status message, listing export, closing brace.

    :param outfile: Writable text stream receiving the IDC script.
    """
    outfile.write("""
print("Applied addresses and types");

// produce a listing file
auto fpl = fopen(get_root_filename() + ".lst", "w");
gen_file(OFILE_LST, fpl, 0x10000, BADADDR, GENFLG_ASMTYPE);
fclose(fpl);
print("Generated lst");
}""")


def process_symbols(symbols, outfile):
    """Write a ``set_name`` line for every symbol, prefixing names with ``_``.

    :param symbols: Mapping of symbol name to integer address.
    :param outfile: Writable text stream receiving the IDC script.
    """
    outfile.writelines(
        f'set_name(0x{addr:x},"_{symbol}",SN_FORCE);\n'
        for symbol, addr in symbols.items())
instr['Video']: # Identify instructions accessing video memory - print(f'Video acc instr: {cseg:x}:{eip:x}') - - for seg in ['ds', 'es', 'gs', 'fs', 'ss']: # set default reg values used for instruction - if seg in instr and len(instr[seg]) == 1: - outfile.write( - f'split_sreg_range(0x{addr:x},"{seg}",0x{seg_dbx2ida(instr[seg][0]):x},2); // 0x{daddr} 0x{instr[seg][0]:x}\n') - if instr["cs"]: - if cs not in code_segs: - code_segs[cs] = [eip, eip] - else: - code_segs[cs][0] = min(code_segs[cs][0], eip) - code_segs[cs][1] = max(code_segs[cs][1], eip) + mark_code(daddr, instr, outfile, code_segs) if 'Data' in j: - for daddr, data in j['Data'].items(): # Set variables sizes - addr = addr_dbx2ida(int(daddr, 16)) - - if data['Array'] or len(data['Sizes']) != 1: # Don't know yet how to handle arrays - continue - size = data['Sizes'][0] # Only set if it was single size - text = {1: 'Byte', 2: 'Word', 4: 'Dword'}[size] - outfile.write( - f'Make{text}(0x{addr:x}); // 0x{daddr}\n') - - for daddr in sorted(j['Jumps'], reverse=True): - addr = addr_dbx2ida(daddr) - outfile.write(f'add_func(0x{addr:x}); // 0x{daddr:x}\n') - - print('Used segs: ') - print(','.join([f'{seg_dbx2ida(seg):x}' for seg in sorted(all_segs) if - dosbox_load_seg <= seg < dosbox_load_seg + image_size // 0x10])) - - for symbol, addr in symbols.items(): - outfile.write(f'set_name(0x{addr:x},"_{symbol}",SN_FORCE);\n') - outfile.write(""" -print("Applied addresses and types"); + mark_data_access(j, outfile) -// unhide all functions -//auto ea = get_func_attr(INF_MIN_EA, FUNCATTR_START); -//while (ea != BADADDR) { - //set_visible_func(ea, true); - //ea = get_func_attr(ea, FUNCATTR_START); -//} + process_jumps(j, outfile) -// produce a listing file -auto fpl = fopen(get_root_filename() + ".lst", "w"); -gen_file(OFILE_LST, fpl, 0x10000, BADADDR, GENFLG_ASMTYPE); -fclose(fpl); -print("Generated lst"); + print('Used segments: ') + print(','.join([f'{seg_dbx2ida(seg):x}' for seg in sorted(all_segs) + if 
dosbox_load_seg <= seg < dosbox_load_seg + image_size // 0x10])) + + process_symbols(symbols, outfile) + write_idc_footer(outfile) -// produce a map file -//auto fpm = fopen(get_root_filename() + ".map", "w"); -//gen_file(OFILE_MAP, fpm, 0x10000, BADADDR, GENFLG_MAPSEGS|GENFLG_MAPNAME|GENFLG_MAPDMNG|GENFLG_MAPLOC); -//fclose(fpm); -//print("Generated map"); -}""") print("Used code segments and ip range:") for seg, minmax in code_segs.items(): print(f"{seg_dbx2ida(seg):x} {minmax[0]:x}:{minmax[1]:x}")