Skip to content

Commit

Permalink
split by functions
Browse files Browse the repository at this point in the history
  • Loading branch information
xor2003 committed Sep 25, 2024
1 parent 9ac7d83 commit 36b2417
Showing 1 changed file with 101 additions and 84 deletions.
185 changes: 101 additions & 84 deletions json2idc.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,6 @@
image_size = 939072 - 0xa40 # 0x22b90 - 0x200 # .exe size - exe header
dosbox_load_seg = 0x1a2 # para
ida_load_seg = 0x1000

# zone? or sab2
# image_size = 262288+0x100 #0x22b90 - 0x200 # .exe size - exe header
# dosbox_load_seg = 0x192 # para
# ida_load_seg = 0xff0
all_segs = set()


Expand All @@ -24,18 +19,13 @@ def seg_dbx2ida(seg: int) -> int:


def read_segments_map(file_name):
"""It reads a .map file and returns a dictionary of segments.
:param file_name: The name of the .map file
:return: A dictionary of segments and their values.
"""
"""Reads a .map file and returns a dictionary of segments."""
symbols = dict()
with open(file_name) as f:
lines = f.read().splitlines()
for line in lines:
m = re.match(
r'^\s*MakeName\s*\(\s*(?P<address>[0-9A-Fa-fXx]+)\s*,\s*"(?P<name>\S+)"\s*\)\s*;',
line)
r'^\s*MakeName\s*\(\s*(?P<address>[0-9A-Fa-fXx]+)\s*,\s*"(?P<name>\S+)"\s*\)\s*;', line)
if not m:
continue
name = m["name"]
Expand All @@ -44,11 +34,98 @@ def read_segments_map(file_name):
return symbols


def mark_code(daddr, instr, outfile, code_segs):
"""Processes individual code segments."""
global all_segs

addr = addr_dbx2ida(int(daddr, 16))
for seg in ['cs', 'ds', 'es', 'gs', 'fs', 'ss']:
all_segs |= set(instr[seg])

if len(instr["cs"]) == 0:
return

outfile.write(f'MakeCode(0x{addr:x}); // {daddr}\n')
cs = instr["cs"][0]
cseg = seg_dbx2ida(cs)
eip = addr - cseg * 0x10

if instr['Video']: # Identify instructions accessing video memory
print(f'Video acc instr: {cseg:x}:{eip:x}')

set_segment_registers_values(outfile, addr, daddr, instr)
collect_code_segs_and_ip_ranges(code_segs, cs, eip)


def set_segment_registers_values(outfile, addr, daddr, instr):
"""Sets the default segment register values used for an instruction."""
for seg in ['ds', 'es', 'gs', 'fs', 'ss']:
if seg in instr and len(instr[seg]) == 1:
outfile.write(
f'split_sreg_range(0x{addr:x},"{seg}",0x{seg_dbx2ida(instr[seg][0]):x},2); // 0x{daddr} 0x{instr[seg][0]:x}\n')


def collect_code_segs_and_ip_ranges(code_segs, cs, eip):
"""Updates the minimum and maximum addresses for a code segment."""
if cs not in code_segs:
code_segs[cs] = [eip, eip]
else:
code_segs[cs][0] = min(code_segs[cs][0], eip)
code_segs[cs][1] = max(code_segs[cs][1], eip)


def mark_data_access(j, outfile):
"""Processes the data segments, setting variable sizes."""
for daddr, data in j['Data'].items():
addr = addr_dbx2ida(int(daddr, 16))

if data['Array'] or len(data['Sizes']) != 1: # Don't know yet how to handle arrays
continue

size = data['Sizes'][0] # Only set if it was single size
text = {1: 'Byte', 2: 'Word', 4: 'Dword'}[size]
outfile.write(f'Make{text}(0x{addr:x}); // 0x{daddr}\n')


def process_jumps(j, outfile):
"""Processes the jump addresses and adds function definitions."""
for daddr in sorted(j['Jumps'], reverse=True):
addr = addr_dbx2ida(daddr)
outfile.write(f'add_func(0x{addr:x}); // 0x{daddr:x}\n')


def write_idc_header(outfile):
"""Writes the IDC script header."""
outfile.write('''#include <idc.idc>
static main(){
set_inf_attr(INF_PROCNAME, "80386r");
set_target_assembler("Generic for intel 80x86");
''')


def write_idc_footer(outfile):
"""Writes the IDC script footer."""
outfile.write("""
print("Applied addresses and types");
// produce a listing file
auto fpl = fopen(get_root_filename() + ".lst", "w");
gen_file(OFILE_LST, fpl, 0x10000, BADADDR, GENFLG_ASMTYPE);
fclose(fpl);
print("Generated lst");
}""")


def process_symbols(symbols, outfile):
"""Processes and applies symbols from the map file."""
for symbol, addr in symbols.items():
outfile.write(f'set_name(0x{addr:x},"_{symbol}",SN_FORCE);\n')


def main():
parser = argparse.ArgumentParser(description="Process a .json file and a .map file to generate IDC script.")
parser.add_argument('json_file', help='Path to the .json file with run-time data')
parser.add_argument('map_file', help='Path to the .map file with segment information')

args = parser.parse_args()

global all_segs
Expand All @@ -64,85 +141,25 @@ def main():
code_segs = dict()

with open(idc_fname, 'w') as outfile:
outfile.write('''#include <idc.idc>
static main(){
set_inf_attr(INF_PROCNAME, "80386r");
set_target_assembler("Generic for intel 80x86");
''')
write_idc_header(outfile)

with open(json_fname) as infile:
j = jsonpickle.decode(infile.read())
for daddr, instr in j['Code'].items():
addr = addr_dbx2ida(int(daddr, 16))
for seg in ['cs', 'ds', 'es', 'gs', 'fs', 'ss']:
all_segs |= set(instr[seg])
all_segs = list(sorted(all_segs))

for daddr, instr in j['Code'].items():
if len(instr["cs"]) == 0:
continue
addr = addr_dbx2ida(int(daddr, 16))

outfile.write(f'MakeCode(0x{addr:x}); // {daddr}\n')
cs = instr["cs"][0]
cseg = seg_dbx2ida(cs)
eip = addr - cseg * 0x10
if instr['Video']: # Identify instructions accessing video memory
print(f'Video acc instr: {cseg:x}:{eip:x}')

for seg in ['ds', 'es', 'gs', 'fs', 'ss']: # set default reg values used for instruction
if seg in instr and len(instr[seg]) == 1:
outfile.write(
f'split_sreg_range(0x{addr:x},"{seg}",0x{seg_dbx2ida(instr[seg][0]):x},2); // 0x{daddr} 0x{instr[seg][0]:x}\n')
if instr["cs"]:
if cs not in code_segs:
code_segs[cs] = [eip, eip]
else:
code_segs[cs][0] = min(code_segs[cs][0], eip)
code_segs[cs][1] = max(code_segs[cs][1], eip)
mark_code(daddr, instr, outfile, code_segs)

if 'Data' in j:
for daddr, data in j['Data'].items(): # Set variables sizes
addr = addr_dbx2ida(int(daddr, 16))

if data['Array'] or len(data['Sizes']) != 1: # Don't know yet how to handle arrays
continue
size = data['Sizes'][0] # Only set if it was single size
text = {1: 'Byte', 2: 'Word', 4: 'Dword'}[size]
outfile.write(
f'Make{text}(0x{addr:x}); // 0x{daddr}\n')

for daddr in sorted(j['Jumps'], reverse=True):
addr = addr_dbx2ida(daddr)
outfile.write(f'add_func(0x{addr:x}); // 0x{daddr:x}\n')

print('Used segs: ')
print(','.join([f'{seg_dbx2ida(seg):x}' for seg in sorted(all_segs) if
dosbox_load_seg <= seg < dosbox_load_seg + image_size // 0x10]))

for symbol, addr in symbols.items():
outfile.write(f'set_name(0x{addr:x},"_{symbol}",SN_FORCE);\n')
outfile.write("""
print("Applied addresses and types");
mark_data_access(j, outfile)

// unhide all functions
//auto ea = get_func_attr(INF_MIN_EA, FUNCATTR_START);
//while (ea != BADADDR) {
//set_visible_func(ea, true);
//ea = get_func_attr(ea, FUNCATTR_START);
//}
process_jumps(j, outfile)

// produce a listing file
auto fpl = fopen(get_root_filename() + ".lst", "w");
gen_file(OFILE_LST, fpl, 0x10000, BADADDR, GENFLG_ASMTYPE);
fclose(fpl);
print("Generated lst");
print('Used segments: ')
print(','.join([f'{seg_dbx2ida(seg):x}' for seg in sorted(all_segs)
if dosbox_load_seg <= seg < dosbox_load_seg + image_size // 0x10]))

process_symbols(symbols, outfile)
write_idc_footer(outfile)

// produce a map file
//auto fpm = fopen(get_root_filename() + ".map", "w");
//gen_file(OFILE_MAP, fpm, 0x10000, BADADDR, GENFLG_MAPSEGS|GENFLG_MAPNAME|GENFLG_MAPDMNG|GENFLG_MAPLOC);
//fclose(fpm);
//print("Generated map");
}""")
print("Used code segments and ip range:")
for seg, minmax in code_segs.items():
print(f"{seg_dbx2ida(seg):x} {minmax[0]:x}:{minmax[1]:x}")
Expand Down

0 comments on commit 36b2417

Please sign in to comment.