-
Notifications
You must be signed in to change notification settings - Fork 2
/
binutils.py
executable file
·104 lines (77 loc) · 3.09 KB
/
binutils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/python3
import subprocess
import os
import sys
import re
def dump_asm(filename: str, objdump, mattr='+v,+zba,+zbb,+zbc,+zbs,+m,+f,+d'):
    """Disassemble *filename* with *objdump* into ``filename + '.S'``.

    Args:
        filename: Path of the binary to disassemble. The listing is written
            next to it, with ``.S`` appended to the name.
        objdump: Path (or name) of the objdump executable to run.
        mattr: Target feature list forwarded as ``--mattr=<mattr>``. The
            default is the feature list this script has always passed. Pass
            ``None`` or ``''`` to omit the option and let objdump decide.

    The exit status of objdump is not checked, so a failed run still leaves
    a (possibly empty) ``.S`` file behind.
    """
    command = [objdump, '--no-addresses']
    if mattr:
        # FIXME: infer from binary
        command.append('--mattr=' + mattr)
    command += ['--symbolize-operands', '-S', filename]
    # objdump's stdout is redirected straight into the listing file.
    with open(filename + '.S', mode="w") as f:
        subprocess.run(command, stdout=f)
def dump_bc(base, dis, count, data):
    """Store one segment as ``<base>/seg<count>.bc`` and run *dis* on it.

    Args:
        base: Existing directory that receives the segment file.
        dis: Path (or name) of the disassembler executable to run.
        count: Segment index, used to build the file name.
        data: Raw bytes of the segment.

    The disassembler is started with *base* as its working directory and is
    given only the bare file name. Its exit status is not checked.
    """
    segment_name = f"seg{count}.bc"
    with open(f"{base}/{segment_name}", "wb") as segment:
        segment.write(data)
    # The file is closed (and therefore flushed) before the tool reads it.
    subprocess.run([dis, segment_name], cwd=base)
def diff_ir(out_dir, base1, base2, llvm_diff):
    """Compare two directories and run *llvm_diff* on every differing file pair.

    Args:
        out_dir: Directory for the results; created if missing.
        base1: First directory to compare.
        base2: Second directory to compare.
        llvm_diff: Path (or name) of the executable run on each differing pair.

    Does nothing (and does not create *out_dir*) unless both *base1* and
    *base2* exist. Otherwise ``diff -q -r`` (ignoring ``*.bc`` files) writes
    its report to ``<out_dir>/diff``. For the N-th ``Files A and B differ``
    line, ``<out_dir>/irdiff<N>`` receives that line followed by whatever
    ``llvm_diff A B`` prints on standard error. Exit statuses are not checked.
    """
    if not (os.path.exists(base1) and os.path.exists(base2)):
        return
    os.makedirs(out_dir, exist_ok=True)
    diff_file = out_dir + "/diff"
    # The report is parsed below by matching diff's English wording, so pin
    # the message locale; a translated "Files ... differ" would never match.
    diff_env = dict(os.environ, LC_ALL='C')
    with open(diff_file, 'w') as diff:
        subprocess.run(['diff', '-x', '*.bc', '-q',
                        '-r', base1, base2], stdout=diff, env=diff_env)
    pattern = re.compile(r'Files (.+) and (.+) differ')
    diff_count = 0
    with open(diff_file) as diff:
        for line in diff:
            matched = pattern.match(line.removesuffix('\n'))
            if matched is None:
                # e.g. "Only in ..." lines: nothing to compare pairwise.
                continue
            lhs, rhs = matched.groups()
            irdiff_file = out_dir + "/irdiff" + str(diff_count)
            with open(irdiff_file, 'w') as irdiff:
                irdiff.write(line)
                # Flush so the header line lands before the child's output,
                # which is written through the same file descriptor.
                irdiff.flush()
                subprocess.run([llvm_diff, lhs, rhs], stderr=irdiff)
            diff_count += 1
def extract_bc(filename: str, objcopy, dis):
    """Dump the ``.llvmbc`` section of *filename* and split it into segments.

    Args:
        filename: Path of the binary to inspect.
        objcopy: Path (or name) of the objcopy executable to run.
        dis: Path (or name) of the disassembler handed on to ``dump_bc``.

    objcopy is asked to dump the section into ``filename + '.bc'``. If that
    file does not exist afterwards the function returns without creating
    anything else. Otherwise the dump is cut at every occurrence of the
    4-byte header ``42 43 C0 DE`` found at least four bytes past the previous
    cut, and each piece is passed to ``dump_bc`` with the directory
    ``filename + '_bc'`` and a running index starting at 0.
    """
    # NOTE(review): no output file is passed to objcopy; llvm-objcopy is
    # documented to rewrite its input in place in that case - confirm that
    # this is acceptable for `filename`.
    section_dump = filename + ".bc"
    subprocess.run([objcopy, filename, "--dump-section",
                    ".llvmbc=" + section_dump])
    if not os.path.exists(section_dump):
        return

    segment_dir = filename + "_bc"
    os.makedirs(segment_dir, exist_ok=True)
    with open(section_dump, 'rb') as section:
        blob = section.read()

    magic = bytes([0x42, 0x43, 0xc0, 0xde])
    # Offset 0 always opens the first segment; searching resumes four bytes
    # past each start so the header that opened a segment is not found again.
    starts = [0]
    while (hit := blob.find(magic, starts[-1] + 4)) != -1:
        starts.append(hit)
    ends = starts[1:] + [len(blob)]

    for index, (begin, end) in enumerate(zip(starts, ends)):
        dump_bc(segment_dir, dis, index, blob[begin:end])
def check_access(bin):
    """Return True if *bin* is an existing regular file the caller may execute.

    Args:
        bin: Path to test.

    Directories are rejected: they normally carry the execute (search)
    permission, so an exists-plus-``X_OK`` test alone would accept them even
    though they cannot be run as a tool.
    """
    return os.path.isfile(bin) and os.access(bin, os.X_OK)
def main(argv):
    """Run the whole pipeline for two binaries and return the exit status.

    Args:
        argv: Full argument vector: program name, LLVM bin directory, first
            binary, second binary.

    Returns:
        1 if the argument count is wrong or any of the four LLVM tools is not
        accepted by ``check_access``; 0 after both binaries were processed
        and their ``_bc`` directories compared into ``irdiff``.
    """
    if len(argv) != 4:
        print('Usage: binutils.py <llvm-bin-path> binary1 binary2',
              file=sys.stderr)
        return 1

    llvm_path = os.path.abspath(argv[1])
    llvm_dis = os.path.join(llvm_path, 'llvm-dis')
    llvm_objdump = os.path.join(llvm_path, 'llvm-objdump')
    llvm_objcopy = os.path.join(llvm_path, 'llvm-objcopy')
    llvm_diff = os.path.join(llvm_path, 'llvm-diff')

    required_tools = (llvm_dis, llvm_objdump, llvm_objcopy, llvm_diff)
    if not all(check_access(tool) for tool in required_tools):
        print('Error: invalid llvm binaries path', file=sys.stderr)
        return 1

    for binary in argv[2:]:
        dump_asm(binary, llvm_objdump)
        extract_bc(binary, llvm_objcopy, llvm_dis)
    diff_ir("irdiff", argv[2] + "_bc", argv[3] + "_bc", llvm_diff)
    return 0


if __name__ == '__main__':
    # sys.exit rather than the bare exit() helper, which is only installed
    # by the site module for interactive use.
    sys.exit(main(sys.argv))