diff --git a/test/templates/common.py b/test/templates/common.py index fa41faf..5b5704c 100644 --- a/test/templates/common.py +++ b/test/templates/common.py @@ -21,6 +21,9 @@ def error(msg): def _run(cmd, stdin=None): + """ + Run process and abort on error. + """ if verbose: print(f"{_me}: running command: {' '.join(cmd)}") with subprocess.Popen(cmd, stdin=stdin, stdout=subprocess.PIPE, @@ -36,27 +39,38 @@ def _run(cmd, stdin=None): def gcc(args): + """ + Run compiler with given arguments. + """ return _run(['gcc'] + args) -def disasm(file, objdump_or_gdb, strip=None, symbol=None): +def disasm(file, objdump_or_gdb, symbol, start, finish): + """ + Disassemble binary file. + """ if objdump_or_gdb: out = _run(['objdump', '-d', file]) - elif strip: - # This is tricky as we can not use symbol name - start, finish = _find_address(file, symbol) - out = _run(['gdb', '-batch', '-ex', f'disassemble {start},{finish}', file]) - else: + elif symbol is not None: out = _run(['gdb', '-batch', '-ex', f'disassemble {symbol}', file]) + else: + out = _run(['gdb', '-batch', '-ex', f'disassemble {start},{finish}', file]) return out +def strip_binary(file): + """ + Strip symbol info from binary file. + """ + _run(['strip', '-s', file]) + + def grep(s, regex): lines = s.split('\n') return list(filter(lambda s: re.search(regex, s), lines)) -def _find_address(file, name): +def find_address(file, name): out = _run(['readelf', '-sW', file]) lines = grep(out, fr'{name}$') assert len(lines) >= 1, f"failed to locate symbol {name} in\n{out}" diff --git a/test/templates/gen_calls.py b/test/templates/gen_calls.py index ae4a665..8554e28 100755 --- a/test/templates/gen_calls.py +++ b/test/templates/gen_calls.py @@ -6,12 +6,11 @@ # TODO: find a way to produce other snippets we see in disasm: # addr32 call 0x5555555733e0 -# call 0x555555576a00 import os.path import itertools -from common import set_basename, gcc, disasm, grep +from common import set_basename, gcc, disasm, grep, strip_binary, find_address set_basename(os.path.basename(__file__)) @@ -42,23 +41,22 @@ # Force non-PLT call for PIC code? if direct and pic: flags.append('-DHIDDEN') - # Include debuginfo? - if not strip: - # FIXME: for real stripping of symtab we need to run - # `strip a.out` and `strip -s a.out`. - # This should be done for other tests too. - flags.append('-g') gcc(flags) + caller = 'bar' + start, finish = find_address('a.out', caller) + if strip: + strip_binary('a.out') + caller = None + # Generate disasm - caller = 'bar' - out = disasm('a.out', not gdb, strip, caller) + out = disasm('a.out', not gdb, caller, start, finish) # Print snippets - headers = grep(out, fr'<{caller}>:|Dump of') + headers = grep(out, r':|Dump of') calls = grep(out, r'call') print('''\ headers: diff --git a/test/templates/gen_funtable.py b/test/templates/gen_funtable.py index 347533d..896149f 100755 --- a/test/templates/gen_funtable.py +++ b/test/templates/gen_funtable.py @@ -7,7 +7,7 @@ import os.path import itertools -from common import set_basename, gcc, disasm, grep +from common import set_basename, gcc, disasm, grep, strip_binary, find_address set_basename(os.path.basename(__file__)) @@ -34,10 +34,17 @@ gcc(flags) - # Generate disasm + # Strip caller = 'bar' - out = disasm('a.out', not gdb, strip, caller) + start, finish = find_address('a.out', caller) + if strip: + strip_binary('a.out') + caller = None + + # Generate disasm + + out = disasm('a.out', not gdb, caller, start, finish) # Print snippets diff --git a/test/templates/gen_jumps.py b/test/templates/gen_jumps.py index 76fd895..b19400f 100755 --- a/test/templates/gen_jumps.py +++ b/test/templates/gen_jumps.py @@ -7,7 +7,7 @@ import os.path import itertools -from common import set_basename, gcc, disasm, grep +from common import set_basename, gcc, disasm, grep, strip_binary, find_address set_basename(os.path.basename(__file__)) @@ -32,16 +32,20 @@ flags += ['-fPIC', '-shared'] if opt: flags.append('-O2') - # Include debuginfo? - if not strip: - flags.append('-g') gcc(flags) - # Generate disasm + # Strip caller = 'bar' - out = disasm('a.out', not gdb, strip, caller) + start, finish = find_address('a.out', caller) + if strip: + strip_binary('a.out') + caller = None + + # Generate disasm + + out = disasm('a.out', not gdb, caller, start, finish) # Print snippets diff --git a/test/templates/gen_jumptable.py b/test/templates/gen_jumptable.py index e7d9ca5..81a173e 100755 --- a/test/templates/gen_jumptable.py +++ b/test/templates/gen_jumptable.py @@ -7,7 +7,7 @@ import os.path import itertools -from common import set_basename, gcc, disasm, grep +from common import set_basename, gcc, disasm, grep, strip_binary, find_address set_basename(os.path.basename(__file__)) @@ -28,16 +28,20 @@ # DLL or executable? if pic: flags += ['-fPIC', '-shared'] - # Include debuginfo? - if not strip: - flags.append('-g') gcc(flags) - # Generate disasm + # Strip caller = 'bar' - out = disasm('a.out', not gdb, strip, caller) + start, finish = find_address('a.out', caller) + if strip: + strip_binary('a.out') + caller = None + + # Generate disasm + + out = disasm('a.out', not gdb, caller, start, finish) # Print snippets diff --git a/test/test_parser.py b/test/test_parser.py new file mode 100644 index 0000000..7713ded --- /dev/null +++ b/test/test_parser.py @@ -0,0 +1,84 @@ +import unittest + +from src.asm2cfg import asm2cfg + + +def _get_the_source_block(blocks): + sources = [block for _, block in blocks.items() if block.jump_edge is not None] + return sources[0] if len(sources) == 1 else None + + +class ParseLineTestCase(unittest.TestCase): + @unittest.expectedFailure + def test_linear_sequence(self): + lines = '''\ +Dump of assembler code for function main: + 0x000055555556f952 <+2>: mov $0x1,%ecx + 0x000055555556f957 <+7>: push %r14 + 0x000055555556f959 <+9>: push %r13 + 0x000055555556f95b <+11>: push %r12 + 0x000055555556f95d <+13>: push %rbp + 0x000055555556f95e <+14>: push %rbx\ +'''.split('\n') + _, blocks = asm2cfg.parse_lines(lines, False) + + self.assertEqual(len(blocks), 1) + _, block = blocks.popitem() + self.assertEqual(len(block.instructions), 6) # FIXME + self.assertIs(block.jump_edge, None) + self.assertIs(block.no_jump_edge, None) + + def test_unconditional(self): + lines = '''\ +Dump of assembler code for function main: + 0x000055555556fffb <+1707>: jmp 0x555555570058 + 0x0000555555570058 <+1800>: mov 0xe0(%rsp),%rdi + 0x0000555555570060 <+1808>: test %rdi,%rdi +'''.split('\n') + _, blocks = asm2cfg.parse_lines(lines, False) + + self.assertEqual(len(blocks), 2) + + source_block = _get_the_source_block(blocks) + self.assertIsNot(source_block.jump_edge, None) + self.assertIs(source_block.no_jump_edge, None) + self.assertEqual(len(source_block.instructions), 1) + + dst_block = blocks[source_block.jump_edge] + self.assertIs(dst_block.jump_edge, None) + self.assertIs(dst_block.no_jump_edge, None) + self.assertEqual(len(dst_block.instructions), 2) + + def test_conditional(self): + lines = '''\ +Dump of assembler code for function main: + 0x000055555556fffb <+1707>: je 0x555555570058 + 0x000055555556fffd <+1709>: push %rbx + 0x000055555556fffe <+1710>: mov %r15,%r8 + 0x000055555556fffe <+1710>: mov %r15,%r8 + 0x0000555555570058 <+1800>: mov 0xe0(%rsp),%rdi + 0x0000555555570060 <+1808>: test %rdi,%rdi +'''.split('\n') + _, blocks = asm2cfg.parse_lines(lines, False) + + self.assertEqual(len(blocks), 3) + + source_block = _get_the_source_block(blocks) + self.assertIsNot(source_block.jump_edge, None) + self.assertIsNot(source_block.no_jump_edge, None) + self.assertEqual(len(source_block.instructions), 1) + + fall_block = blocks[source_block.no_jump_edge] + self.assertIs(fall_block.jump_edge, None) + self.assertIsNot(fall_block.no_jump_edge, None) + self.assertEqual(len(fall_block.instructions), 3) + + dst_block = blocks[source_block.jump_edge] + self.assertIs(dst_block.jump_edge, None) + self.assertIs(dst_block.no_jump_edge, None) + self.assertEqual(len(dst_block.instructions), 2) + + # TODO: + # - functions (with and w/o calls) + # - jumptables + # - skip calls diff --git a/test/test_regex.py b/test/test_regex.py index f01cd44..a267047 100644 --- a/test/test_regex.py +++ b/test/test_regex.py @@ -211,3 +211,13 @@ def test_objdump_jumptable(self): self.assertIsNot(jump_match, None) self.assertEqual(jump_match[1], '101d') self.assertEqual(jump_match[2], '') + + @unittest.expectedFailure + def test_objdump_funnyjump(self): + line = '1044: f2 ff 25 d5 2f 00 00 bnd jmpq *0x2fd5(%rip) # 4020 ' + pattern = asm2cfg.get_jump_pattern(False, 'does_not_matter') + jump_match = pattern.search(line) + + self.assertIsNot(jump_match, None) + self.assertEqual(jump_match[1], '4020') + self.assertEqual(jump_match[2], '0x2fd0')