From 689876ea6abe64e6a0545645fb362bcc559bf8c5 Mon Sep 17 00:00:00 2001 From: Oleg Date: Wed, 1 Aug 2018 12:25:15 -0400 Subject: [PATCH 1/3] rectifies reconstructor, symtab and brancher This PR rectifies reconstructor, symtab and brancher with a respect to performance, without adding new behaviour or breaking of existed one. Rewrote it and reduced a number of iterations over nodes/edges of cfg There was a bit inefficient implementation of `add_symbol` function, so every addition of a symbol led to a filter of the whole table, although there are enough info to reduce such calls. Just a small fix that check if an instruction has jumps at all before subsequent call of `fold_consts` that could be heavy for some of instructions --- lib/bap_disasm/bap_disasm_brancher.ml | 10 ++- lib/bap_disasm/bap_disasm_reconstructor.ml | 82 ++++++++++------------ lib/bap_disasm/bap_disasm_symtab.ml | 20 ++++-- 3 files changed, 62 insertions(+), 50 deletions(-) diff --git a/lib/bap_disasm/bap_disasm_brancher.ml b/lib/bap_disasm/bap_disasm_brancher.ml index 607dee814..99ebc6730 100644 --- a/lib/bap_disasm/bap_disasm_brancher.ml +++ b/lib/bap_disasm/bap_disasm_brancher.ml @@ -32,9 +32,17 @@ let kind_of_branches t f = | `Fall,`Fall -> `Fall | _ -> `Cond +let has_jumps = + Bil.exists + (object + inherit [unit] Stmt.finder + method! enter_jmp _ r = r.return (Some ()) + end) let rec dests_of_bil bil : dests = - Bil.fold_consts bil |> List.concat_map ~f:dests_of_stmt + if has_jumps bil then + Bil.fold_consts bil |> List.concat_map ~f:dests_of_stmt + else [] and dests_of_stmt = function | Bil.Jmp (Bil.Int addr) -> [Some addr,`Jump] | Bil.Jmp (_) -> [None, `Jump] diff --git a/lib/bap_disasm/bap_disasm_reconstructor.ml b/lib/bap_disasm/bap_disasm_reconstructor.ml index 8cd8a8f3c..97d0fd0d3 100644 --- a/lib/bap_disasm/bap_disasm_reconstructor.ml +++ b/lib/bap_disasm/bap_disasm_reconstructor.ml @@ -19,51 +19,47 @@ type reconstructor = t let create f = Reconstructor f let run (Reconstructor f) = f -let find_calls name roots cfg = - let starts = Addr.Table.create () in - List.iter roots ~f:(fun addr -> - Hashtbl.set starts ~key:addr ~data:(name addr)); - Cfg.nodes cfg |> Seq.iter ~f:(fun blk -> - let () = - if Seq.is_empty (Cfg.Node.inputs blk cfg) then - let addr = Block.addr blk in - Hashtbl.set starts ~key:addr ~data:(name addr) in - let term = Block.terminator blk in - if Insn.(is call) term then - Seq.iter (Cfg.Node.outputs blk cfg) - ~f:(fun e -> - if Cfg.Edge.label e <> `Fall then - let w = Block.addr (Cfg.Edge.dst e) in - Hashtbl.set starts ~key:w ~data:(name w))); - starts +let roots_of_blk roots cfg blk = + let addr = Block.addr blk in + let term = Block.terminator blk in + let init = + if Set.mem roots addr || Seq.is_empty (Cfg.Node.inputs blk cfg) + then [blk] + else [] in + if Insn.(is call) term then + Seq.fold ~init (Cfg.Node.outputs blk cfg) + ~f:(fun rs e -> + if Cfg.Edge.label e <> `Fall then Cfg.Edge.dst e :: rs + else rs) + else init + +let find_calls cfg roots = + let roots = List.fold ~init:Addr.Set.empty ~f:Set.add roots in + Graphlib.depth_first_search (module Cfg) + cfg ~init:Block.Set.empty + ~enter_node:(fun _ blk all -> + roots_of_blk roots cfg blk |> + List.fold ~init:all ~f:Set.add) let reconstruct name roots cfg = - let roots = find_calls name roots cfg in - let init = - Cfg.nodes cfg |> Seq.fold ~init:Cfg.empty ~f:(fun cfg n -> - Cfg.Node.insert n cfg) in - let filtered = - Cfg.edges cfg |> Seq.fold ~init ~f:(fun cfg e -> - if Hashtbl.mem roots (Block.addr (Cfg.Edge.dst e)) then cfg - else Cfg.Edge.insert e cfg) in - let find_block addr = - Cfg.nodes cfg |> Seq.find ~f:(fun blk -> - Addr.equal addr (Block.addr blk)) in - Hashtbl.fold roots ~init:Symtab.empty - ~f:(fun ~key:entry ~data:name syms -> - match find_block entry with - | None -> syms - | Some entry -> - let cfg : cfg = - with_return (fun {return} -> - Graphlib.depth_first_search (module Cfg) - filtered ~start:entry ~init:Cfg.empty - ~enter_edge:(fun _ -> Cfg.Edge.insert) - ~start_tree:(fun n t -> - if Block.equal n entry - then Cfg.Node.insert n t - else return t)) in - Symtab.add_symbol syms (name,entry,cfg)) + let roots = find_calls cfg roots in + let filtered = Set.fold roots ~init:cfg + ~f:(fun g root -> + let inputs = Cfg.Node.inputs root cfg in + Seq.fold inputs ~init:g ~f:(fun g e -> Cfg.Edge.remove e g)) in + Set.fold roots ~init:Symtab.empty + ~f:(fun syms entry -> + let name = name (Block.addr entry) in + let cfg : cfg = + with_return (fun {return} -> + Graphlib.depth_first_search (module Cfg) + filtered ~start:entry ~init:Cfg.empty + ~enter_edge:(fun _ -> Cfg.Edge.insert) + ~start_tree:(fun n t -> + if Block.equal n entry + then Cfg.Node.insert n t + else return t)) in + Symtab.add_symbol syms (name,entry,cfg)) let of_blocks syms = let reconstruct (cfg : cfg) = diff --git a/lib/bap_disasm/bap_disasm_symtab.ml b/lib/bap_disasm/bap_disasm_symtab.ml index 77510dccd..d011de36e 100644 --- a/lib/bap_disasm/bap_disasm_symtab.ml +++ b/lib/bap_disasm/bap_disasm_symtab.ml @@ -17,7 +17,7 @@ type cfg = Cfg.t [@@deriving compare] type fn = string * block * cfg [@@deriving compare] -let sexp_of_fn (name,block,cfg) = +let sexp_of_fn (name,block,_cfg) = Sexp.List [sexp_of_string name; sexp_of_addr (Block.addr block)] module Fn = Opaque.Make(struct @@ -38,7 +38,7 @@ let compare t1 t2 = type symtab = t [@@deriving compare, sexp_of] -let span ((name,entry,cfg) as fn) = +let span ((_name,_entry,cfg) as fn) = Cfg.nodes cfg |> Seq.fold ~init:Memmap.empty ~f:(fun map blk -> Memmap.add map (Block.memory blk) fn) @@ -52,19 +52,27 @@ let merge m1 m2 = Memmap.to_sequence m2 |> Seq.fold ~init:m1 ~f:(fun m1 (mem,x) -> Memmap.add m1 mem x) +let filter_mem mem name entry = + Memmap.filter mem ~f:(fun (n,e,_) -> + not(String.(name = n) || Block.(entry = e))) + let remove t (name,entry,_) : t = { names = Map.remove t.names name; addrs = Map.remove t.addrs (Block.addr entry); - memory = Memmap.filter t.memory ~f:(fun (n,e,_) -> - not(String.(name = n) || Block.(entry = e))) + memory = filter_mem t.memory name entry; } +let filter t ((name,entry,_ ) as fn) = + if Map.mem t.names name || Map.mem t.addrs (Block.addr entry) then + remove t fn + else t + let add_symbol t (name,entry,cfg) : t = let data = name,entry,cfg in - let t = remove t data in + let t = filter t data in { addrs = Map.add t.addrs ~key:(Block.addr entry) ~data; - names = Map.add t.names ~key:name ~data; + names = Map.add t.names ~key:name ~data; memory = merge t.memory (span data); } From a2175b7a0c01cf9da70b61befa35015f22ab2b8f Mon Sep 17 00:00:00 2001 From: Oleg Date: Mon, 13 Aug 2018 13:53:29 -0400 Subject: [PATCH 2/3] updated after review --- lib/bap_disasm/bap_disasm_reconstructor.ml | 62 +++++++++++----------- lib/bap_disasm/bap_disasm_symtab.ml | 18 +++---- 2 files changed, 38 insertions(+), 42 deletions(-) diff --git a/lib/bap_disasm/bap_disasm_reconstructor.ml b/lib/bap_disasm/bap_disasm_reconstructor.ml index 97d0fd0d3..c81c1f235 100644 --- a/lib/bap_disasm/bap_disasm_reconstructor.ml +++ b/lib/bap_disasm/bap_disasm_reconstructor.ml @@ -19,47 +19,45 @@ type reconstructor = t let create f = Reconstructor f let run (Reconstructor f) = f -let roots_of_blk roots cfg blk = +let callees_of_block cfg roots blk = let addr = Block.addr blk in let term = Block.terminator blk in let init = - if Set.mem roots addr || Seq.is_empty (Cfg.Node.inputs blk cfg) - then [blk] - else [] in + if Set.mem roots addr then Block.Set.singleton blk + else Block.Set.empty in if Insn.(is call) term then Seq.fold ~init (Cfg.Node.outputs blk cfg) - ~f:(fun rs e -> - if Cfg.Edge.label e <> `Fall then Cfg.Edge.dst e :: rs - else rs) + ~f:(fun cls e -> + if Cfg.Edge.label e <> `Fall then + Set.add cls (Cfg.Edge.dst e) + else cls) else init -let find_calls cfg roots = - let roots = List.fold ~init:Addr.Set.empty ~f:Set.add roots in - Graphlib.depth_first_search (module Cfg) - cfg ~init:Block.Set.empty - ~enter_node:(fun _ blk all -> - roots_of_blk roots cfg blk |> - List.fold ~init:all ~f:Set.add) +let update_callees cfg roots callees blk = + Set.union callees (callees_of_block cfg roots blk) + +let find_callees cfg roots = + let roots = Addr.Set.of_list roots in + Seq.fold (Cfg.nodes cfg) ~init:Block.Set.empty + ~f:(update_callees cfg roots) let reconstruct name roots cfg = - let roots = find_calls cfg roots in - let filtered = Set.fold roots ~init:cfg - ~f:(fun g root -> - let inputs = Cfg.Node.inputs root cfg in - Seq.fold inputs ~init:g ~f:(fun g e -> Cfg.Edge.remove e g)) in - Set.fold roots ~init:Symtab.empty - ~f:(fun syms entry -> - let name = name (Block.addr entry) in - let cfg : cfg = - with_return (fun {return} -> - Graphlib.depth_first_search (module Cfg) - filtered ~start:entry ~init:Cfg.empty - ~enter_edge:(fun _ -> Cfg.Edge.insert) - ~start_tree:(fun n t -> - if Block.equal n entry - then Cfg.Node.insert n t - else return t)) in - Symtab.add_symbol syms (name,entry,cfg)) + let callees = find_callees cfg roots in + let is_call e = Set.mem callees (Cfg.Edge.dst e) in + let rec traverse fng node = + let fng = Cfg.Node.insert node fng in + Seq.fold (Cfg.Node.outputs node cfg) ~init:fng ~f:(fun fng edg -> + if is_call edg then fng + else + let dst = Cfg.Edge.dst edg in + let visited = Cfg.Node.mem dst fng in + let fng = Cfg.Edge.insert edg fng in + if visited then fng + else traverse fng dst) in + Set.fold callees ~init:Symtab.empty ~f:(fun tab entry -> + let name = name (Block.addr entry) in + let fng = traverse Cfg.empty entry in + Symtab.add_symbol tab (name,entry,fng)) let of_blocks syms = let reconstruct (cfg : cfg) = diff --git a/lib/bap_disasm/bap_disasm_symtab.ml b/lib/bap_disasm/bap_disasm_symtab.ml index d011de36e..93e5082c0 100644 --- a/lib/bap_disasm/bap_disasm_symtab.ml +++ b/lib/bap_disasm/bap_disasm_symtab.ml @@ -56,20 +56,18 @@ let filter_mem mem name entry = Memmap.filter mem ~f:(fun (n,e,_) -> not(String.(name = n) || Block.(entry = e))) -let remove t (name,entry,_) : t = { - names = Map.remove t.names name; - addrs = Map.remove t.addrs (Block.addr entry); - memory = filter_mem t.memory name entry; -} - -let filter t ((name,entry,_ ) as fn) = - if Map.mem t.names name || Map.mem t.addrs (Block.addr entry) then - remove t fn +let remove t (name,entry,_) : t = + if Map.mem t.addrs (Block.addr entry) then + { + names = Map.remove t.names name; + addrs = Map.remove t.addrs (Block.addr entry); + memory = filter_mem t.memory name entry; + } else t let add_symbol t (name,entry,cfg) : t = let data = name,entry,cfg in - let t = filter t data in + let t = remove t data in { addrs = Map.add t.addrs ~key:(Block.addr entry) ~data; names = Map.add t.names ~key:name ~data; From b6da46ef3bae047752d6831c54c9ba78abf51a69 Mon Sep 17 00:00:00 2001 From: Oleg Date: Mon, 13 Aug 2018 18:02:12 -0400 Subject: [PATCH 3/3] refactored --- lib/bap_disasm/bap_disasm_reconstructor.ml | 52 ++++++++++------------ 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/lib/bap_disasm/bap_disasm_reconstructor.ml b/lib/bap_disasm/bap_disasm_reconstructor.ml index c81c1f235..4378992c5 100644 --- a/lib/bap_disasm/bap_disasm_reconstructor.ml +++ b/lib/bap_disasm/bap_disasm_reconstructor.ml @@ -19,44 +19,38 @@ type reconstructor = t let create f = Reconstructor f let run (Reconstructor f) = f -let callees_of_block cfg roots blk = - let addr = Block.addr blk in +let entries_of_block cfg roots entries blk = + let entries = + if Set.mem roots (Block.addr blk) then Set.add entries blk + else entries in let term = Block.terminator blk in - let init = - if Set.mem roots addr then Block.Set.singleton blk - else Block.Set.empty in if Insn.(is call) term then - Seq.fold ~init (Cfg.Node.outputs blk cfg) - ~f:(fun cls e -> + Seq.fold ~init:entries (Cfg.Node.outputs blk cfg) + ~f:(fun entries e -> if Cfg.Edge.label e <> `Fall then - Set.add cls (Cfg.Edge.dst e) - else cls) - else init + Set.add entries (Cfg.Edge.dst e) + else entries) + else entries -let update_callees cfg roots callees blk = - Set.union callees (callees_of_block cfg roots blk) - -let find_callees cfg roots = +let collect_entries cfg roots = let roots = Addr.Set.of_list roots in Seq.fold (Cfg.nodes cfg) ~init:Block.Set.empty - ~f:(update_callees cfg roots) + ~f:(entries_of_block cfg roots) -let reconstruct name roots cfg = - let callees = find_callees cfg roots in - let is_call e = Set.mem callees (Cfg.Edge.dst e) in - let rec traverse fng node = - let fng = Cfg.Node.insert node fng in - Seq.fold (Cfg.Node.outputs node cfg) ~init:fng ~f:(fun fng edg -> - if is_call edg then fng +let reconstruct name roots prog = + let entries = collect_entries prog roots in + let is_call e = Set.mem entries (Cfg.Edge.dst e) in + let rec add cfg node = + let cfg = Cfg.Node.insert node cfg in + Seq.fold (Cfg.Node.outputs node prog) ~init:cfg ~f:(fun cfg edge -> + if is_call edge then cfg else - let dst = Cfg.Edge.dst edg in - let visited = Cfg.Node.mem dst fng in - let fng = Cfg.Edge.insert edg fng in - if visited then fng - else traverse fng dst) in - Set.fold callees ~init:Symtab.empty ~f:(fun tab entry -> + let cfg' = Cfg.Edge.insert edge cfg in + if Cfg.Node.mem (Cfg.Edge.dst edge) cfg then cfg' + else add cfg' (Cfg.Edge.dst edge)) in + Set.fold entries ~init:Symtab.empty ~f:(fun tab entry -> let name = name (Block.addr entry) in - let fng = traverse Cfg.empty entry in + let fng = add Cfg.empty entry in Symtab.add_symbol tab (name,entry,fng)) let of_blocks syms =