Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimizes reconstructor, symtab, and brancher performance #855

Merged
merged 3 commits into from
Aug 14, 2018
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion lib/bap_disasm/bap_disasm_brancher.ml
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,17 @@ let kind_of_branches t f =
| `Fall,`Fall -> `Fall
| _ -> `Cond

let has_jumps =
Bil.exists
(object
inherit [unit] Stmt.finder
method! enter_jmp _ r = r.return (Some ())
end)

let rec dests_of_bil bil : dests =
Bil.fold_consts bil |> List.concat_map ~f:dests_of_stmt
if has_jumps bil then
Bil.fold_consts bil |> List.concat_map ~f:dests_of_stmt
else []
and dests_of_stmt = function
| Bil.Jmp (Bil.Int addr) -> [Some addr,`Jump]
| Bil.Jmp (_) -> [None, `Jump]
Expand Down
82 changes: 39 additions & 43 deletions lib/bap_disasm/bap_disasm_reconstructor.ml
Original file line number Diff line number Diff line change
Expand Up @@ -19,51 +19,47 @@ type reconstructor = t
let create f = Reconstructor f
let run (Reconstructor f) = f

let find_calls name roots cfg =
let starts = Addr.Table.create () in
List.iter roots ~f:(fun addr ->
Hashtbl.set starts ~key:addr ~data:(name addr));
Cfg.nodes cfg |> Seq.iter ~f:(fun blk ->
let () =
if Seq.is_empty (Cfg.Node.inputs blk cfg) then
let addr = Block.addr blk in
Hashtbl.set starts ~key:addr ~data:(name addr) in
let term = Block.terminator blk in
if Insn.(is call) term then
Seq.iter (Cfg.Node.outputs blk cfg)
~f:(fun e ->
if Cfg.Edge.label e <> `Fall then
let w = Block.addr (Cfg.Edge.dst e) in
Hashtbl.set starts ~key:w ~data:(name w)));
starts
let roots_of_blk roots cfg blk =
let addr = Block.addr blk in
let term = Block.terminator blk in
let init =
if Set.mem roots addr || Seq.is_empty (Cfg.Node.inputs blk cfg)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a block address is already in the set of roots then there is no need to add to the set of roots.
also, to count the indegree of a block use the specialized function Cfg.Node.degree ~dir:In , thus this function
could be defined as simple as

let call_destinations cfg blk = 
   if not Insn.(is call) (Block.terminator blk)  then []
   else Seq.to_list_rev @@ Seq.filter_map (Cfg.Node.outputs blk cfg) ~f:(fun e -> 
           Option.some_if (Cfg.Edge.label e <> `Fall) (Cfg.Edge.dst e))

let roots_of_blk cfg blk = Addr.Set.of_list @@ List.concat [
   if Cfg.Node.degree ~dir:`In = 0 then Block.addr blk else [];
   call_destinations cfg blk
] 

then [blk]
else [] in
if Insn.(is call) term then
Seq.fold ~init (Cfg.Node.outputs blk cfg)
~f:(fun rs e ->
if Cfg.Edge.label e <> `Fall then Cfg.Edge.dst e :: rs
else rs)
else init

let find_calls cfg roots =
let roots = List.fold ~init:Addr.Set.empty ~f:Set.add roots in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is Addr.Set.of_list for this.

Graphlib.depth_first_search (module Cfg)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need DFS for that as you do not need any specific ordering. Just iterate over all nodes. Also, you don't need roots, to check that root is root and add it to roots. This function is simply:

let find_calls cfg roots = Cfg.nodes cfg |> Seq.fold ~init:roots ~f:(fun roots n -> 
   Set.union roots (roots_of_node n))

cfg ~init:Block.Set.empty
~enter_node:(fun _ blk all ->
roots_of_blk roots cfg blk |>
List.fold ~init:all ~f:Set.add)

let reconstruct name roots cfg =
let roots = find_calls name roots cfg in
let init =
Cfg.nodes cfg |> Seq.fold ~init:Cfg.empty ~f:(fun cfg n ->
Cfg.Node.insert n cfg) in
let filtered =
Cfg.edges cfg |> Seq.fold ~init ~f:(fun cfg e ->
if Hashtbl.mem roots (Block.addr (Cfg.Edge.dst e)) then cfg
else Cfg.Edge.insert e cfg) in
let find_block addr =
Cfg.nodes cfg |> Seq.find ~f:(fun blk ->
Addr.equal addr (Block.addr blk)) in
Hashtbl.fold roots ~init:Symtab.empty
~f:(fun ~key:entry ~data:name syms ->
match find_block entry with
| None -> syms
| Some entry ->
let cfg : cfg =
with_return (fun {return} ->
Graphlib.depth_first_search (module Cfg)
filtered ~start:entry ~init:Cfg.empty
~enter_edge:(fun _ -> Cfg.Edge.insert)
~start_tree:(fun n t ->
if Block.equal n entry
then Cfg.Node.insert n t
else return t)) in
Symtab.add_symbol syms (name,entry,cfg))
let roots = find_calls cfg roots in
let filtered = Set.fold roots ~init:cfg
~f:(fun g root ->
let inputs = Cfg.Node.inputs root cfg in
Seq.fold inputs ~init:g ~f:(fun g e -> Cfg.Edge.remove e g)) in
Set.fold roots ~init:Symtab.empty
~f:(fun syms entry ->
let name = name (Block.addr entry) in
let cfg : cfg =
with_return (fun {return} ->
Graphlib.depth_first_search (module Cfg)
filtered ~start:entry ~init:Cfg.empty
~enter_edge:(fun _ -> Cfg.Edge.insert)
~start_tree:(fun n t ->
if Block.equal n entry
then Cfg.Node.insert n t
else return t)) in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the main difference here is that instead of checking each destination for is it a call or not, we just iterate over all calls and ask for incoming edges of a call. That makes sense, and this is indeed faster, provided that majority of calls are reachable from the whole program CFG.

Symtab.add_symbol syms (name,entry,cfg))

let of_blocks syms =
let reconstruct (cfg : cfg) =
Expand Down
20 changes: 14 additions & 6 deletions lib/bap_disasm/bap_disasm_symtab.ml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ type cfg = Cfg.t [@@deriving compare]

type fn = string * block * cfg [@@deriving compare]

let sexp_of_fn (name,block,cfg) =
let sexp_of_fn (name,block,_cfg) =
Sexp.List [sexp_of_string name; sexp_of_addr (Block.addr block)]

module Fn = Opaque.Make(struct
Expand All @@ -38,7 +38,7 @@ let compare t1 t2 =

type symtab = t [@@deriving compare, sexp_of]

let span ((name,entry,cfg) as fn) =
let span ((_name,_entry,cfg) as fn) =
Cfg.nodes cfg |> Seq.fold ~init:Memmap.empty ~f:(fun map blk ->
Memmap.add map (Block.memory blk) fn)

Expand All @@ -52,19 +52,27 @@ let merge m1 m2 =
Memmap.to_sequence m2 |> Seq.fold ~init:m1 ~f:(fun m1 (mem,x) ->
Memmap.add m1 mem x)

let filter_mem mem name entry =
Memmap.filter mem ~f:(fun (n,e,_) ->
not(String.(name = n) || Block.(entry = e)))

let remove t (name,entry,_) : t = {
names = Map.remove t.names name;
addrs = Map.remove t.addrs (Block.addr entry);
memory = Memmap.filter t.memory ~f:(fun (n,e,_) ->
not(String.(name = n) || Block.(entry = e)))
memory = filter_mem t.memory name entry;
}

let filter t ((name,entry,_ ) as fn) =
Copy link
Member

@ivg ivg Aug 10, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, so from the IRL discussion, the remove function is inefficient since it filters the memory mapping even if the symbol is not in the symtab, the fact that could be checked very fast with the Set membership operation.

However, this is the problem of the remove function, that should be fixed on its side. So, please remove the filter function and add the corresponding check to the remove function (in fact a single test for inclusion in addrs would be sufficient.

if Map.mem t.names name || Map.mem t.addrs (Block.addr entry) then
remove t fn
else t

let add_symbol t (name,entry,cfg) : t =
let data = name,entry,cfg in
let t = remove t data in
let t = filter t data in
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you please justify why we can use filter instead of remove here? It's not quite obvious to me.

{
addrs = Map.add t.addrs ~key:(Block.addr entry) ~data;
names = Map.add t.names ~key:name ~data;
names = Map.add t.names ~key:name ~data;
memory = merge t.memory (span data);
}

Expand Down