Skip to content

Commit

Permalink
improves symbolization facilities
Browse files Browse the repository at this point in the history
Implements support for various relocations and improves existing that
enables us to pass all tests without relying on external symbols or
tools such as objdump or radare2.

This branch support PLT-like relocations, as well as direct calls with
GLOB_DAT relocations (fixes BinaryAnalysisPlatform#1135). The PLT entries are constant
folded and memory references are then analyzed. We also extended the
analysis that detects stub functions to support various ABI and file
formats. For PowerPC MachO, that stores stubs directly in the text
section, we implemented a signature matching procedure to reliably
detect the stubs. We also significantly improved support of mips,
which was sufferening from missing function starts that correspond to
the stubbed functions as byteweigh is unable to detect these stubs.

In addition, this PR brings a new library called Bap_relation that is
a bidirectional mapping useful for storing addr <-> name mapping and
ensure their bijection. This library is now used explicitly or
implicitly (via the old symbolizer interface) by all our providers of
symbolic information. This change prevents symbolizers from providing
conflicting information, which may later lead to the knowledge base
conflicts.

We also removed so far the name to address translation service that we
recently introduced BinaryAnalysisPlatform#1119. We are not ready for this service yet (our
knowledge base is not having enough rules stored in it) and without
this rule we can disassemble 25% faster.

There are also a couple of minor fixes and quality of life
improvements:
- fixes Insn.dests domain functions
- a better default for the KB.Domain.Powerset inspect parameter
- makes glibc-runtime heuristic more aggressive
  • Loading branch information
ivg committed Aug 19, 2020
1 parent e4914cf commit ce0ae89
Show file tree
Hide file tree
Showing 34 changed files with 923 additions and 213 deletions.
1 change: 1 addition & 0 deletions .merlin
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ B _build/lib/knowledge
B lib/bap
B lib/bap_main
B lib/bap_recipe
B lib/bap_relation
B lib/bap_abi
B lib/bap_api
B lib/bitvec
Expand Down
6 changes: 5 additions & 1 deletion lib/bap_disasm/bap_disasm_insn.ml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ module Slot = struct
let dests =
let empty = Some (Set.empty (module Theory.Label)) in
let order x y : KB.Order.partial = match x,y with
| Some x,_ when Set.is_empty x -> LT
| _,Some x when Set.is_empty x -> GT
| None,None -> EQ
| None,_ | _,None -> NC
| Some x, Some y ->
Expand All @@ -146,7 +148,9 @@ module Slot = struct
if Set.is_subset y x then GT else NC in
let join x y = match x,y with
| None,None -> Ok None
| None,_ |Some _,None -> Error Jump_vs_Move
| None,Some x |Some x,None ->
if Set.is_empty x then Ok None
else Error Jump_vs_Move
| Some x, Some y -> Ok (Some (Set.union x y)) in
let module IO = struct
module Set = Set.Make_binable_using_comparator(Theory.Label)
Expand Down
38 changes: 28 additions & 10 deletions lib/bap_disasm/bap_disasm_symbolizer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ open Bap_image_std
open Bap_disasm_source
open KB.Syntax

include Bap_main.Loggers()

type t = {
path : string option;
Expand Down Expand Up @@ -48,28 +49,47 @@ let empty = create (fun _ -> None)
let chain ss =
create (fun addr -> List.find_map ss ~f:(fun s -> run s addr))

let string_of_addr addrs =
List.map addrs ~f:Addr.to_string |>
String.concat ~sep:", "

let report_broken = function
| Bap_relation.Non_injective_fwd (addrs,name) ->
info "skipping (%s) as they all have the same name %s"
(string_of_addr addrs) name
| Bap_relation.Non_injective_bwd (names,addr) ->
info "skipping (%s) as they all have the same address %a"
(String.concat names ~sep:", ") Addr.pp addr

let of_image img =
let symtab = Image.symbols img in
let names = Addr.Table.create () in
Table.iteri symtab ~f:(fun mem sym ->
let init = Bap_relation.empty Addr.compare String.compare in
Table.foldi symtab ~init ~f:(fun mem sym rels ->
let name = Image.Symbol.name sym
and addr = Memory.min_addr mem in
if not (Name.is_empty name)
then Hashtbl.set names ~key:addr ~data:name);
then Bap_relation.add rels addr name
else rels) |> fun rels ->
Bap_relation.matching rels ()
~saturated:(fun addr name () -> Hashtbl.add_exn names addr name)
~unmatched:(fun reason () -> report_broken reason);
{find = Hashtbl.find names; path=Image.filename img; biased=true}

let of_blocks seq =
let names =
let empty_rel = Bap_relation.empty Addr.compare String.compare in
Seq.fold seq ~init:String.Map.empty ~f:(fun addrs (name,addr,_) ->
Map.update addrs name (function
| Some addr' -> Addr.min addr addr'
| None -> addr)) |>
Map.to_sequence |>
Seq.fold ~init:Addr.Map.empty ~f:(fun names (name,entry) ->
Map.add_multi names entry name) in
create @@ fun addr -> match Map.find names addr with
| Some [name] -> Some name
| _ -> None
Seq.fold ~init:empty_rel ~f:(fun rels (name,entry) ->
Bap_relation.add rels entry name) |> fun rels ->
Bap_relation.matching rels Addr.Map.empty
~saturated:(fun addr name names -> Map.add_exn names addr name)
~unmatched:(fun reason names -> report_broken reason; names) in
create @@ Map.find names

module Factory = Factory.Make(struct type nonrec t = t end)

Expand Down Expand Up @@ -102,9 +122,7 @@ let providing agent s =
~propose:(provide_symbolizer s)

let get_name addr =
let data = Some (Word.to_bitvec addr) in
KB.Object.scoped Theory.Program.cls @@ fun label ->
KB.provide Theory.Label.addr label data >>= fun () ->
Theory.Label.for_addr (Word.to_bitvec addr) >>= fun label ->
KB.collect Theory.Label.name label >>| function
| None -> name_of_addr addr
| Some name -> name
Expand Down
1 change: 1 addition & 0 deletions lib/bap_image/bap_memory.ml
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,7 @@ let last_byte mem : t =
let contains mem =
Addr.between ~low:(min_addr mem) ~high:(max_addr mem)


let compare_with mem addr =
let low = min_addr mem and high = max_addr mem in
if Addr.between ~low ~high addr then `addr_is_inside else
Expand Down
1 change: 1 addition & 0 deletions lib/bap_image/bap_memory.mli
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ val create
-> addr
-> Bigstring.t -> t Or_error.t


val rebase : t -> addr -> t

val slot : (Theory.program, t option) KB.slot
Expand Down
42 changes: 22 additions & 20 deletions lib/bap_llvm/llvm_elf_loader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,16 +166,19 @@ void emit_symbol_entry(const ELFObjectFile<T> &obj, const SymbolRef &sym, ogre_d
auto name = prim::symbol_name(sym);
auto addr = prim::symbol_address(sym);
auto off = symbol_file_offset(obj, sym);
if (name && addr && off && !name->empty() &&
!is_external(*addr, *off, sym_elf->st_size)) {
s.entry("llvm:symbol-entry") << *name
<< *addr
<< sym_elf->st_size
<< *off
<< sym_elf->st_value;

if (sym_elf->getType() == ELF::STT_FUNC)
s.entry("llvm:code-entry") << *name << *off << sym_elf->st_size ;
if (name && addr && off && !name->empty()) {
if (is_external(*addr, *off, sym_elf->st_size) && sym_elf->st_value) {
s.entry("llvm:name-reference") << sym_elf->st_value << *name;
} else {
s.entry("llvm:symbol-entry") << *name
<< *addr
<< sym_elf->st_size
<< *off
<< sym_elf->st_value;

if (sym_elf->getType() == ELF::STT_FUNC)
s.entry("llvm:code-entry") << *name << *off << sym_elf->st_size ;
}
}
}

Expand All @@ -196,23 +199,22 @@ void emit_symbol_entries(const ELFObjectFile<T> &obj, ogre_doc &s) {
template <typename T>
void emit_relocations(const ELFObjectFile<T> &obj, ogre_doc &s) {
for (auto sec : obj.sections()) {
if (auto rel_sec = prim::relocated_section(sec)) {
for (auto rel : sec.relocations()) {
auto sym = rel.getSymbol();
if (sym != prim::end_symbols(obj)) {
uint64_t base = section_address(obj, *rel_sec);
auto raddr = prim::relocation_offset(rel) + base;
if (auto addr = prim::symbol_address(*sym))
if (*addr) s.entry("llvm:relocation") << raddr << *addr;
if (auto name = prim::symbol_name(*sym))
for (auto rel : sec.relocations()) {
auto sym = rel.getSymbol();
if (sym != prim::end_symbols(obj)) {
uint64_t raddr = prim::relocation_offset(rel);
if (auto addr = prim::symbol_address(*sym))
if (*addr) s.entry("llvm:relocation") << raddr << *addr;
if (auto name = prim::symbol_name(*sym))
if (!name->empty())
s.entry("llvm:name-reference") << raddr << *name;
}
}
}
}
}



} // namespace elf_loader

template <typename T>
Expand Down
9 changes: 1 addition & 8 deletions lib/bap_llvm/llvm_macho_loader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -409,20 +409,13 @@ void emit_indirect_symbols(const macho &obj, const MachO::dysymtab_command &dlc,
uint64_t sym_numb = section_size(obj, sec) / stride;
uint32_t tab_indx = section_reserved1(obj, sec);
uint64_t sec_addr = section_addr(obj, sec);
uint32_t sec_offs = section_offset(obj, sec);

for (uint32_t j = 0; j < sym_numb && tab_indx + j < dlc.nindirectsyms; j++) {
auto sym = get_indirect_symbol(obj, dlc, tab_indx + j);
if (sym != prim::end_symbols(obj)) {
if (auto name = prim::symbol_name(*sym)) {
auto sym_addr = sec_addr + j * stride;
auto sym_offs = sec_offs + j * stride;
s.entry("llvm:symbol-entry") << *name
<< sym_addr
<< stride
<< sym_offs
<< sym->getValue();
s.entry("llvm:code-entry") << *name << sym_offs << stride;
s.entry("llvm:name-reference") << sym_addr << *name;
}
}
}
Expand Down
70 changes: 70 additions & 0 deletions lib/bap_relation/bap_relation.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
open Base

type ('k,'d) t = Rel : {
vals : ('k, ('d,'vo) Set.t, 'ko) Map.t;
keys : ('d, ('k,'ko) Set.t, 'vo ) Map.t;
} -> ('k,'d) t

let empty (type key) (type data) compare_key compare_data =
let module D = struct
type t = data
include Comparator.Make(struct
type t = data
let compare = compare_data
let sexp_of_t _ = Sexp.List []
end)
end in
let module K = struct
type t = key
include Base.Comparator.Make(struct
type t = key
let compare = compare_key
let sexp_of_t _ = Sexp.List []
end)
end in
Rel {
vals = Map.empty (module K);
keys = Map.empty (module D);
}

let add (Rel {vals; keys}) key value = Rel {
vals = Map.update vals key ~f:(function
| Some vals -> Set.add vals value
| None -> Set.singleton (Map.comparator_s keys) value);
keys = Map.update keys value ~f:(function
| Some keys -> Set.add keys key
| None -> Set.singleton (Map.comparator_s vals) key);
}

type ('k,'s) non_injective =
| Non_injective_fwd of 'k list * 's
| Non_injective_bwd of 's list * 'k

let skips _ _ x = x
let skipu _ x = x


let find_multi xs x = match Map.find xs x with
| None -> []
| Some xs -> Set.to_list xs

let matching (Rel {vals; keys}) ?(saturated=skips) ?(unmatched=skipu) init =
Map.fold ~init vals ~f:(fun ~key ~data:vals init ->
match Set.to_list vals with
| [] -> assert false
| _ :: _ :: _ as vals->
unmatched (Non_injective_bwd (vals,key)) init
| [s] -> match find_multi keys s with
| [_] -> saturated key s init
| xs -> unmatched (Non_injective_fwd (xs,s)) init)

let fold (Rel {vals}) ~init ~f =
Map.fold vals ~init ~f:(fun ~key:left ~data:rights init ->
Set.fold ~init rights ~f:(fun init right -> f left right init))

let iter rels ~f = fold rels ~init:() ~f:(fun x s () -> f x s)

let is_empty (Rel {vals}) = Map.is_empty vals
let findl (Rel {vals}) = find_multi vals
let findr (Rel {keys}) = find_multi keys
let mem (Rel {vals; keys}) x s = Map.mem vals x && Map.mem keys s
102 changes: 102 additions & 0 deletions lib/bap_relation/bap_relation.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
(** A representation of relations between two sets.
A relation between two sets is a set of pairs made from the
elements of these sets. The precise mathematical defition is given
below. This module implements a bidirectional mapping between two
sets and computes their matching that defines bijections between
the sets.
{2 Format Definition and Notation}
Given two sets [K] and [S], with meta-variables [x,y,z] ranging
over [K] and meta-variables [r,s,t] ranging over [S] we will
denote a finitary relation [R] as a subset of the cartesian
product [K x S], which is a set of pairs [(x,r), ..., (z,t)],
which we represent as a bipartite graph [G = (K,S,R)].
*)

(** the type for relation between ['k] and ['s]. *)
type ('k,'s) t

(** [empty compare_k compare_s] the empty relation between two sets.
- [compare_k] is the function that defines order of the elements
of the set [K].
- [compare_s] is the function that defines order of the elements
of the set [S].
{3 Example}
{[
let empty = Bap_relation.empty
Int.compare
String.compare
]}
*)
val empty : ('k -> 'k -> int) -> ('s -> 's -> int) -> ('k,'s) t

(** [is_empty rel] is true if the relation [rel] is an empty set. *)
val is_empty : (_,_) t -> bool

(** [add relation x s] establishes a relation between [x] and [s]. *)
val add : ('k,'s) t -> 'k -> 's -> ('k,'s) t

(** [mem rel x s] is [true] if [(k,s)] is in the relation [rel]. *)
val mem : ('k,'s) t -> 'k -> 's -> bool

(** [findl rel x] finds all pairs in [rel] that have [x] on the left. *)
val findl : ('k,'s) t -> 'k -> 's list

(** [findr rel s] finds all pairs in [rel] that have [s] on the right. *)
val findr : ('k,'s) t -> 's -> 'k list

(** [fold rel init f] folds over all pairs in the relation [rel]. *)
val fold : ('k,'s) t -> init:'a -> f:('k -> 's -> 'a -> 'a) -> 'a

(** [iter rel f] iterates over all pairs in the relation [rel]. *)
val iter : ('k,'s) t -> f:('k -> 's -> unit) -> unit


(** {2 Bijections and matching}
The set of independent edges [M] (the matching) of the graph [G]
forms a finite bijection between [K] and [S]. It is guaranteed
that for each pair [(x,s)] in [M] there is no other pair in [M],
that will include [x] or [s].
Edges [R] that are not in the matching [M] represent a subset of
[R] that do not match because of one the two anomalies:
- A non-injective forward mapping occurs when the same value from
the set [S] is in relation with more than one value from the set
[K], e.g., [(x,s), (y,s)] is encoded as
[Non_injective_fwd ([x,y],s)];
- A non-injective backward mapping occurs when the same value from
the set [K] is in relation with more than one value from the set
[S], e.g., [(x,r), (x,s)] is encoded as
[Non_injective_bwd ([r;s],x);
*)

(** the reason why was the pair left unmatched *)
type ('k,'s) non_injective =
| Non_injective_fwd of 'k list * 's (** Non-injective forward mapping. *)
| Non_injective_bwd of 's list * 'k (** Non-injective backward mapping. *)

(** [matching relation data] computes the matching for the given [relation].
Calls [saturated x s data] for each [(x,s)] in the matching
[M] (see the module description) and [unmatched z reason d] for
each [(z,t)] in the relation that are not matched, the reason
is one of the:
- [Non_injective_fwd (xs,s)] if the mapping [K -> S] that is
induced by the [relation] is non-injective, because the set of
values [xs] from [K] are mapped to the same value [s] in [S].
- [Non_injective_bwd (ss,x)] if the mapping [S -> K] that is
induced by the [relation] is non-injective, because the set of
values [ss] from [S] are mapped to the same value [x] in [K].
*)
val matching : ('k,'s) t ->
?saturated : ('k -> 's -> 'a -> 'a) ->
?unmatched : (('k,'s) non_injective -> 'a -> 'a) -> 'a -> 'a
2 changes: 1 addition & 1 deletion lib/bap_types/bap_value.ml
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ let register (type a) ?public ?desc ?package ~name ~uuid
type t = S.t option [@@deriving bin_io]
end) in
let equal x y = S.compare x y = 0 in
let domain = KB.Domain.optional ~equal name in
let domain = KB.Domain.optional ~equal name ~inspect:S.sexp_of_t in
let slot = KB.Class.property ?public ?desc ~persistent ?package
Theory.Program.cls name domain in
register_slot ~uuid slot (module S)
Expand Down
2 changes: 1 addition & 1 deletion lib/knowledge/bap_knowledge.ml
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ module Domain = struct
let powerset (type t o)
(module S : Comparator.S with type t = t
and type comparator_witness = o)
?(inspect=sexp_of_opaque) name =
?(inspect=S.comparator.sexp_of_t) name =
let empty = Set.empty (module S) in
let order x y : Order.partial =
if Set.equal x y then EQ else
Expand Down
Loading

0 comments on commit ce0ae89

Please sign in to comment.