Skip to content

Commit

Permalink
fixes the base calculation
Browse files Browse the repository at this point in the history
1. For ELF files we compute base as the difference between the address of
any loadable code segment and its offset. If there are no loadable code
segments, then we find a section with minimal offset value and
subtract its offset from its address.

2. For COFF and MachO nothing is done, and I am not sure that we need
to do anything.

3. Removed special computation of the base
address (Base.from_sections_offset) from ELF, MachO, and COFF.

It is not tested on LLVM versions below 6, but I believe it should
work on versions as old as 3.4.

resolves #1183
  • Loading branch information
ivg committed Jul 24, 2020
1 parent d9cb60a commit 49f3d08
Show file tree
Hide file tree
Showing 12 changed files with 73 additions and 158 deletions.
8 changes: 3 additions & 5 deletions lib/bap_llvm/bap_llvm_ogre_coff.ml
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,8 @@ module Relocatable = struct
module Make(Fact : Ogre.S) = struct
open Fact.Syntax

module Base = Base_address(Fact)

let segments =
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.foreach Ogre.Query.(
select (from section_entry
$ virtual_section_header
Expand All @@ -75,7 +73,7 @@ module Relocatable = struct
Fact.provide mapped addr size start)

let sections =
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.foreach Ogre.Query.(
select (from section_entry))
~f:(fun (name,_,size,off) -> name,off,size) >>= fun s ->
Expand All @@ -86,7 +84,7 @@ module Relocatable = struct
Fact.provide named_region addr size name)

let code_regions =
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.foreach Ogre.Query.(
select (from section_entry $ code_entry)
~join:[[field name];
Expand Down
4 changes: 1 addition & 3 deletions lib/bap_llvm/bap_llvm_ogre_elf.ml
Original file line number Diff line number Diff line change
Expand Up @@ -81,10 +81,8 @@ module Relocatable = struct
module Make(Fact : Ogre.S) = struct
open Fact.Syntax

module Base = Base_address(Fact)

let segments =
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.foreach Ogre.Query.(begin
select (from section_entry $ section_flags)
~join:[[field name]]
Expand Down
11 changes: 1 addition & 10 deletions lib/bap_llvm/bap_llvm_ogre_loader.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ module type Parameters = sig
val pdb_path : string
end

(** default image base for relocatable files *)
let relocatable_base = 0x0L

module Fact(M : Monad.S) = struct
include Ogre.Make(M)
type 'a m = 'a M.t
Expand Down Expand Up @@ -53,15 +50,9 @@ module Ogre_loader(P : Parameters) = struct
| Macho -> make (module Macho)
| Unknown -> Fact.failf "file type is not supported" ()

let image_base =
Fact.require is_relocatable >>= fun is_rel ->
if Option.is_none P.image_base && is_rel
then Fact.return (Some relocatable_base)
else Fact.return P.image_base

let provide_base =
Fact.require default_base_address >>= fun real ->
image_base >>= function
match P.image_base with
| None -> Fact.provide base_address real
| Some base ->
let base_bias = Int64.(base - real) in
Expand Down
4 changes: 1 addition & 3 deletions lib/bap_llvm/bap_llvm_ogre_macho.ml
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,8 @@ module Relocatable = struct
module Make(Fact : Ogre.S) = struct
open Fact.Syntax

module Base = Base_address(Fact)

let segments =
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.foreach Ogre.Query.(begin
select (from section_entry $ code_entry)
~join:[[field name];
Expand Down
32 changes: 4 additions & 28 deletions lib/bap_llvm/bap_llvm_ogre_samples.ml
Original file line number Diff line number Diff line change
Expand Up @@ -36,34 +36,11 @@ module Sections(Fact : Ogre.S) = struct
Fact.provide named_region addr size name)
end


module Base_address(Fact : Ogre.S) = struct
open Scheme
open Fact.Syntax

let from_sections_offset =
Fact.require base_address >>= fun base ->
Fact.foreach Ogre.Query.(begin
select (from section_entry $ code_entry)
~join:[[field name];
[field size ~from:section_entry;
field size ~from:code_entry]]
end)
~f:(fun (_,_,_,off) _ -> off) >>= fun s ->
match Seq.min_elt s ~compare:Int64.compare with
| None -> Fact.return base
| Some x -> Fact.return Int64.(base - x)

end

module Relocatable_symbols(Fact : Ogre.S) = struct
open Scheme
open Fact.Syntax

module Base = Base_address(Fact)

let relocations =
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.collect
Ogre.Query.(select (from ref_internal)) >>= fun ints ->
Fact.Seq.iter ints ~f:(fun (sym_off, rel_off) ->
Expand All @@ -72,7 +49,7 @@ module Relocatable_symbols(Fact : Ogre.S) = struct
Fact.provide relocation relocation_addr symbol_addr)

let externals =
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.collect
Ogre.Query.(select (from ref_external)) >>= fun exts ->
Fact.Seq.iter exts ~f:(fun (off, name) ->
Expand All @@ -81,7 +58,7 @@ module Relocatable_symbols(Fact : Ogre.S) = struct
let symbols =
relocations >>= fun () ->
externals >>= fun () ->
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.collect Ogre.Query.(select (from symbol_entry)) >>= fun s ->
Fact.Seq.iter s ~f:(fun (name, _, size, off) ->
if Int64.(size = 0L) then Fact.return ()
Expand All @@ -100,10 +77,9 @@ module Relocatable_sections(Fact : Ogre.S) = struct
open Scheme
open Fact.Syntax

module Base = Base_address(Fact)

let sections =
Base.from_sections_offset >>= fun base ->
Fact.require base_address >>= fun base ->
Fact.collect Ogre.Query.(select (from section_entry)) >>= fun s ->
Fact.Seq.iter s
~f:(fun (name, _, size, off) ->
Expand Down
4 changes: 0 additions & 4 deletions lib/bap_llvm/bap_llvm_ogre_samples.mli
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,3 @@ end
module Code_regions(Fact : Ogre.S) : sig
val code_regions : unit Fact.t
end

module Base_address(Fact : Ogre.S) : sig
val from_sections_offset : int64 Fact.t
end
3 changes: 1 addition & 2 deletions lib/bap_llvm/llvm_coff_loader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,8 +253,7 @@ error_or<int64_t> symbol_relative_address(const coff_obj &obj, const SymbolRef &
auto base = obj.getImageBase();
auto addr = symbol_address(obj, sym);
if (!addr) return addr;
auto raddr = prim::relative_address(base, *addr);
return success(raddr);
return success(*addr - base);
}

error_or<int> section_number(const coff_obj &obj, const SymbolRef &s) {
Expand Down
144 changes: 56 additions & 88 deletions lib/bap_llvm/llvm_elf_loader.hpp
Original file line number Diff line number Diff line change
@@ -1,47 +1,6 @@
#ifndef LLVM_ELF_LOADER_HPP
#define LLVM_ELF_LOADER_HPP

// Clarification-relocation.
//
// The ELF loader provides information about common entries like segments, sections, and symbols.
// It also provides information about relocations, and there are some details here.
//
// Relocation info mainly targets relocatable files like shared libraries or kernel
// modules. Such files don't have an entry point, segments, symbol addresses, etc., and contain
// calls to unresolved locations like in the example below.
//
// ...
// 0000000000000014 <my_fun>:
// 14: 55 push %rbp
// 15: 48 89 e5 mov %rsp,%rbp
// 18: 48 83 ec 18 sub $0x18,%rsp
// 1c: 89 7d ec mov %edi,-0x14(%rbp)
// 1f: c7 45 f8 2a 00 00 00 movl $0x2a,-0x8(%rbp)
// 26: 8b 55 ec mov -0x14(%rbp),%edx
// 29: 8b 45 f8 mov -0x8(%rbp),%eax
// 2c: 89 d6 mov %edx,%esi
// 2e: 89 c7 mov %eax,%edi
//--> 30: e8 00 00 00 00 callq 35 <my_fun+0x21>
// 35: 89 45 fc mov %eax,-0x4(%rbp)
// 38: 8b 45 fc mov -0x4(%rbp),%eax
// 3b: c9 leaveq
// 3c: c3 retq
// ...
//
// 0x31 is the offset where a change of address is expected - 00 00 00 00 is definitely not
// an address. It could be a reference to a symbol defined in the same file or to a symbol defined somewhere
// else (an external symbol).
// So, our task is to resolve this case, i.e. to find a mapping from this offset to something sensible.
//
// First of all we should use absolute offset, i.e. file offsets to make every mapping unique.
// So full offset in example above will be computed as section offset + 0x31. And it is a place
// where relocation should be applied.
//
// We define two attributes for relocations, and every relocation is represented only by one of them:
// 1) ref-internal that is a mapping from one file offset to another ;
// 2) ref-external that is a mapping from file offset to some name.
//

#include <algorithm>
#include <iostream>
#include <iomanip>
Expand All @@ -64,21 +23,48 @@ bool is_rel(const ELFObjectFile<T> &obj) {
return (hdr->e_type == ELF::ET_REL);
}

//taking a smallest virtual address of loadable segments as a base address
template <typename Phdr>
uint64_t base_address(Phdr begin, Phdr end) {
if (begin == end) return 0;
std::vector<uint64_t> addrs;
for (auto it = begin; it != end; ++it)
if (it->p_type == ELF::PT_LOAD)
addrs.push_back(it->p_vaddr);
auto it = std::min_element(addrs.begin(), addrs.end());
if (it == addrs.end()) return 0;
return *it;
// Computes the base address of an ELF file.
//
// The base address is derived in one of two ways:
//
//  1. If the file has a loadable executable segment (PT_LOAD with the
//     PF_X flag set), the base is the difference between the virtual
//     address of that segment and its file offset. When several
//     segments qualify, the last one in program-header order is used,
//     since the search loop does not stop at the first match.
//
//  2. Otherwise (no segments, or no loadable code segments), the base
//     is the difference between the suggested address of the PROGBITS
//     section with the minimal file offset and that offset. For object
//     and relocatable files, it is usually 0 - 0x34.
//
// Finally, if there are no loadable code segments or PROGBITS sections,
// i.e., we don't really have a binary program but something else
// packed as an ELF file, we just return 0.
//
// The result is a uint64_t, so a "negative" difference (offset greater
// than address) is represented modulo 2^64.
template <typename T>
uint64_t base_address(const ELFObjectFile<T> &obj) {
// default result when neither a code segment nor a PROGBITS section is found
uint64_t base = 0L;
auto elf = *obj.getELFFile();
auto segs = prim::elf_program_headers(elf);
// segs.end() serves as the "not found" marker for the code segment
auto code = segs.end();

// remember the last loadable segment that is marked executable
for (auto it = segs.begin(); it != segs.end(); ++it)
if (it->p_type == ELF::PT_LOAD && (it->p_flags & ELF::PF_X))
code = it;

if (code != segs.end()) {
base = code->p_vaddr - code->p_offset;
} else {
// fall back to section headers: pick the PROGBITS section that
// comes first in the file, i.e., has the smallest sh_offset
auto secs = prim::elf_sections(elf);
auto first = secs.end();
auto smallest = std::numeric_limits<uint64_t>::max();
for (auto it = secs.begin(); it != secs.end(); ++it) {
if (it->sh_type == ELF::SHT_PROGBITS && it->sh_offset < smallest) {
first = it;
smallest = it->sh_offset;
}
}

// leave base at 0 when the file has no PROGBITS sections at all
if (first != secs.end())
base = first->sh_addr - first->sh_offset;
}
return base;
}

template <typename T>
uint64_t base_address(const ELFObjectFile<T> &obj);

template <typename T>
void file_header(const ELFObjectFile<T> &obj, ogre_doc &s) {
Expand All @@ -94,28 +80,9 @@ std::string name_of_index(std::size_t i) {
return s.str();
}

template <typename I>
void program_headers(I begin, I end, ogre_doc &s) {
std::size_t i = 0;
uint64_t base = base_address(begin, end);
for (auto it = begin; it != end; ++it, ++i) {
bool ld = (it->p_type == ELF::PT_LOAD);
bool r = static_cast<bool>(it->p_flags & ELF::PF_R);
bool w = static_cast<bool>(it->p_flags & ELF::PF_W);
bool x = static_cast<bool>(it->p_flags & ELF::PF_X);
auto off = it->p_offset;
auto filesz = it->p_filesz;
auto name = name_of_index(i);
auto addr = prim::relative_address(base, it->p_vaddr);
s.entry("program-header") << name << off << filesz;
s.entry("virtual-program-header") << name << addr << it->p_memsz;
s.entry("program-header-flags") << name << ld << r << w << x;
}
}

template <typename T>
void section_header(const T &hdr, const std::string &name, uint64_t base, ogre_doc &s) {
auto addr = prim::relative_address(base, hdr.sh_addr);
auto addr = hdr.sh_addr - base;
s.entry("section-entry") << name << addr << hdr.sh_size << hdr.sh_offset;
bool w = static_cast<bool>(hdr.sh_flags & ELF::SHF_WRITE);
bool x = static_cast<bool>(hdr.sh_flags & ELF::SHF_EXECINSTR);
Expand Down Expand Up @@ -154,7 +121,7 @@ error_or<int64_t> symbol_address(const ELFObjectFile<T> &obj, const SymbolRef &s
auto addr = prim::symbol_address(sym);
if (!addr) return addr;
auto base = base_address(obj);
return success(prim::relative_address(base, *addr));
return success(*addr - base);
}
}

Expand Down Expand Up @@ -195,16 +162,23 @@ void symbol_entry(const ELFObjectFile<T> &obj, const SymbolRef &sym, ogre_doc &s
#if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \
|| LLVM_VERSION_MAJOR >= 4

template <typename T>
uint64_t base_address(const ELFObjectFile<T> &obj) {
auto hdrs = prim::elf_program_headers(*obj.getELFFile());
return base_address(hdrs.begin(), hdrs.end());
}

template <typename T>
void program_headers(const ELFObjectFile<T> &obj, ogre_doc &s) {
uint64_t base = base_address(obj);
auto hdrs = prim::elf_program_headers(*obj.getELFFile());
program_headers(hdrs.begin(), hdrs.end(), s);
for (auto it = hdrs.begin(); it != hdrs.end(); ++it) {
bool ld = (it->p_type == ELF::PT_LOAD);
bool r = static_cast<bool>(it->p_flags & ELF::PF_R);
bool w = static_cast<bool>(it->p_flags & ELF::PF_W);
bool x = static_cast<bool>(it->p_flags & ELF::PF_X);
auto off = it->p_offset;
auto filesz = it->p_filesz;
auto name = name_of_index(it - hdrs.begin());
auto addr = it->p_vaddr - base;
s.entry("program-header") << name << off << filesz;
s.entry("virtual-program-header") << name << addr << it->p_memsz;
s.entry("program-header-flags") << name << ld << r << w << x;
}
}

template <typename T>
Expand Down Expand Up @@ -271,12 +245,6 @@ uint64_t section_offset(const ELFObjectFile<T> &obj, section_iterator it) {

#elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 4

template <typename T>
uint64_t base_address(const ELFObjectFile<T> &obj) {
auto elf = obj.getELFFile();
return base_address(elf->begin_program_headers(), elf->end_program_headers());
}

template <typename T>
void program_headers(const ELFObjectFile<T> &obj, ogre_doc &s) {
auto elf = obj.getELFFile();
Expand Down
8 changes: 4 additions & 4 deletions lib/bap_llvm/llvm_macho_loader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ void segment_command(const T &cmd, uint64_t base, ogre_doc &s) {
bool x = static_cast<bool>(cmd.initprot & MachO::VM_PROT_EXECUTE);
s.entry("segment-command") << cmd.segname << cmd.fileoff << cmd.filesize;
s.entry("segment-command-flags") << cmd.segname << r << w << x;
s.entry("virtual-segment-command") << cmd.segname << prim::relative_address(base, cmd.vmaddr) << cmd.vmsize;
s.entry("virtual-segment-command") << cmd.segname << (cmd.vmaddr - base) << cmd.vmsize;
}

uint32_t filetype(const macho &obj) {
Expand Down Expand Up @@ -277,7 +277,7 @@ error_or<int64_t> symbol_address(const macho &obj, const SymbolRef &sym) {
auto addr = prim::symbol_address(sym);
if (!addr) return addr;
auto base = image_base(obj);
return success(prim::relative_address(base, *addr));
return success(*addr - base);
}

void relocations(const macho &obj, ogre_doc &s) {
Expand All @@ -302,7 +302,7 @@ void sections(const macho &obj, ogre_doc &s) {
auto name = prim::section_name(sec);
auto offs = section_offset(obj, section_iterator(sec));
if (addr && name && size) {
section(*name, prim::relative_address(base, *addr), *size, offs, s);
section(*name, *addr-base, *size, offs, s);
if (is_code_section(obj, sec))
s.entry("code-entry") << *name << offs << *size;
}
Expand Down Expand Up @@ -427,7 +427,7 @@ void indirect_symbols(const macho &obj, const MachO::dysymtab_command &dlc, ogre
auto sym = get_indirect_symbol(obj, dlc, tab_indx + j);
if (sym != prim::end_symbols(obj)) {
if (auto name = prim::symbol_name(*sym)) {
auto sym_addr = prim::relative_address(base, sec_addr + j * stride);
auto sym_addr = sec_addr + j * stride - base;
auto sym_offs = sec_offs + j * stride;
s.entry("symbol-entry") << *name << sym_addr << stride << sym_offs ;
s.entry("code-entry") << *name << sym_offs << stride;
Expand Down
Loading

0 comments on commit 49f3d08

Please sign in to comment.