diff --git a/lib/bap_llvm/bap_llvm_ogre_coff.ml b/lib/bap_llvm/bap_llvm_ogre_coff.ml index 93fb556ee2..79c21383b6 100644 --- a/lib/bap_llvm/bap_llvm_ogre_coff.ml +++ b/lib/bap_llvm/bap_llvm_ogre_coff.ml @@ -56,10 +56,8 @@ module Relocatable = struct module Make(Fact : Ogre.S) = struct open Fact.Syntax - module Base = Base_address(Fact) - let segments = - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.foreach Ogre.Query.( select (from section_entry $ virtual_section_header @@ -75,7 +73,7 @@ module Relocatable = struct Fact.provide mapped addr size start) let sections = - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.foreach Ogre.Query.( select (from section_entry)) ~f:(fun (name,_,size,off) -> name,off,size) >>= fun s -> @@ -86,7 +84,7 @@ module Relocatable = struct Fact.provide named_region addr size name) let code_regions = - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.foreach Ogre.Query.( select (from section_entry $ code_entry) ~join:[[field name]; diff --git a/lib/bap_llvm/bap_llvm_ogre_elf.ml b/lib/bap_llvm/bap_llvm_ogre_elf.ml index 49ff407e85..b3983f8e36 100644 --- a/lib/bap_llvm/bap_llvm_ogre_elf.ml +++ b/lib/bap_llvm/bap_llvm_ogre_elf.ml @@ -81,10 +81,8 @@ module Relocatable = struct module Make(Fact : Ogre.S) = struct open Fact.Syntax - module Base = Base_address(Fact) - let segments = - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.foreach Ogre.Query.(begin select (from section_entry $ section_flags) ~join:[[field name]] diff --git a/lib/bap_llvm/bap_llvm_ogre_loader.ml b/lib/bap_llvm/bap_llvm_ogre_loader.ml index 978e72226d..30b5effb62 100644 --- a/lib/bap_llvm/bap_llvm_ogre_loader.ml +++ b/lib/bap_llvm/bap_llvm_ogre_loader.ml @@ -16,9 +16,6 @@ module type Parameters = sig val pdb_path : string end -(** default image base for relocatable files *) -let relocatable_base = 0x0L 
- module Fact(M : Monad.S) = struct include Ogre.Make(M) type 'a m = 'a M.t @@ -53,15 +50,9 @@ module Ogre_loader(P : Parameters) = struct | Macho -> make (module Macho) | Unknown -> Fact.failf "file type is not supported" () - let image_base = - Fact.require is_relocatable >>= fun is_rel -> - if Option.is_none P.image_base && is_rel - then Fact.return (Some relocatable_base) - else Fact.return P.image_base - let provide_base = Fact.require default_base_address >>= fun real -> - image_base >>= function + match P.image_base with | None -> Fact.provide base_address real | Some base -> let base_bias = Int64.(base - real) in diff --git a/lib/bap_llvm/bap_llvm_ogre_macho.ml b/lib/bap_llvm/bap_llvm_ogre_macho.ml index 33a716aa24..bacbd7f179 100644 --- a/lib/bap_llvm/bap_llvm_ogre_macho.ml +++ b/lib/bap_llvm/bap_llvm_ogre_macho.ml @@ -34,10 +34,8 @@ module Relocatable = struct module Make(Fact : Ogre.S) = struct open Fact.Syntax - module Base = Base_address(Fact) - let segments = - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.foreach Ogre.Query.(begin select (from section_entry $ code_entry) ~join:[[field name]; diff --git a/lib/bap_llvm/bap_llvm_ogre_samples.ml b/lib/bap_llvm/bap_llvm_ogre_samples.ml index fda2200a1a..635386c2ef 100644 --- a/lib/bap_llvm/bap_llvm_ogre_samples.ml +++ b/lib/bap_llvm/bap_llvm_ogre_samples.ml @@ -36,34 +36,11 @@ module Sections(Fact : Ogre.S) = struct Fact.provide named_region addr size name) end - -module Base_address(Fact : Ogre.S) = struct - open Scheme - open Fact.Syntax - - let from_sections_offset = - Fact.require base_address >>= fun base -> - Fact.foreach Ogre.Query.(begin - select (from section_entry $ code_entry) - ~join:[[field name]; - [field size ~from:section_entry; - field size ~from:code_entry]] - end) - ~f:(fun (_,_,_,off) _ -> off) >>= fun s -> - match Seq.min_elt s ~compare:Int64.compare with - | None -> Fact.return base - | Some x -> Fact.return Int64.(base - x) - -end - 
module Relocatable_symbols(Fact : Ogre.S) = struct open Scheme open Fact.Syntax - - module Base = Base_address(Fact) - let relocations = - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.collect Ogre.Query.(select (from ref_internal)) >>= fun ints -> Fact.Seq.iter ints ~f:(fun (sym_off, rel_off) -> @@ -72,7 +49,7 @@ module Relocatable_symbols(Fact : Ogre.S) = struct Fact.provide relocation relocation_addr symbol_addr) let externals = - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.collect Ogre.Query.(select (from ref_external)) >>= fun exts -> Fact.Seq.iter exts ~f:(fun (off, name) -> @@ -81,7 +58,7 @@ module Relocatable_symbols(Fact : Ogre.S) = struct let symbols = relocations >>= fun () -> externals >>= fun () -> - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.collect Ogre.Query.(select (from symbol_entry)) >>= fun s -> Fact.Seq.iter s ~f:(fun (name, _, size, off) -> if Int64.(size = 0L) then Fact.return () @@ -100,10 +77,9 @@ module Relocatable_sections(Fact : Ogre.S) = struct open Scheme open Fact.Syntax - module Base = Base_address(Fact) let sections = - Base.from_sections_offset >>= fun base -> + Fact.require base_address >>= fun base -> Fact.collect Ogre.Query.(select (from section_entry)) >>= fun s -> Fact.Seq.iter s ~f:(fun (name, _, size, off) -> diff --git a/lib/bap_llvm/bap_llvm_ogre_samples.mli b/lib/bap_llvm/bap_llvm_ogre_samples.mli index 4d46dfad71..50929f2650 100644 --- a/lib/bap_llvm/bap_llvm_ogre_samples.mli +++ b/lib/bap_llvm/bap_llvm_ogre_samples.mli @@ -19,7 +19,3 @@ end module Code_regions(Fact : Ogre.S) : sig val code_regions : unit Fact.t end - -module Base_address(Fact : Ogre.S) : sig - val from_sections_offset : int64 Fact.t -end diff --git a/lib/bap_llvm/llvm_coff_loader.hpp b/lib/bap_llvm/llvm_coff_loader.hpp index a6b1478fd4..4c795e4a3d 100644 --- a/lib/bap_llvm/llvm_coff_loader.hpp +++ 
b/lib/bap_llvm/llvm_coff_loader.hpp @@ -253,8 +253,7 @@ error_or symbol_relative_address(const coff_obj &obj, const SymbolRef & auto base = obj.getImageBase(); auto addr = symbol_address(obj, sym); if (!addr) return addr; - auto raddr = prim::relative_address(base, *addr); - return success(raddr); + return success(*addr - base); } error_or section_number(const coff_obj &obj, const SymbolRef &s) { diff --git a/lib/bap_llvm/llvm_elf_loader.hpp b/lib/bap_llvm/llvm_elf_loader.hpp index aee9a59458..5230e34ffe 100644 --- a/lib/bap_llvm/llvm_elf_loader.hpp +++ b/lib/bap_llvm/llvm_elf_loader.hpp @@ -1,47 +1,6 @@ #ifndef LLVM_ELF_LOADER_HPP #define LLVM_ELF_LOADER_HPP -// Clarification-relocation. -// -// Elf loader provide information about common entries like segments, sectuions and symbols -// Also it provides information about relocations, and there are some details here. -// -// Relocation info is targeting mainly for relocatable files like shared libraries or kernel -// modules. Such files don't have entry point or segments, symbol addresses etc. and contain -// calls to unresolved locations like in example below. -// -// ... -// 0000000000000014 : -// 14: 55 push %rbp -// 15: 48 89 e5 mov %rsp,%rbp -// 18: 48 83 ec 18 sub $0x18,%rsp -// 1c: 89 7d ec mov %edi,-0x14(%rbp) -// 1f: c7 45 f8 2a 00 00 00 movl $0x2a,-0x8(%rbp) -// 26: 8b 55 ec mov -0x14(%rbp),%edx -// 29: 8b 45 f8 mov -0x8(%rbp),%eax -// 2c: 89 d6 mov %edx,%esi -// 2e: 89 c7 mov %eax,%edi -//--> 30: e8 00 00 00 00 callq 35 -// 35: 89 45 fc mov %eax,-0x4(%rbp) -// 38: 8b 45 fc mov -0x4(%rbp),%eax -// 3b: c9 leaveq -// 3c: c3 retq -// ... -// -// 0x31 is offset where some changes in address expected - 00 00 00 00 defenetly is not -// an address. It could be a reference to a symbol defined in same file or to a symbol defined somewhere -// else (external symbol). -// So, our task is to resolve this case, i.e. to find a mapping from this offset to something sensible. 
-// -// First of all we should use absolute offset, i.e. file offsets to make every mapping unique. -// So full offset in example above will be computed as section offset + 0x31. And it is a place -// where relocation should be applied. -// -// We define two attributes for relocations, and every relocation is represented only by one of them: -// 1) ref-internal that is a mapping from one file offset to another ; -// 2) ref-external that is a mapping from file offset to some name. -// - #include #include #include @@ -64,21 +23,48 @@ bool is_rel(const ELFObjectFile &obj) { return (hdr->e_type == ELF::ET_REL); } -//taking a smallest virtual address of loadable segments as a base address -template -uint64_t base_address(Phdr begin, Phdr end) { - if (begin == end) return 0; - std::vector addrs; - for (auto it = begin; it != end; ++it) - if (it->p_type == ELF::PT_LOAD) - addrs.push_back(it->p_vaddr); - auto it = std::min_element(addrs.begin(), addrs.end()); - if (it == addrs.end()) return 0; - return *it; +// computes the base address of an ELF file. +// +// The base address is the difference between the virtual address +// and the file offset of the loadable executable segment (the last +// one, if there are several). If there is no such segment, it is the +// difference between the suggested address of the PROGBITS section +// with the minimal file offset and that offset. For object and +// relocatable files, it is usually 0 - 0x34. +// +// Finally, if there are neither loadable executable segments nor +// PROGBITS sections, i.e., we don't really have a binary program but +// something else packed as an ELF file, we just return 0. 
+template +uint64_t base_address(const ELFObjectFile &obj) { + uint64_t base = 0L; + auto elf = *obj.getELFFile(); + auto segs = prim::elf_program_headers(elf); + auto code = segs.end(); + + for (auto it = segs.begin(); it != segs.end(); ++it) + if (it->p_type == ELF::PT_LOAD && (it->p_flags & ELF::PF_X)) + code = it; + + if (code != segs.end()) { + base = code->p_vaddr - code->p_offset; + } else { + auto secs = prim::elf_sections(elf); + auto first = secs.end(); + auto smallest = std::numeric_limits::max(); + for (auto it = secs.begin(); it != secs.end(); ++it) { + if (it->sh_type == ELF::SHT_PROGBITS && it->sh_offset < smallest) { + first = it; + smallest = it->sh_offset; + } + } + + if (first != secs.end()) + base = first->sh_addr - first->sh_offset; + } + return base; } -template -uint64_t base_address(const ELFObjectFile &obj); template void file_header(const ELFObjectFile &obj, ogre_doc &s) { @@ -94,28 +80,9 @@ std::string name_of_index(std::size_t i) { return s.str(); } -template -void program_headers(I begin, I end, ogre_doc &s) { - std::size_t i = 0; - uint64_t base = base_address(begin, end); - for (auto it = begin; it != end; ++it, ++i) { - bool ld = (it->p_type == ELF::PT_LOAD); - bool r = static_cast(it->p_flags & ELF::PF_R); - bool w = static_cast(it->p_flags & ELF::PF_W); - bool x = static_cast(it->p_flags & ELF::PF_X); - auto off = it->p_offset; - auto filesz = it->p_filesz; - auto name = name_of_index(i); - auto addr = prim::relative_address(base, it->p_vaddr); - s.entry("program-header") << name << off << filesz; - s.entry("virtual-program-header") << name << addr << it->p_memsz; - s.entry("program-header-flags") << name << ld << r << w << x; - } -} - template void section_header(const T &hdr, const std::string &name, uint64_t base, ogre_doc &s) { - auto addr = prim::relative_address(base, hdr.sh_addr); + auto addr = hdr.sh_addr - base; s.entry("section-entry") << name << addr << hdr.sh_size << hdr.sh_offset; bool w = static_cast(hdr.sh_flags & 
ELF::SHF_WRITE); bool x = static_cast(hdr.sh_flags & ELF::SHF_EXECINSTR); @@ -154,7 +121,7 @@ error_or symbol_address(const ELFObjectFile &obj, const SymbolRef &s auto addr = prim::symbol_address(sym); if (!addr) return addr; auto base = base_address(obj); - return success(prim::relative_address(base, *addr)); + return success(*addr - base); } } @@ -195,16 +162,23 @@ void symbol_entry(const ELFObjectFile &obj, const SymbolRef &sym, ogre_doc &s #if LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 8 \ || LLVM_VERSION_MAJOR >= 4 -template -uint64_t base_address(const ELFObjectFile &obj) { - auto hdrs = prim::elf_program_headers(*obj.getELFFile()); - return base_address(hdrs.begin(), hdrs.end()); -} - template void program_headers(const ELFObjectFile &obj, ogre_doc &s) { + uint64_t base = base_address(obj); auto hdrs = prim::elf_program_headers(*obj.getELFFile()); - program_headers(hdrs.begin(), hdrs.end(), s); + for (auto it = hdrs.begin(); it != hdrs.end(); ++it) { + bool ld = (it->p_type == ELF::PT_LOAD); + bool r = static_cast(it->p_flags & ELF::PF_R); + bool w = static_cast(it->p_flags & ELF::PF_W); + bool x = static_cast(it->p_flags & ELF::PF_X); + auto off = it->p_offset; + auto filesz = it->p_filesz; + auto name = name_of_index(it - hdrs.begin()); + auto addr = it->p_vaddr - base; + s.entry("program-header") << name << off << filesz; + s.entry("virtual-program-header") << name << addr << it->p_memsz; + s.entry("program-header-flags") << name << ld << r << w << x; + } } template @@ -271,12 +245,6 @@ uint64_t section_offset(const ELFObjectFile &obj, section_iterator it) { #elif LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR == 4 -template -uint64_t base_address(const ELFObjectFile &obj) { - auto elf = obj.getELFFile(); - return base_address(elf->begin_program_headers(), elf->end_program_headers()); -} - template void program_headers(const ELFObjectFile &obj, ogre_doc &s) { auto elf = obj.getELFFile(); diff --git a/lib/bap_llvm/llvm_macho_loader.hpp 
b/lib/bap_llvm/llvm_macho_loader.hpp index 582914009f..ef53f9b8cb 100644 --- a/lib/bap_llvm/llvm_macho_loader.hpp +++ b/lib/bap_llvm/llvm_macho_loader.hpp @@ -53,7 +53,7 @@ void segment_command(const T &cmd, uint64_t base, ogre_doc &s) { bool x = static_cast(cmd.initprot & MachO::VM_PROT_EXECUTE); s.entry("segment-command") << cmd.segname << cmd.fileoff << cmd.filesize; s.entry("segment-command-flags") << cmd.segname << r << w << x; - s.entry("virtual-segment-command") << cmd.segname << prim::relative_address(base, cmd.vmaddr) << cmd.vmsize; + s.entry("virtual-segment-command") << cmd.segname << (cmd.vmaddr - base) << cmd.vmsize; } uint32_t filetype(const macho &obj) { @@ -277,7 +277,7 @@ error_or symbol_address(const macho &obj, const SymbolRef &sym) { auto addr = prim::symbol_address(sym); if (!addr) return addr; auto base = image_base(obj); - return success(prim::relative_address(base, *addr)); + return success(*addr - base); } void relocations(const macho &obj, ogre_doc &s) { @@ -302,7 +302,7 @@ void sections(const macho &obj, ogre_doc &s) { auto name = prim::section_name(sec); auto offs = section_offset(obj, section_iterator(sec)); if (addr && name && size) { - section(*name, prim::relative_address(base, *addr), *size, offs, s); + section(*name, *addr-base, *size, offs, s); if (is_code_section(obj, sec)) s.entry("code-entry") << *name << offs << *size; } @@ -427,7 +427,7 @@ void indirect_symbols(const macho &obj, const MachO::dysymtab_command &dlc, ogre auto sym = get_indirect_symbol(obj, dlc, tab_indx + j); if (sym != prim::end_symbols(obj)) { if (auto name = prim::symbol_name(*sym)) { - auto sym_addr = prim::relative_address(base, sec_addr + j * stride); + auto sym_addr = sec_addr + j * stride - base; auto sym_offs = sec_offs + j * stride; s.entry("symbol-entry") << *name << sym_addr << stride << sym_offs ; s.entry("code-entry") << *name << sym_offs << stride; diff --git a/lib/bap_llvm/llvm_pdb_loader.hpp b/lib/bap_llvm/llvm_pdb_loader.hpp index 
134a5b0f1a..0a94a79f07 100644 --- a/lib/bap_llvm/llvm_pdb_loader.hpp +++ b/lib/bap_llvm/llvm_pdb_loader.hpp @@ -91,7 +91,7 @@ namespace pdb_loader { using namespace llvm; struct section_info { - int64_t rel_addr; + uint64_t rel_addr; uint64_t offset; }; @@ -110,7 +110,7 @@ coff_sections collect_sections(const object::COFFObjectFile &obj) { auto base = obj.getImageBase(); for (auto sec : prim::sections(obj)) { if (auto addr = prim::section_address(sec)) { - auto raddr = prim::relative_address(base, *addr); + auto raddr = *addr - base; auto offset = section_offset(obj, sec); secs.insert(std::make_pair(i, section_info{raddr,offset})); } diff --git a/lib/bap_llvm/llvm_primitives.cpp b/lib/bap_llvm/llvm_primitives.cpp index e4726aef74..d6fceee932 100644 --- a/lib/bap_llvm/llvm_primitives.cpp +++ b/lib/bap_llvm/llvm_primitives.cpp @@ -8,11 +8,6 @@ namespace prim { using namespace llvm; using namespace llvm::object; - -int64_t relative_address(uint64_t base, uint64_t abs) { - return (abs - base); -} - // some cases are commented out because they are not supported // by all versions of LLVM, we will later use a macro to enable // them depending on the version. diff --git a/lib/bap_llvm/llvm_primitives.hpp b/lib/bap_llvm/llvm_primitives.hpp index 3937641299..33b15db72b 100644 --- a/lib/bap_llvm/llvm_primitives.hpp +++ b/lib/bap_llvm/llvm_primitives.hpp @@ -45,10 +45,6 @@ error_or symbol_size(const SymbolRef &s); // relocation uint64_t relocation_offset(const RelocationRef &rel); -// misc -// returns abs - base -int64_t relative_address(uint64_t base, uint64_t abs); - typedef std::vector> symbols_sizes; //replace to computeSymbolSizes function, because sometimes it's