From 029e569195b5acbb850995989d49db51a6aef63b Mon Sep 17 00:00:00 2001 From: Christoph Schueler Date: Tue, 24 Sep 2024 19:13:28 +0300 Subject: [PATCH] Smaller fixes --- objutils/__init__.py | 12 +- objutils/dwarf/__init__.py | 352 +++++++++++++++++++++++------ objutils/dwarf/constants.py | 11 + objutils/dwarf/lineprog.py | 2 +- objutils/elf/__init__.py | 45 +++- objutils/elf/defs.py | 164 +++++++++++++- objutils/elf/model.py | 114 +++++++++- objutils/ihex.py | 2 +- objutils/scripts/oj_elf_extract.py | 4 + objutils/scripts/oj_elf_info.py | 33 ++- poetry.lock | 69 +++++- pyproject.toml | 1 + setup.py | 1 + 13 files changed, 712 insertions(+), 98 deletions(-) diff --git a/objutils/__init__.py b/objutils/__init__.py index ebd5253..09c49e3 100644 --- a/objutils/__init__.py +++ b/objutils/__init__.py @@ -18,7 +18,7 @@ __copyright__ = """ objutils - Object file library for Python. - (C) 2010-2020 by Christoph Schueler + (C) 2010-2024 by Christoph Schueler All Rights Reserved @@ -37,6 +37,9 @@ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """ +from rich import pretty +from rich.console import Console +from rich.traceback import install as tb_install import objutils.ash import objutils.binfile @@ -57,6 +60,9 @@ from objutils.section import Section # noqa: F401 +pretty.install() + + registry.register("bin", objutils.binfile.Reader, objutils.binfile.Writer, "Plain binary format.") registry.register( "binzip", @@ -94,6 +100,10 @@ registry.register("shf", objutils.shf.Reader, objutils.shf.Writer, "S Hexdump Format (rfc4149).") +console = Console() +tb_install(show_locals=True, max_frames=3) # Install custom exception handler. + + def load(codec_name, *args, **kws): """Load hex data from file. diff --git a/objutils/dwarf/__init__.py b/objutils/dwarf/__init__.py index cd86d17..8884ac2 100644 --- a/objutils/dwarf/__init__.py +++ b/objutils/dwarf/__init__.py @@ -6,7 +6,7 @@ __copyright__ = """ objutils - Object file library for Python. - (C) 2010-2021 by Christoph Schueler All Rights Reserved @@ -30,9 +30,7 @@ import json from collections import OrderedDict from dataclasses import dataclass, field - -# from pprint import pprint -from typing import Any, List +from typing import Any, List, Optional from construct import ( Array, @@ -42,6 +40,7 @@ Enum, Flag, If, + IfThenElse, Int8sl, Int8ul, Int16sb, @@ -76,13 +75,14 @@ Block4b, Block4l, BlockUleb, + Endianess, FilenameSequence, One, StrP, ) from objutils.dwarf.lineprog import LineNumberProgram from objutils.dwarf.sm import StackMachine -from objutils.elf.defs import Endianess +from objutils.elf import model ENCODED_ATTRIBUTES = { @@ -115,10 +115,12 @@ def encoding_repr(encoding, value): class Attribute: encoding: constants.AttributeEncoding form: constants.AttributeForm + special_value: Optional[Any] = None def __iter__(self): yield self.encoding yield self.form + yield self.special_value @dataclass(frozen=True) @@ -161,6 +163,127 @@ def toJSON(self): return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) +class Abbrevations: + + AbbrevationHeader = Struct( + "start" / Tell, + "code" / ULEB, + "stop" / Tell, + ) + + AbbrevationBody = Struct( + "start" / Tell, + "tag" / ULEB, + "children" / Int8ul, + "stop" / Tell, + ) + + AttributeStruct = Struct( + "start" / Tell, + "attrValue" / ULEB, + "formValue" / ULEB, + "stop" / Tell, + "next" / Computed((this.attrValue != 0) and (this.formValue != 0)), + ) + + SpecialValueStruct = Struct( + "start" / Tell, + "value" / ULEB, + "stop" / Tell, + ) + + def __init__(self, section): + self.image = section.image + self.length = len(section.image) + self.abbrevations = {} + + def get(self, abbr_offset, item): + if item == 0: + return None + if (abbr_offset, item) in self.abbrevations: + return self.abbrevations.get( + ( + abbr_offset, + item, + ) + ) + else: + return self._fetch(abbr_offset, item) + + def _fetch(self, abbr_offset, item): + offset = abbr_offset + while True: + abbrv_header = self.AbbrevationHeader.parse(self.image[offset:]) + # print("ABBREV-CODE", hex(offset), abbrv_header) + code = abbrv_header.code + offset += abbrv_header.stop - abbrv_header.start + if code == 0: + # print("CONT!!!") + continue + key = ( + abbr_offset, + code, + ) + if offset >= self.length: + break + abbrv_body = self.AbbrevationBody.parse(self.image[offset:]) + tag = abbrv_body.tag + children = abbrv_body.children + # print("BODY", abbrv_body, constants.Tag(tag).name) + offset += abbrv_body.stop - abbrv_body.start + + try: + self.abbrevations[key] = Abbrevation( + constants.Tag(tag).name, + (children == 1), + [], + ) + except TypeError: + self.abbrevations[key] = Abbrevation( + f"User TAG value: 0x{tag:04x}", + (children == 1), + [], + ) + + while True: + attr = self.AttributeStruct.parse(self.image[offset:]) + # print("ATTR", attr) + if not attr.next: + # key_offset = offset + break + if attr.formValue == constants.AttributeForm.DW_FORM_implicit_const: + offset += attr.stop - attr.start + special_value = self.SpecialValueStruct.parse(self.image[offset:]) + offset += special_value.stop - special_value.start + self.abbrevations[key].attrs.append( + Attribute( + constants.AttributeEncoding(attr.attrValue), + constants.AttributeForm(attr.formValue), + special_value.value, + ) + ) + continue + if attr.attrValue != 0 and attr.formValue != 0: + # + try: + self.abbrevations[key].attrs.append( + Attribute( + constants.AttributeEncoding(attr.attrValue), + constants.AttributeForm(attr.formValue), + ) + ) + except TypeError: + self.abbrevations[key].attrs.append( + Attribute( + constants.FakeEncoding(attr.attrValue), + constants.AttributeForm(attr.formValue), + ) + ) + offset += attr.stop - attr.start + if code == item: + return self.abbrevations[key] + + class DwarfProcessor: """ """ @@ -175,7 +298,7 @@ def __init__(self, elf_parser): self.b64 = elf_parser.b64 self.endianess = Endianess.Little if elf_parser.endianess == "<" else Endianess.Big self.debug_sections = elf_parser.debug_sections() - print(self.debug_sections.keys()) + # print(self.debug_sections.keys()) if not self.debug_sections: raise TypeError("File has no DWARF sections.") # self.debug_sections = { @@ -185,8 +308,7 @@ def __init__(self, elf_parser): self.strings = self.debug_sections[".debug_str"].image else: self.strings = b"" - - # print(self.strings.read()) + self.db_session = elf_parser.session self.installReaders() def installReaders(self): @@ -204,6 +326,7 @@ def installReaders(self): "block4": (Block4l, Block4b), } self.readers = Readers() + print("SIZE-INFO", 8 if self.b64 else 4, self.endianess) self.readers.native_address = Address(8 if self.b64 else 4, self.endianess) self.readers.uleb = ULEB self.readers.sleb = SLEB @@ -222,8 +345,6 @@ def get_string(self, offset: int): # result = self.UTF8String.parse_stream(self.strings) result = str(self.debug_sections[".debug_str"])[offset : offset + 25] return bytes(result, encoding="ascii") - - def do_abbrevs(self): if ".debug_abbrev" in self.debug_sections: key = ".debug_abbrev" elif ".debug_global_abbrev" in self.debug_sections: @@ -233,19 +354,20 @@ def do_abbrevs(self): section = self.debug_sections[key] image = section.image length = len(section.image) - AbbrevationStruct = Struct( + + AbbrevationHeader = Struct( "start" / Tell, "code" / self.readers.uleb, - "details" - / If( - lambda ctx: ctx.code != 0, - Struct( - "tag" / self.readers.uleb, - "children" / self.readers.u8, - ), - ), "stop" / Tell, ) + + AbbrevationBody = Struct( + "start" / Tell, + "tag" / self.readers.uleb, + "children" / self.readers.u8, + "stop" / Tell, + ) + AttributeStruct = Struct( "start" / Tell, "attrValue" / self.readers.uleb, @@ -253,37 +375,69 @@ def do_abbrevs(self): "stop" / Tell, "next" / Computed((this.attrValue != 0) and (this.formValue != 0)), ) + + SpecialValueStruct = Struct( + "start" / Tell, + "value" / self.readers.uleb, + "stop" / Tell, + ) + offset: int = 0 result = OrderedDict() key_offset: int = offset while True: - abbrev = AbbrevationStruct.parse(image[offset:]) + abbrv_header = AbbrevationHeader.parse(image[offset:]) + print("ABBREV-CODE", hex(offset), abbrv_header) + code = abbrv_header.code key = ( key_offset, - abbrev.code, + code, ) - offset += abbrev.stop - abbrev.start + offset += abbrv_header.stop - abbrv_header.start if offset >= length: break - if abbrev.code == 0: - key_offset = offset - result[key] = Abbrevation(0, False, []) + if code == 0: + # key_offset = offset + # result[key] = Abbrevation(0, False, []) + print("CONT!!!") continue + + abbrv_body = AbbrevationBody.parse(image[offset:]) + tag = abbrv_body.tag + children = abbrv_body.children + print("BODY", abbrv_body, constants.Tag(tag).name) + offset += abbrv_body.stop - abbrv_body.start + try: result[key] = Abbrevation( - constants.Tag(abbrev.details.tag).name, - (abbrev.details.children == 1), + constants.Tag(tag).name, + (children == 1), [], ) except TypeError: result[key] = Abbrevation( - f"User TAG value: 0x{abbrev.details.tag:04x}", - (abbrev.details.children == 1), + f"User TAG value: 0x{tag:04x}", + (children == 1), [], ) while True: attr = AttributeStruct.parse(image[offset:]) + print("ATTR", attr) + if not attr.next: + # key_offset = offset + break + if attr.formValue == constants.AttributeForm.DW_FORM_implicit_const: + offset += attr.stop - attr.start + special_value = SpecialValueStruct.parse(image[offset:]) + offset += special_value.stop - special_value.start + result[key].attrs.append( + Attribute( + constants.AttributeEncoding(attr.attrValue), + special_value.value, + ) + ) + continue if attr.attrValue != 0 and attr.formValue != 0: # try: @@ -301,8 +455,10 @@ def do_abbrevs(self): ) ) offset += attr.stop - attr.start - if not attr.next: - break + print("Finished Attrs.") + from pprint import pprint + + pprint(result) self.abbrevations = result def do_lines(self): @@ -318,7 +474,7 @@ def do_lines(self): VersionInformation = Struct("unit_length" / self.readers.u32, "version" / self.readers.u16) version_information = VersionInformation.parse_stream(image) - print(version_information) + # print(version_information) if version_information.version == 2: LineNumberProgramHeader = Struct( @@ -344,8 +500,7 @@ def do_lines(self): "stop" / Tell, ) - header = LineNumberProgramHeader.parse_stream(image) - print("LineNumberProgramHeader:", header) + LineNumberProgramHeader.parse_stream(image) prg = LineNumberProgram(image) # noqa: F841 def do_mac_info(self): @@ -384,7 +539,7 @@ def do_mac_info(self): while True: macInfo = MacInfo.parse_stream(image) offset += macInfo.stop - macInfo.start - print(offset, macInfo) + # print(offset, macInfo) if offset >= length - 1: break elif ".debug_macro" in self.debug_sections: @@ -410,8 +565,7 @@ def do_mac_info(self): "stop" / Tell, ) - header = MacroInformationHeader.parse_stream(image) - print("debug_macro!!!", length, header) + MacroInformationHeader.parse_stream(image) def get_form_readers(self, addressSize): return { @@ -440,7 +594,7 @@ def get_form_readers(self, addressSize): constants.AttributeForm.DW_FORM_exprloc: self.readers.block_uleb, constants.AttributeForm.DW_FORM_flag_present: One, constants.AttributeForm.DW_FORM_ref_sig8: self.readers.u64, - constants.AttributeForm.DW_FORM_implicit_const: self.readers.block_sleb, + constants.AttributeForm.DW_FORM_implicit_const: None, } def process_attributes(self, image, readers, size, abbrevOffset): @@ -482,24 +636,6 @@ def process_attributes(self, image, readers, size, abbrevOffset): if hasattr(abbr, "children") and abbr.children: level += 1 - def process_compile_unit(self, image): - CompileUnit = Struct( - "start" / Tell, - "unit_length" / self.readers.u32, - "version" / self.readers.u16, - "debug_abbrev_offset" / self.readers.u32, - "address_size" / self.readers.u8, - "stop" / Tell, - "size" / Computed(this.stop - this.start), - ) - cu = CompileUnit.parse_stream(image) - print(f" Compilation Unit @ offset 0x{cu.start:x}:") - print(f" Length: 0x{cu.unit_length:x} (32-bit)") - print(f" Version: {cu.version}") - print(f" Abbrev Offset: 0x{cu.debug_abbrev_offset:x}") - print(f" Pointer Size: {cu.address_size}") - return cu - def do_dbg_info(self): if ".debug_info" in self.debug_sections: key = ".debug_info" @@ -507,18 +643,51 @@ def do_dbg_info(self): key = ".debug_global_info" else: raise TypeError("Neither .debug_info nor .debug_global_info section found") + + if ".debug_abbrev" in self.debug_sections: + dbg_key = ".debug_abbrev" + elif ".debug_global_abbrev" in self.debug_sections: + dbg_key = ".debug_global_abbrev" + else: + raise TypeError("Neither .debug_abbrev nor .debug_global_abbrev section found") + + dbg_section = self.debug_sections[dbg_key] + self.abbrevations = Abbrevations(dbg_section) + section = self.debug_sections[key] image = io.BytesIO(section.image) section_length = len(section.image) - DbgInfo = Struct( + + CommonHeader = Struct( "start" / Tell, "unit_length" / self.readers.u32, "version" / self.readers.u16, + "stop" / Tell, + ) + + UnitHeader5 = Struct( + "start" / Tell, + "unit_type" / self.readers.u8, + "address_size" / self.readers.u8, + "debug_abbrev_offset" / self.readers.u32, + "stop" / Tell, + ) + + UnitHeaderClassic = Struct( + "start" / Tell, "debug_abbrev_offset" / self.readers.u32, "address_size" / self.readers.u8, "stop" / Tell, + ) + + DbgInfo = Struct( + "start" / Tell, + "header" / CommonHeader, + "body" / IfThenElse(this.header.version < 5, UnitHeaderClassic, UnitHeader5), + "stop" / Tell, "size" / Computed(this.stop - this.start), ) + Attribute = Struct( "start" / Tell, "attr" / self.readers.uleb, @@ -536,32 +705,44 @@ def do_dbg_info(self): die_stack = [root_element] dbgInfo = DbgInfo.parse_stream(image) # CU + # print(dbgInfo) + cu_length = dbgInfo.header.unit_length + version = dbgInfo.header.version + if version < 5: + debug_abbrev_offset = dbgInfo.body.debug_abbrev_offset + address_size = dbgInfo.body.address_size + else: + debug_abbrev_offset = dbgInfo.body.debug_abbrev_offset + address_size = dbgInfo.body.address_size + print("*" * 80) print(f" Compilation Unit @ offset 0x{dbgInfo.start:x}:") # TODO: offset is needed for _refx types!!! - print(f" Length: 0x{dbgInfo.unit_length:x} (32-bit)") - print(f" Version: {dbgInfo.version}") - print(f" Abbrev Offset: 0x{dbgInfo.debug_abbrev_offset:x}") - print(f" Pointer Size: {dbgInfo.address_size}") + print(f" Length: 0x{cu_length:x} (32-bit)") + print(f" Version: {version}") + print(f" Abbrev Offset: 0x{debug_abbrev_offset:x}") + print(f" Pointer Size: {address_size}") level = 0 pos = 0 offset += dbgInfo.stop - dbgInfo.start - form_readers = self.get_form_readers(dbgInfo.address_size) - print("Pos, Length", pos, dbgInfo.unit_length) - if pos >= dbgInfo.unit_length: + form_readers = self.get_form_readers(address_size) + print("Pos, Length", pos, cu_length) + if pos >= cu_length: break while True: start = image.tell() - if start >= dbgInfo.start + dbgInfo.unit_length + 4: + if start >= dbgInfo.start + cu_length + 4: break if start >= section_length - 1: break attr = Attribute.parse_stream(image) - abbr = self.abbrevations.get((dbgInfo.debug_abbrev_offset, attr.attr)) + abbr = self.abbrevations.get(debug_abbrev_offset, attr.attr) if not abbr: print(f"<{start:2x}>: Abbrev Number: 0 ---") level -= 1 else: die = DebugInformationEntry(abbr.tag) + db_die = model.DebugInformationEntry(tag=abbr.tag) + self.db_session.add(db_die) if attr.attr != 0: die_stack[-1].children.append(die) print(f"{' ' * (level + 1)}<{level}><{start:02x}>: Abbrev Number: {attr.attr} ({abbr.tag})") @@ -573,14 +754,35 @@ def do_dbg_info(self): pass if attr.attr == 0: level -= 1 - die_stack.pop() - for enc, form in abbr.attrs: + if len(die_stack): + die_stack.pop() + else: + print("DIE_STACK empty!!!") + for enc, form, special_value in abbr.attrs: reader = form_readers.get(form) - if reader is None: - print("*EF", enc, form, start, attr, abbr) start = image.tell() - # if start >= 0xa103: - # print() + if reader is None: + if form == constants.AttributeForm.DW_FORM_implicit_const: + print("DW_FORM_implicit_const") + value = special_value + display_value = value + print(f"{' ' * (level + 1)}<{start:02x}> {enc.name}: {display_value}") + die.attributes.append( + ( + enc.name, + DIEAttribute(value, display_value), + ) + ) + db_die.attributes.append( + model.DIEAttribute(name=enc.name, raw_value=value, display_value=display_value) + ) + offset += attr.stop - attr.start + pos = image.tell() + if pos >= dbgInfo.start + cu_length + 4: + break + continue + else: + print("*EF", enc, form, start, attr, abbr) try: value = reader.parse_stream(image) except Exception as e: @@ -633,12 +835,16 @@ def do_dbg_info(self): DIEAttribute(value, display_value), ) ) + db_die.attributes.append(model.DIEAttribute(name=enc.name, raw_value=value, display_value=display_value)) offset += attr.stop - attr.start pos = image.tell() - if pos >= dbgInfo.start + dbgInfo.unit_length + 4: + if pos >= dbgInfo.start + cu_length + 4: break + # print(db_die, db_die.attributes) + # self.db_session.bulk_save_objects([db_die]) die_map[die_start] = die result.append(root_element) + self.db_session.commit() return DebugInformation(die_map, [d.children[0] for d in result]) def pubnames(self): diff --git a/objutils/dwarf/constants.py b/objutils/dwarf/constants.py index e436e1a..1dd9fd8 100644 --- a/objutils/dwarf/constants.py +++ b/objutils/dwarf/constants.py @@ -449,6 +449,17 @@ class AttributeForm(EnumBase): """ +class UnitHeader(EnumBase): + DW_UT_compile = 0x01 + DW_UT_type = 0x02 + DW_UT_partial = 0x03 + DW_UT_skeleton = 0x04 + DW_UT_split_compile = 0x05 + DW_UT_split_type = 0x06 + DW_UT_lo_user = 0x80 + DW_UT_hi_user = 0xFF + + class Operation(EnumBase): addr = 0x3 deref = 0x6 diff --git a/objutils/dwarf/lineprog.py b/objutils/dwarf/lineprog.py index 3cd5975..3d5237d 100644 --- a/objutils/dwarf/lineprog.py +++ b/objutils/dwarf/lineprog.py @@ -9,5 +9,5 @@ def __init__(self, image) -> None: opcode = single_byte() if not opcode: break - print(ord(opcode), end=" ") + # print(ord(opcode), end=" ") print() diff --git a/objutils/elf/__init__.py b/objutils/elf/__init__.py index d8e9b3f..8072760 100644 --- a/objutils/elf/__init__.py +++ b/objutils/elf/__init__.py @@ -26,10 +26,13 @@ import binascii import hashlib +import os import re import time +import typing from collections import OrderedDict, namedtuple from itertools import groupby +from pathlib import Path from construct import ( Adapter, @@ -377,11 +380,17 @@ class ElfParser: ), ) - def __init__(self, filename): + def __init__(self, filename: str): self.fp = create_memorymapped_fileview(filename) # sha = calculate_crypto_hash(self.fp.tobytes()) + self.filename = Path(filename) - self.db = model.Model() + self.db_name = self.filename.with_suffix(model.DB_EXTENSION) + try: + os.unlink(self.db_name) + except Exception as e: + print(e) + self.db = model.Model(self.db_name) self.session = self.db.session self.symbols = SymbolAPI(self) self.sections = SectionAPI(self) @@ -407,6 +416,8 @@ def __init__(self, filename): for section in self._symbol_sections: self._parse_symbol_section(section) self.session.commit() + md_class = defs.MACHINE_DATA.get(self.e_machine, defs.MachineData) + self._machine_data = md_class(self.e_machine, self.e_flags) def _parser_extended_header(self): ExtendedHeader = Struct( @@ -854,12 +865,16 @@ def notes(self): def query(self): return self.session.query + @property + def machine_data(self) -> str: + return str(self._machine_data) + def create_image( self, join: bool = True, - include_pattern: str = None, - exclude_pattern: str = None, - callback: callable = None, + include_pattern: str = "", + exclude_pattern: str = "", + callback: typing.Optional[typing.Callable] = None, ): """ @@ -894,10 +909,16 @@ def create_image( Look at `scripts/oj_elf_extract.py` to see `create_image()` in action. """ query = self.query(model.Elf_Section) - query = query.filter( - model.Elf_Section.flag_alloc is True, - model.Elf_Section.has_content is True, - ) + # query = query.filter( + # model.Elf_Section.flag_alloc is True, + # model.Elf_Section.has_content is True, + # ) + # defs.SectionFlags.SHF_ALLOC + # + query.filter(model.Elf_Section.sh_flags.bitwise_and(defs.SectionFlags.SHF_ALLOC) == defs.SectionFlags.SHF_ALLOC) + + # sections = self.query(model.Elf_Section).all() + # print("SECTIONS", sections) if include_pattern: query = query.filter(func.regexp(model.Elf_Section.section_name, include_pattern)) @@ -910,6 +931,12 @@ def create_image( if callback: callback("start", None) for section in query.all(): + if ( + section.section_image is None + or (section.sh_flags & defs.SectionFlags.SHF_ALLOC) != defs.SectionFlags.SHF_ALLOC + or section.sh_type in (defs.SectionType.SHT_NOBITS, defs.SectionType.SHT_NULL) + ): + continue if callback: callback("section", section) result.append(Section(section.sh_addr, section.section_image)) diff --git a/objutils/elf/defs.py b/objutils/elf/defs.py index 715c1d7..343722f 100644 --- a/objutils/elf/defs.py +++ b/objutils/elf/defs.py @@ -5,7 +5,7 @@ __copyright__ = """ objutils - Object file library for Python. - (C) 2010-2020 by Christoph Schueler + (C) 2010-2024 by Christoph Schueler All Rights Reserved @@ -36,11 +36,6 @@ HDR_FMT64 = "HHIQQQIHHHHHH" -class Endianess(enum.IntEnum): - Little = 0 - Big = 1 - - Elf32_Ehdr = namedtuple( "Elf32_Ehdr", """e_type e_machine e_version e_entry e_phoff e_shoff e_flags e_ehsize @@ -454,6 +449,163 @@ class ELFMachineType(enum.IntEnum): } +class AVRMachineType(enum.IntEnum): + E_AVR_MACH_AVR1 = 1 + E_AVR_MACH_AVR2 = 2 + E_AVR_MACH_AVR3 = 3 + E_AVR_MACH_AVR4 = 4 + E_AVR_MACH_AVR5 = 5 + E_AVR_MACH_AVR6 = 6 + E_AVR_MACH_AVR25 = 25 + E_AVR_MACH_AVR31 = 31 + E_AVR_MACH_AVR35 = 35 + E_AVR_MACH_AVR51 = 51 + E_AVR_MACH_AVRTINY = 100 + E_AVR_MACH_XMEGA1 = 101 + E_AVR_MACH_XMEGA2 = 102 + E_AVR_MACH_XMEGA3 = 103 + E_AVR_MACH_XMEGA4 = 104 + E_AVR_MACH_XMEGA5 = 105 + E_AVR_MACH_XMEGA6 = 106 + E_AVR_MACH_XMEGA7 = 107 + + +AVRMachineTypeNames = { + AVRMachineType.E_AVR_MACH_AVR1: "avr:1", + AVRMachineType.E_AVR_MACH_AVR2: "avr:2", + AVRMachineType.E_AVR_MACH_AVR3: "avr:3", + AVRMachineType.E_AVR_MACH_AVR4: "avr:4", + AVRMachineType.E_AVR_MACH_AVR5: "avr:5", + AVRMachineType.E_AVR_MACH_AVR6: "avr:6", + AVRMachineType.E_AVR_MACH_AVR25: "avr:25", + AVRMachineType.E_AVR_MACH_AVR31: "avr:31", + AVRMachineType.E_AVR_MACH_AVR35: "avr:35", + AVRMachineType.E_AVR_MACH_AVR51: "avr:51", + AVRMachineType.E_AVR_MACH_AVRTINY: "avr:100", + AVRMachineType.E_AVR_MACH_XMEGA1: "avr:101", + AVRMachineType.E_AVR_MACH_XMEGA2: "avr:102", + AVRMachineType.E_AVR_MACH_XMEGA3: "avr:103", + AVRMachineType.E_AVR_MACH_XMEGA4: "avr:104", + AVRMachineType.E_AVR_MACH_XMEGA5: "avr:105", + AVRMachineType.E_AVR_MACH_XMEGA6: "avr:106", + AVRMachineType.E_AVR_MACH_XMEGA7: "avr:107", +} + +EF_AVR_MACH = 0x7F +EF_AVR_LINKRELAX_PREPARED = 0x80 + +# Old ABI (ie GNU pre EABI). These are deprecated. +EF_ARM_RELEXEC = 0x01 +EF_ARM_INTERWORK = 0x04 +EF_ARM_APCS_26 = 0x08 +EF_ARM_APCS_FLOAT = 0x10 +EF_ARM_PIC = 0x20 +EF_ARM_ALIGN8 = 0x40 # 8-bit structure alignment is in use. +EF_ARM_NEW_ABI = 0x80 +EF_ARM_OLD_ABI = 0x100 +EF_ARM_SOFT_FLOAT = 0x200 +EF_ARM_VFP_FLOAT = 0x400 +EF_ARM_MAVERICK_FLOAT = 0x800 + +# Old ARM ELF spec. version B-01. Mostly deprecated. +EF_ARM_SYMSARESORTED = 0x04 # NB conflicts with EF_INTERWORK. +EF_ARM_DYNSYMSUSESEGIDX = 0x08 # NB conflicts with EF_APCS26. +EF_ARM_MAPSYMSFIRST = 0x10 # NB conflicts with EF_APCS_FLOAT. + +# New constants defined in the ARM ELF spec. version XXX (AAELF). +# Only valid in conjunction with EF_ARM_EABI_VER5. +EF_ARM_ABI_FLOAT_SOFT = 0x200 # NB conflicts with EF_ARM_SOFT_FLOAT. +EF_ARM_ABI_FLOAT_HARD = 0x400 # NB conflicts with EF_ARM_VFP_FLOAT. + +# Constants defined in AAELF. +EF_ARM_BE8 = 0x00800000 +EF_ARM_LE8 = 0x00400000 + +EF_ARM_EABIMASK = 0xFF000000 +EF_ARM_EABIMASK_COM = 0x00FFFFFF + +EF_ARM_EABI_UNKNOWN = 0x00000000 +EF_ARM_EABI_VER1 = 0x01000000 +EF_ARM_EABI_VER2 = 0x02000000 +EF_ARM_EABI_VER3 = 0x03000000 +EF_ARM_EABI_VER4 = 0x04000000 +EF_ARM_EABI_VER5 = 0x05000000 + + +class MachineData: + + type_name: str = "???" + type_value: int = -1 + machine_name: str = "" + + def __init__(self, machine_code: int, flags: int) -> None: + self.flags = flags + try: + ELFMachineType(machine_code) + except Exception: + self.type_name = "???" + self.type_value = -1 + self.machine_name = "" + else: + machine = ELFMachineType(machine_code) + self.type_name = machine.name[3:] # get rid of 'EM_'. + self.type_value = machine.value + self.machine_name = ELF_MACHINE_NAMES.get(self.type_value, "") + + def specific(self) -> list: + return [] + + def __str__(self): + spec = self.specific() + if spec: + return f'{self.type_name!s} [{self.machine_name!s}] [{", ".join(spec)}]' + else: + return f"{self.type_name!s} [{self.machine_name!s}]" + + __repr__ = __str__ + + +class AvrMachineData(MachineData): + + def specific(self) -> list: + flags = self.flags & EF_AVR_MACH + result = [] + if flags in AVRMachineTypeNames: + result.append(AVRMachineTypeNames[flags]) + else: + result.append("avr:") + if flags & EF_AVR_LINKRELAX_PREPARED: + result.append("link-relax") + return result + + +class ArmMachineData(MachineData): + + def specific(self) -> list: + eabi = self.flags & EF_ARM_EABIMASK + flags = self.flags = self.flags & EF_ARM_EABIMASK_COM + print("ARM", hex(eabi), hex(flags)) + result = [] + + if flags & EF_ARM_RELEXEC: + result.append("relocatable executable") + flags = flags & (~EF_ARM_RELEXEC & 0xFFFFFFFF) + if flags & EF_ARM_PIC: + result.append("position independent") + flags = flags & (~EF_ARM_PIC & 0xFFFFFFFF) + """ + + """ + result = [] + return result + + +MACHINE_DATA = { + ELFMachineType.EM_AVR: AvrMachineData, + ELFMachineType.EM_ARM: ArmMachineData, +} + + EV_NONE = 0 # Invalid version. EV_CURRENT = 1 # Current version. diff --git a/objutils/elf/model.py b/objutils/elf/model.py index b15dc09..6c72155 100644 --- a/objutils/elf/model.py +++ b/objutils/elf/model.py @@ -29,10 +29,11 @@ import re import sqlite3 -from sqlalchemy import Column, and_, create_engine, event, not_, orm, types +from sqlalchemy import Column, ForeignKey, and_, create_engine, event, not_, orm, types from sqlalchemy.engine import Engine from sqlalchemy.ext.declarative import declarative_base, declared_attr from sqlalchemy.ext.hybrid import hybrid_method, hybrid_property +from sqlalchemy.orm import relationship from objutils.elf import defs @@ -40,6 +41,8 @@ CACHE_SIZE = 4 # MB PAGE_SIZE = mmap.PAGESIZE +DB_EXTENSION = ".prgdb" + Base = declarative_base() @@ -175,6 +178,7 @@ def get_flags(self): @hybrid_method def test_flags(self, mask): + print("\ttest_flags", self.get_flags(), mask) return self.get_flags() & mask == mask @test_flags.expression @@ -344,6 +348,73 @@ class Elf_Note(Base, RidMixIn): desc = Column(types.VARCHAR) +""" +class Parent(Base): + __tablename__ = 'parent' + id = Column(Integer, primary_key=True) + children = relationship("Child", back_populates="parent") + +class Child(Base): + __tablename__ = 'child' + id = Column(Integer, primary_key=True) + parent_id = Column(Integer, ForeignKey('parent.id')) + parent = relationship("Parent", back_populates="children") +""" + + +class DIEAttribute(Base, RidMixIn): + name = Column(types.VARCHAR) + raw_value = Column(types.VARCHAR) + display_value = Column(types.VARCHAR) + entry_id = Column(types.Integer, ForeignKey("debuginformationentry.rid")) + entry = relationship("DebugInformationEntry", back_populates="attributes") + + +class DebugInformationEntry(Base, RidMixIn): + tag = Column(types.VARCHAR) + attributes = relationship("DIEAttribute", back_populates="entry", uselist=True) + + +class DebugInformation(Base, RidMixIn): + pass + # die_map: dict[int, DebugInformationEntry] + # die_entries: List[DebugInformationEntry] + + +class CompilationUnit(Base, RidMixIn): + pass + + +""" +@dataclass +class DIEAttribute: + raw_value: Any + display_value: str + + def toJSON(self): + print("Hello!?") + + +@dataclass +class DebugInformationEntry: + name: str + attributes: List = field(default_factory=list) + children: List = field(default_factory=list) + + def toJSON(self): + return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) + + +@dataclass +class DebugInformation: + die_map: dict[int, DebugInformationEntry] + die_entries: List[DebugInformationEntry] + + def toJSON(self): + return json.dumps(self, default=lambda o: o.__dict__, sort_keys=True, indent=4) +""" + + def calculateCacheSize(value): return -(value // PAGE_SIZE) @@ -372,10 +443,47 @@ def set_sqlite3_pragmas(dbapi_connection, connection_record): cursor.close() +""" +class A2LDatabase(object): + def __init__(self, filename, debug=False, logLevel="INFO"): + if filename == ":memory:": + self.dbname = "" + else: + if not filename.lower().endswith(DB_EXTENSION): + self.dbname = "{}.{}".format(filename, DB_EXTENSION) + else: + self.dbname = filename + self._engine = create_engine( + "sqlite:///{}".format(self.dbname), + echo=debug, + connect_args={"detect_types": sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES}, + native_datetime=True, + ) + + self._session = orm.Session(self._engine, autoflush=False, autocommit=False) + self._metadata = Base.metadata + # loadInitialData(Node) + Base.metadata.create_all(self.engine) + meta = MetaData(schema_version=CURRENT_SCHEMA_VERSION) + self.session.add(meta) + self.session.flush() + self.session.commit() + self._closed = False +""" + + class Model: - def __init__(self, debug=False): + def __init__(self, filename: str = ":memory:", debug: bool = False): + if filename == ":memory:" or not filename: + self.dbname = ":memory:" + else: + # if not filename.lower().endswith(DB_EXTENSION): + # self.dbname = f"{filename}.{DB_EXTENSION}" + # else: + self.dbname = filename + self._engine = create_engine( - "sqlite:///:memory:", + f"sqlite:///{self.dbname}", echo=debug, connect_args={"detect_types": sqlite3.PARSE_DECLTYPES | sqlite3.PARSE_COLNAMES}, native_datetime=True, diff --git a/objutils/ihex.py b/objutils/ihex.py index 9b4e813..7337044 100644 --- a/objutils/ihex.py +++ b/objutils/ihex.py @@ -103,7 +103,7 @@ def special_processing(self, line, format_type): else: self.warn(f"Invalid record type [{line.type:u}] at line {line.line_number:u}") - def _address_calculator(x): + def _address_calculator(self, x): return x diff --git a/objutils/scripts/oj_elf_extract.py b/objutils/scripts/oj_elf_extract.py index f160e26..240a88a 100644 --- a/objutils/scripts/oj_elf_extract.py +++ b/objutils/scripts/oj_elf_extract.py @@ -30,6 +30,10 @@ from objutils.elf import ElfParser +# import sys +# sys.argv.extend(["-t", "ihex", "./examples/hello_xcp.ino.elf", "./examples/hello_xcp.hex"]) + + def callback(state, section): """ """ if state == "start": diff --git a/objutils/scripts/oj_elf_info.py b/objutils/scripts/oj_elf_info.py index d2cc33a..12e419c 100644 --- a/objutils/scripts/oj_elf_info.py +++ b/objutils/scripts/oj_elf_info.py @@ -33,10 +33,8 @@ from objutils.elf.defs import ( ELF_BYTE_ORDER_NAMES, ELF_CLASS_NAMES, - ELF_MACHINE_NAMES, ELF_TYPE_NAMES, ELFAbiType, - ELFMachineType, ELFType, ) @@ -77,7 +75,7 @@ def main(): ep = ElfParser(args.elf_file) print(f"Class: {ELF_CLASS_NAMES.get(ep.ei_class, '*** INVALID ***')}") print(f"Type: {ELFType(ep.e_type).name[3:]} [{ELF_TYPE_NAMES.get(ep.e_type, '')}]") - print(f"Machine: {ELFMachineType(ep.e_machine).name[3:]} [{ELF_MACHINE_NAMES.get(ep.e_machine, '')}]") + print(f"Machine: {ep.machine_data}") print(f"Data: {ELF_BYTE_ORDER_NAMES.get(ep.ei_data, '*** INVALID ***')}") print(f"OS/ABI {ELFAbiType(ep.ei_osabi).name[9:]} / v{ep.ei_abiversion}") @@ -90,6 +88,25 @@ def main(): f"{sec.section_name:25} {sec.section_type.name[4:]:14} {sec.sh_addr:08x} {sec.sh_offset:08x}" f" {sec.sh_size:06x} {sec.sh_addralign:2}" ) + + print_header("Segments") + print("Type Offset VirtAddr PhysAddr FileSiz MemSiz Flg Align") + print("-" * 79) + for segment in ep.segments: + print( + f"{segment.p_type:12} 0x{segment.p_offset:08x} 0x{segment.p_vaddr:08x} 0x{segment.p_paddr:08x} 0x{segment.p_filesz:05x} 0x{segment.p_memsz:05x}" + ) + """ + p_type = 1 + p_offset = 148 + p_vaddr = 0 + p_paddr = 0 + p_filesz = 12924 + p_memsz = 12924 + p_flags = 5 + p_align = 2 + """ + comment = ep.comment if comment: print_header(".comment") @@ -109,6 +126,16 @@ def main(): # dp.do_abbrevs() # dp.do_mac_info() # dp.do_dbg_info() + from objutils.dwarf import DwarfProcessor + + if ep.debug_sections(): + dp = DwarfProcessor(ep) + dp.pubnames() + dp.aranges() + dp.do_lines() + dp.do_dbg_info() + # dp.processDebugInfo() + dp.do_mac_info() def print_header(text): diff --git a/poetry.lock b/poetry.lock index 56876fb..8b83d83 100644 --- a/poetry.lock +++ b/poetry.lock @@ -363,6 +363,30 @@ babel = ["Babel"] lingua = ["lingua"] testing = ["pytest"] +[[package]] +name = "markdown-it-py" +version = "3.0.0" +description = "Python port of markdown-it. Markdown parsing, done right!" +optional = false +python-versions = ">=3.8" +files = [ + {file = "markdown-it-py-3.0.0.tar.gz", hash = "sha256:e3f60a94fa066dc52ec76661e37c851cb232d92f9886b15cb560aaada2df8feb"}, + {file = "markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1"}, +] + +[package.dependencies] +mdurl = ">=0.1,<1.0" + +[package.extras] +benchmarking = ["psutil", "pytest", "pytest-benchmark"] +code-style = ["pre-commit (>=3.0,<4.0)"] +compare = ["commonmark (>=0.9,<1.0)", "markdown (>=3.4,<4.0)", "mistletoe (>=1.0,<2.0)", "mistune (>=2.0,<3.0)", "panflute (>=2.3,<3.0)"] +linkify = ["linkify-it-py (>=1,<3)"] +plugins = ["mdit-py-plugins"] +profiling = ["gprof2dot"] +rtd = ["jupyter_sphinx", "mdit-py-plugins", "myst-parser", "pyyaml", "sphinx", "sphinx-copybutton", "sphinx-design", "sphinx_book_theme"] +testing = ["coverage", "pytest", "pytest-cov", "pytest-regressions"] + [[package]] name = "markupsafe" version = "2.1.5" @@ -443,6 +467,17 @@ files = [ {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, ] +[[package]] +name = "mdurl" +version = "0.1.2" +description = "Markdown URL utilities" +optional = false +python-versions = ">=3.7" +files = [ + {file = "mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8"}, + {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, +] + [[package]] name = "mypy-extensions" version = "1.0.0" @@ -569,6 +604,20 @@ files = [ {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, ] +[[package]] +name = "pygments" +version = "2.18.0" +description = "Pygments is a syntax highlighting package written in Python." +optional = false +python-versions = ">=3.8" +files = [ + {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, + {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, +] + +[package.extras] +windows-terminal = ["colorama (>=0.4.6)"] + [[package]] name = "pytest" version = "8.3.3" @@ -667,6 +716,24 @@ files = [ {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +[[package]] +name = "rich" +version = "13.8.1" +description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "rich-13.8.1-py3-none-any.whl", hash = "sha256:1760a3c0848469b97b558fc61c85233e3dafb69c7a071b4d60c38099d3cd4c06"}, + {file = "rich-13.8.1.tar.gz", hash = "sha256:8260cda28e3db6bf04d2d1ef4dbc03ba80a824c88b0e7668a0f23126a424844a"}, +] + +[package.dependencies] +markdown-it-py = ">=2.2.0" +pygments = ">=2.13.0,<3.0.0" + +[package.extras] +jupyter = ["ipywidgets (>=7.5.1,<9)"] + [[package]] name = "ruff" version = "0.4.10" @@ -847,4 +914,4 @@ test = ["covdefaults (>=2.3)", "coverage (>=7.2.7)", "coverage-enable-subprocess [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "cee76210aa0e4dcfa83a1ebb3b71cd4fbe917c38f42ee83ff94210852058fa0a" +content-hash = "833cd58922de478b926f203c458b3ca282fd5092ec01586489282af99695ddc0" diff --git a/pyproject.toml b/pyproject.toml index fe40bbd..eb6a65f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ flake8 = "^7.0.0" isort = "^5.13.2" prettier = "^0.0.7" black = "^24.4.1" +rich = "^13.8.1" [tool.poetry.group.dev.dependencies] diff --git a/setup.py b/setup.py index e3b1064..1666d06 100644 --- a/setup.py +++ b/setup.py @@ -17,6 +17,7 @@ "attrs >= 19.3.0", "sortedcontainers", "SQLAlchemy", + "Rich", ] if sys.version_info.major == 2 or (sys.version_info.major == 3 and sys.version_info.minor < 4):