Skip to content

Commit

Permalink
feat: add austinp-resolve tool (#10)
Browse files Browse the repository at this point in the history
This tool can be used to resolve native frames in samples collected
with austinp. Also supports MOJO.
  • Loading branch information
P403n1x87 authored Oct 24, 2022
1 parent 9002a5a commit 4517b97
Show file tree
Hide file tree
Showing 9 changed files with 347 additions and 8 deletions.
2 changes: 1 addition & 1 deletion .flake8
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[flake8]
select = ANN,B,B9,C,D,E,F,W,I
ignore = ANN101,ANN102,ANN401,B950,D100,D104,D107,E203,E501,I001,I005,W503,W606
ignore = ANN101,ANN102,ANN401,B950,D100,D102,D103,D104,D107,E203,E501,I001,I005,W503,W606
exclude =
austin/format/pprof/profile_pb2.py
max-line-length = 88
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,12 @@ jobs:
cd $GITHUB_WORKSPACE/austin
gcc -Wall -O3 -Os -s -pthread src/*.c -o src/austin
- name: Install dependencies
run: |
sudo apt-get update -y
sudo apt-get install -y binutils binutils-common
addr2line -V
- name: Run tests
run: |
cd $GITHUB_WORKSPACE/main
Expand Down
44 changes: 37 additions & 7 deletions austin/format/mojo.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,33 @@
__version__ = "0.1.0"


def to_varint(n: int) -> bytes:
    """Encode a signed integer as a variable-length byte sequence.

    The first byte carries the six least significant bits of the magnitude,
    with bit ``0x40`` set for negative values. Every following byte carries
    seven more bits of the magnitude. Bit ``0x80`` on a byte signals that at
    least one more byte follows.
    """
    magnitude = abs(n)
    sign_flag = 0x40 if n < 0 else 0

    # Leading byte: sign flag plus the low six bits.
    head = sign_flag | (magnitude & 0x3F)
    remaining = magnitude >> 6
    if remaining:
        head |= 0x80
    encoded = bytearray([head])

    # Continuation bytes: seven bits each, least significant group first.
    while remaining:
        chunk = remaining & 0x7F
        remaining >>= 7
        encoded.append((chunk | 0x80) if remaining else chunk)

    return bytes(encoded)


class MojoEvents:
"""MOJO events."""

Expand Down Expand Up @@ -210,10 +237,14 @@ def __init__(self, mojo: BufferedReader) -> None:
self._last_bytes = bytearray()
self._string_map = {1: MojoString(1, "<unknown>")}

assert self.read(3) == b"MOJ"
if self.read(3) != b"MOJ":
raise ValueError("Not a MOJO file")

self.mojo_version = self.read_int()

self.header = bytes(self._last_bytes)
self._last_bytes.clear()

def read(self, n: int) -> bytes:
"""Read bytes from the MOJO file."""
self._offset += self._last_read
Expand Down Expand Up @@ -358,19 +389,19 @@ def parse_string_ref(self) -> t.Generator[MojoStringReference, None, None]:
def parse_event(self) -> t.Generator[t.Optional[MojoEvent], None, None]:
"""Parse a single event."""
try:
(event,) = self.read(1)
(event_id,) = self.read(1)
except ValueError:
yield None
return

try:
for e in t.cast(dict, self.__handlers__)[event](self):
e.raw = bytes(self._last_bytes)
for event in t.cast(dict, self.__handlers__)[event_id](self):
event.raw = bytes(self._last_bytes)
self._last_bytes.clear()
yield e
yield event
except KeyError as exc:
raise ValueError(
f"Unhandled event: {event} (offset: {self._offset}, last read: {self._last_read})"
f"Unhandled event: {event_id} (offset: {self._offset}, last read: {self._last_read})"
) from exc

def parse(self) -> t.Iterator[MojoEvent]:
Expand All @@ -386,7 +417,6 @@ def parse(self) -> t.Iterator[MojoEvent]:


def main() -> None:
"""austin2speedscope entry point."""
from argparse import ArgumentParser

arg_parser = ArgumentParser(
Expand Down
241 changes: 241 additions & 0 deletions austin/tools/resolve.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
# This file is part of "austin-python" which is released under GPL.
#
# See file LICENCE or go to http://www.gnu.org/licenses/ for full license
# details.
#
# austin-python is a Python wrapper around Austin, the CPython frame stack
# sampler.
#
# Copyright (c) 2018-2022 Gabriele N. Tornetta <phoenix1987@gmail.com>.
# All rights reserved.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import typing as t
from subprocess import check_output

from austin.format.mojo import MojoFile
from austin.format.mojo import MojoFrame
from austin.format.mojo import MojoMetadata
from austin.format.mojo import MojoString
from austin.format.mojo import to_varint


__version__ = "0.1.0"


def demangle_cython(function: str) -> str:
    """Demangle a Cython function name.

    Names starting with ``__pyx_pymod_`` are reduced to whatever follows the
    first underscore after that prefix. For any other name, the function
    locates the first digit and then walks a sequence of length-prefixed
    components (``<decimal length><component>_``), returning the tail that
    follows the last such component. Names starting with ``__pyx_fuse_`` are
    first stripped up to the nested ``__pyx_`` marker.

    Args:
        function: The mangled name.

    Returns:
        The demangled name, or the (possibly fuse-stripped) name unchanged
        when no tail follows the length-prefixed components.

    Raises:
        ValueError: If the name contains no digit at all, or if a
            ``__pyx_fuse_`` name has no nested ``__pyx_`` marker.
    """
    if function.startswith("__pyx_pymod_"):
        _, _, function = function[12:].partition("_")
        return function

    if function.startswith("__pyx_fuse_"):
        function = function[function[12:].index("__pyx_") + 12 :]

    for i, d in enumerate(function):
        if d.isdigit():
            break
    else:
        raise ValueError(f"Invalid Cython mangled name: {function}")

    size = len(function)
    n = 0
    while i < size:
        c = function[i]
        i += 1
        if c.isdigit():
            # Accumulate the decimal length of the next component.
            n = n * 10 + int(c)
            continue

        # ``c`` was the first character of the component: skip the rest of
        # it together with the separator that follows.
        i += n
        n = 0
        if i >= size:
            # The name ends with (or inside) the component, so there is no
            # tail to return. Fall back to the name as-is rather than
            # indexing past the end of the string.
            break
        if not function[i].isdigit():
            return function[i:]

    return function


class Maps:
    """Keep mappings between objects to resolve.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self) -> None:
        # TODO: Use an interval tree instead!
        # Memory ranges as (low address, high address, binary path).
        self.maps: t.List[t.Tuple[int, int, str]] = []
        # Lowest mapped address seen so far for each binary path.
        self.bases: t.Dict[str, int] = {}
        # Memoised results of ``addr2line``, keyed by the hex address string.
        self.cache: t.Dict[str, t.Optional[t.Tuple[str, t.Optional[int]]]] = {}
        # Encoded line numbers keyed by an integer key. Not populated by this
        # class; callers fill it in.
        self.lines: t.Dict[int, bytes] = {}

    @staticmethod
    def _parse_location(location: str) -> t.Tuple[str, int]:
        """Split an ``addr2line`` ``source:line`` location into its parts.

        The split happens on the last colon so that colons in the source
        path are preserved. A missing or non-numeric line (e.g. ``?``) is
        reported as ``0``.
        """
        source, sep, line = location.rpartition(":")
        if not sep:
            return location, 0
        try:
            return source, int(line)
        except ValueError:
            return source, 0

    def addr2line(self, address: str) -> t.Optional[t.Tuple[str, t.Optional[int]]]:
        """Resolve a hexadecimal address to a ``(source, line)`` pair.

        The address is looked up in the registered maps; when a range matches,
        the external ``addr2line`` program is invoked on the owning binary
        with the offset of the address from the binary's base. Results
        (including failures to find a range) are cached per address string.

        Returns:
            ``None`` if no registered range contains the address. If
            ``addr2line`` cannot resolve it (output starts with ``??``), the
            pair ``(binary, offset)``. Otherwise ``(source, line)``, with
            ``line`` set to ``0`` when it could not be parsed.
        """
        if address in self.cache:
            return self.cache[address]

        addr = int(address, 16)
        # NOTE(review): both bounds are treated as inclusive here -- confirm
        # whether the upper bound of a map entry is meant to be exclusive.
        for lo, hi, binary in self.maps:
            if lo <= addr <= hi:
                break
        else:
            self.cache[address] = None
            return None

        offset = addr - self.bases[binary]
        # Keep only the first whitespace-delimited token of the output; this
        # drops any trailing annotation printed after the location.
        resolved, _, _ = (
            check_output(["addr2line", "-Ce", binary, f"{offset:x}"])
            .decode()
            .strip()
            .partition(" ")
        )

        entry: t.Tuple[str, t.Optional[int]]
        if resolved.startswith("??"):
            entry = (binary, offset)
        else:
            entry = self._parse_location(resolved)

        self.cache[address] = entry
        return entry

    def add(self, line: str) -> None:
        """Register a memory map from a ``# map: <low>-<high> <binary>`` line.

        The first seven characters of the line (the ``# map: `` prefix) are
        discarded; bounds are parsed as hexadecimal. The base of the binary is
        updated to the lowest low bound seen for it.
        """
        bounds, _, binary = line[7:].strip().partition(" ")
        low, _, high = bounds.partition("-")
        lo = int(low, 16)
        hi = int(high, 16)
        self.maps.append((lo, hi, binary))
        if binary in self.bases:
            self.bases[binary] = min(self.bases[binary], lo)
        else:
            self.bases[binary] = lo

    def resolve(self, line: str) -> str:
        """Resolve the frames of a single sample line.

        The line is split on its last space into a ``;``-separated frame list
        and a trailing metrics field. Entries that are not exactly of the form
        ``head:function:lineno`` are copied through untouched. Cython wrapper
        frames (``__pyx_pw_``/``__pyx_pf_``) are dropped, other ``__pyx_``
        functions are demangled, and ``native@<address>`` heads are replaced
        with the source reported by :meth:`addr2line` when available.
        """
        parts = []
        frames, _, metrics = line.strip().rpartition(" ")
        for part in frames.split(";"):
            try:
                head, function, lineno = part.split(":")
            except ValueError:
                parts.append(part)
                continue

            if function.startswith(("__pyx_pw_", "__pyx_pf_")):
                # skip Cython wrappers (cpdef)
                continue
            if function.startswith("__pyx_"):
                function = demangle_cython(function)

            resolved = None
            if head.startswith("native@"):
                _, _, address = head.partition("@")
                resolved = self.addr2line(address)

            if resolved is None:
                parts.append(":".join((head, function, lineno)))
            else:
                source, native_lineno = resolved
                # A falsy resolved line (None or 0) keeps the original one.
                parts.append(f"{source}:{function}:{native_lineno or lineno}")

        return " ".join((";".join(parts), metrics))

    def resolve_string(self, string: "MojoString") -> t.Optional[tuple]:
        """Resolve the value of a string event.

        Returns:
            ``(None, demangled_name, None)`` for ``__pyx_`` names that are
            not Cython wrappers, ``(filename, None, line)`` for
            ``native@<address>`` values that :meth:`addr2line` can resolve,
            and ``None`` when there is nothing to resolve.
        """
        value = string.value
        if value.startswith("__pyx_") and not value.startswith(
            ("__pyx_pw_", "__pyx_pf_")
        ):
            return (None, demangle_cython(value), None)

        if value.startswith("native@"):
            _, _, address = value.partition("@")
            resolved = self.addr2line(address)
            if resolved is not None:
                filename, line = resolved
                return (filename, None, int(line) if line is not None else 0)

        return None


def resolve_mojo(input: str, output: str) -> None:
    """Resolve a MOJO sample file and write the result to ``output``.

    The header and every parsed event are echoed to the output in order.
    ``map`` metadata events are additionally registered with a ``Maps``
    instance, string events that can be resolved have their value swapped in
    their raw bytes, and frame events whose filename string was resolved get
    their trailing encoded line number replaced.
    """
    maps = Maps()
    with open(input, "rb") as mojo, open(output, "wb") as fout:
        # Constructing the MojoFile fails if the input is not a MOJO file.
        mojo_file = MojoFile(mojo)

        # The header is copied over verbatim.
        fout.write(mojo_file.header)

        for event in mojo_file.parse():
            if isinstance(event, MojoMetadata) and event.key == "map":
                maps.add(event.to_austin())

            elif isinstance(event, MojoString):
                resolution = maps.resolve_string(event)
                if resolution is not None:
                    filename, scope, line = resolution
                    old_bytes = event.value.encode()
                    new_bytes = (filename or scope).encode()
                    event.raw = event.raw.replace(old_bytes, new_bytes)

                    # Remember the resolved line so that frames referring to
                    # this string can be patched when they are encountered.
                    if filename is not None:
                        maps.lines[event.key] = to_varint(line)

            elif isinstance(event, MojoFrame):
                string_key = event.filename.string.key
                if string_key in maps.lines:
                    # Drop the encoded line number at the end of the raw
                    # frame and append the resolved one in its place.
                    stale_size = len(to_varint(event.line))
                    event.raw = event.raw[:-stale_size] + maps.lines[string_key]

            # Every event is echoed, whether it was modified or not.
            fout.write(event.raw)


def resolve_austin(input: str, output: str) -> None:
    """Resolve a text sample file and write the result to ``output``.

    ``# map: `` lines are consumed to build the address maps and are not
    copied to the output. Other ``# `` lines and empty lines are copied
    verbatim. Every remaining line is treated as a sample and resolved.
    """
    maps = Maps()
    with open(input) as fin, open(output, "w") as fout:
        for line in fin:
            if line.startswith("# map: "):
                maps.add(line)
                continue

            is_passthrough = line.startswith("# ") or line == "\n"
            if is_passthrough:
                fout.write(line)
            else:
                # Resolved samples come back without a trailing newline.
                fout.write(maps.resolve(line) + "\n")


def main() -> None:
    """Run the ``austinp-resolve`` command-line tool.

    Parses the ``input`` and ``output`` positional arguments, then tries to
    process the input as a MOJO file first and, if that fails for any reason,
    as a text sample file. On failure an error message is written to standard
    error and the process exits with status 1.
    """
    import sys
    from argparse import ArgumentParser

    arg_parser = ArgumentParser(
        prog="austinp-resolve",
        description="Resolve native symbols in austinp sample files.",
    )

    arg_parser.add_argument(
        "input",
        type=str,
        help="The input file to resolve.",
    )
    arg_parser.add_argument(
        "output", type=str, help="The path of the resolved file to write to."
    )

    arg_parser.add_argument("-V", "--version", action="version", version=__version__)

    args = arg_parser.parse_args()

    try:
        try:
            resolve_mojo(args.input, args.output)
        except Exception:
            # Deliberate best-effort: whatever prevented the input from being
            # handled as MOJO, retry it as a text sample file. A genuine
            # problem resurfaces from the second attempt and is reported
            # below.
            resolve_austin(args.input, args.output)
    except FileNotFoundError:
        print(f"No such input file: {args.input}", file=sys.stderr)
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to be available.
        sys.exit(1)
    except Exception as e:
        print(f"File format not recognised: {e}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
main()
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ austin-diff = "austin.tools.diff:main"
austin2pprof = "austin.format.pprof.__main__:main"
austin2speedscope = "austin.format.speedscope:main"
mojo2austin = "austin.format.mojo:main"
austinp-resolve = "austin.tools.resolve:main"

[tool.hatch.envs.tests]
template = "tests"
Expand Down
Binary file added test/data/austinp.mojo
Binary file not shown.
Binary file added test/data/austinp.resolved.mojo
Binary file not shown.
13 changes: 13 additions & 0 deletions test/format/test_mojo.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,13 @@

import sys
import tempfile
from io import BytesIO
from pathlib import Path
from random import randint

from austin.format.mojo import MojoFile
from austin.format.mojo import main
from austin.format.mojo import to_varint


HERE = Path(__file__).parent
Expand All @@ -42,3 +46,12 @@ def test_mojo_snapshot():
main()

assert expected.read_text() == output.read_text()


def test_mojo_varint():
    """Check that ``to_varint`` output is decoded back by ``MojoFile.read_int``."""
    # Integer bound: random.randint no longer accepts floats such as 4e9
    # (deprecated in Python 3.10, TypeError from 3.12).
    bound = 4_000_000_000

    def roundtrip(n):
        # "MOJ" followed by a zero byte forms the header consumed by the
        # MojoFile constructor; the encoded integer comes right after it.
        buffer = BytesIO()
        buffer.write(b"MOJ\0" + to_varint(n))
        buffer.seek(0)
        return MojoFile(buffer).read_int()

    # Deterministic values around the single-byte/multi-byte boundary.
    for n in (0, 1, -1, 63, -63, 64, -64, bound, -bound):
        assert roundtrip(n) == n

    for _ in range(100_000):
        n = randint(-bound, bound)
        assert roundtrip(n) == n
Loading

0 comments on commit 4517b97

Please sign in to comment.