Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Draft] Fix BackgroundCommand.send_signal() and add trace-cmd record support #689

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
20 changes: 20 additions & 0 deletions devlib/bin/scripts/devlib-signal-target
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
(
# If there is no data dir, it means we are not running as a background
# command so we just do nothing
if [ -e "$_DEVLIB_BG_CMD_DATA_DIR" ]; then
pid_file="$_DEVLIB_BG_CMD_DATA_DIR/pid"
# Atomically check if the PID file already exist and make the write
# fail if it already does. This way we don't have any race condition
# with the Python API, as there is either no PID or the same PID for
# the duration of the command
set -o noclobber
if ! printf "%u\n" $$ > "$pid_file"; then
echo "$0 was already called for this command" >&2
exit 1
fi
fi
) || exit $?

# Use exec so that the PID of the command we run is the same as the current $$
# PID that we just registered
exec "$@"
73 changes: 57 additions & 16 deletions devlib/collector/ftrace.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import sys
import contextlib
from shlex import quote
import signal

from devlib.collector import (CollectorBase, CollectorOutput,
CollectorOutputEntry)
Expand Down Expand Up @@ -71,6 +72,7 @@ def __init__(self, target,
report_on_target=False,
trace_clock='local',
saved_cmdlines_nr=4096,
mode='write-to-memory',
):
super(FtraceCollector, self).__init__(target)
self.events = events if events is not None else DEFAULT_EVENTS
Expand Down Expand Up @@ -98,6 +100,8 @@ def __init__(self, target,
self.trace_clock = trace_clock
self.saved_cmdlines_nr = saved_cmdlines_nr
self._reset_needed = True
self.mode = mode
self._bg_cmd = None

# pylint: disable=bad-whitespace
# Setup tracing paths
Expand Down Expand Up @@ -284,18 +288,33 @@ async def start(self):
with contextlib.suppress(TargetStableError):
self.target.write_value('/proc/sys/kernel/kptr_restrict', 0)

self.target.execute(
'{} start -B devlib {buffer_size} {cmdlines_size} {clock} {events} {tracer} {functions}'.format(
self.target_binary,
events=self.event_string,
tracer=tracer_string,
functions=tracecmd_functions,
buffer_size='-b {}'.format(self.buffer_size) if self.buffer_size is not None else '',
clock='-C {}'.format(self.trace_clock) if self.trace_clock else '',
cmdlines_size='--cmdlines-size {}'.format(self.saved_cmdlines_nr) if self.saved_cmdlines_nr is not None else '',
),
as_root=True,
params = '-B devlib {buffer_size} {cmdlines_size} {clock} {events} {tracer} {functions}'.format(
events=self.event_string,
tracer=tracer_string,
functions=tracecmd_functions,
buffer_size='-b {}'.format(self.buffer_size) if self.buffer_size is not None else '',
clock='-C {}'.format(self.trace_clock) if self.trace_clock else '',
cmdlines_size='--cmdlines-size {}'.format(self.saved_cmdlines_nr) if self.saved_cmdlines_nr is not None else '',
)

mode = self.mode
if mode == 'write-to-disk':
bg_cmd = self.target.background(
# cd into the working_directory first to workaround this issue:
# https://lore.kernel.org/linux-trace-devel/20240119162743.1a107fa9@gandalf.local.home/
f'cd {self.target.working_directory} && devlib-signal-target {self.target_binary} record -o {quote(self.target_output_file)} {params}',
as_root=True,
)
assert self._bg_cmd is None
self._bg_cmd = bg_cmd.__enter__()
elif mode == 'write-to-memory':
self.target.execute(
f'{self.target_binary} start {params}',
as_root=True,
)
else:
raise ValueError(f'Unknown mode {mode}')

if self.automark:
self.mark_start()
if 'cpufreq' in self.target.modules:
Expand Down Expand Up @@ -330,8 +349,21 @@ def stop(self):
self.stop_time = time.time()
if self.automark:
self.mark_stop()
self.target.execute('{} stop -B devlib'.format(self.target_binary),
timeout=TIMEOUT, as_root=True)

mode = self.mode
if mode == 'write-to-disk':
bg_cmd = self._bg_cmd
self._bg_cmd = None
assert bg_cmd is not None
bg_cmd.send_signal(signal.SIGINT)
bg_cmd.communicate()
bg_cmd.__exit__(None, None, None)
elif mode == 'write-to-memory':
self.target.execute('{} stop -B devlib'.format(self.target_binary),
timeout=TIMEOUT, as_root=True)
else:
raise ValueError(f'Unknown mode {mode}')

self._reset_needed = True

def set_output(self, output_path):
Expand All @@ -342,9 +374,18 @@ def set_output(self, output_path):
def get_data(self):
if self.output_path is None:
raise RuntimeError("Output path was not set.")
self.target.execute('{0} extract -B devlib -o {1}; chmod 666 {1}'.format(self.target_binary,
self.target_output_file),
timeout=TIMEOUT, as_root=True)

busybox = quote(self.target.busybox)

mode = self.mode
if mode == 'write-to-disk':
# Interrupting trace-cmd record will make it create the file
pass
elif mode == 'write-to-memory':
cmd = f'{self.target_binary} extract -B devlib -o {self.target_output_file} && {busybox} chmod 666 {self.target_output_file}'
self.target.execute(cmd, timeout=TIMEOUT, as_root=True)
else:
raise ValueError(f'Unknown mode {mode}')

# The size of trace.dat will depend on how long trace-cmd was running.
# Therefore timout for the pull command must also be adjusted
Expand Down
147 changes: 107 additions & 40 deletions devlib/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from contextlib import contextmanager, nullcontext
from shlex import quote
import os
from pathlib import Path
import signal
import subprocess
import threading
Expand All @@ -25,14 +26,11 @@
import select
import fcntl

from devlib.utils.misc import InitCheckpoint
from devlib.utils.misc import InitCheckpoint, memoized

_KILL_TIMEOUT = 3


def _kill_pgid_cmd(pgid, sig, busybox):
return '{} kill -{} -{}'.format(busybox, sig.value, pgid)

def _popen_communicate(bg, popen, input, timeout):
try:
stdout, stderr = popen.communicate(input=input, timeout=timeout)
Expand Down Expand Up @@ -130,8 +128,11 @@ class BackgroundCommand(ABC):
semantic as :class:`subprocess.Popen`.
"""

def __init__(self, conn):
def __init__(self, conn, data_dir, cmd, as_root):
self.conn = conn
self._data_dir = data_dir
self.as_root = as_root
self.cmd = cmd

# Poll currently opened background commands on that connection to make
# them deregister themselves if they are completed. This avoids
Expand All @@ -147,15 +148,65 @@ def __init__(self, conn):

conn._current_bg_cmds.add(self)

@classmethod
def from_factory(cls, conn, cmd, as_root, make_init_kwargs):
cmd, data_dir = cls._with_data_dir(conn, cmd)
return cls(
conn=conn,
data_dir=data_dir,
cmd=cmd,
as_root=as_root,
**make_init_kwargs(cmd),
)

def _deregister(self):
try:
self.conn._current_bg_cmds.remove(self)
except KeyError:
pass

@abstractmethod
def _send_signal(self, sig):
pass
@property
def _pid_file(self):
return str(Path(self._data_dir, 'pid'))

@property
@memoized
def _targeted_pid(self):
"""
PID of the process pointed at by ``devlib-signal-target`` command.
"""
path = quote(self._pid_file)
busybox = quote(self.conn.busybox)

def execute(cmd):
return self.conn.execute(cmd, as_root=self.as_root)

while self.poll() is None:
try:
pid = execute(f'{busybox} cat {path}')
except subprocess.CalledProcessError:
time.sleep(0.01)
else:
if pid.endswith('\n'):
return int(pid.strip())
else:
# We got a partial write in the PID file
continue

raise ValueError(f'The background commmand did not use devlib-signal-target wrapper to designate which command should be the target of signals')

@classmethod
def _with_data_dir(cls, conn, cmd):
busybox = quote(conn.busybox)
data_dir = conn.execute(f'{busybox} mktemp -d').strip()
cmd = f'_DEVLIB_BG_CMD_DATA_DIR={data_dir} exec {busybox} sh -c {quote(cmd)}'
return cmd, data_dir

def _cleanup_data_dir(self):
path = quote(self._data_dir)
busybox = quote(self.conn.busybox)
cmd = f'{busybox} rm -r {path} || true'
self.conn.execute(cmd, as_root=self.as_root)

def send_signal(self, sig):
"""
Expand All @@ -165,8 +216,29 @@ def send_signal(self, sig):
:param signal: Signal to send.
:type signal: signal.Signals
"""

def execute(cmd):
return self.conn.execute(cmd, as_root=self.as_root)

def send(sig):
busybox = quote(self.conn.busybox)
# If the command has already completed, we don't want to send a
# signal to another process that might have gotten that PID in the
# meantime.
if self.poll() is None:
if sig in (signal.SIGTERM, signal.SIGQUIT, signal.SIGKILL):
# Use -PGID to target a process group rather than just the
# process itself. This will work in any condition and will
# not require cooperation from the command.
execute(f'{busybox} kill -{sig.value} -{self.pid}')
else:
# Other signals require cooperation from the shell command
# so that it points to a specific process using
# devlib-signal-target
pid = self._targeted_pid
execute(f'{busybox} kill -{sig.value} {pid}')
try:
return self._send_signal(sig)
return send(sig)
finally:
# Deregister if the command has finished
self.poll()
Expand Down Expand Up @@ -287,6 +359,7 @@ def close(self):
return self._close()
finally:
self._deregister()
self._cleanup_data_dir()

def __enter__(self):
return self
Expand All @@ -300,13 +373,15 @@ class PopenBackgroundCommand(BackgroundCommand):
:class:`subprocess.Popen`-based background command.
"""

def __init__(self, conn, popen):
super().__init__(conn=conn)
def __init__(self, conn, data_dir, cmd, as_root, popen):
super().__init__(
conn=conn,
data_dir=data_dir,
cmd=cmd,
as_root=as_root,
)
self.popen = popen

def _send_signal(self, sig):
return os.killpg(self.popen.pid, sig)

@property
def stdin(self):
return self.popen.stdin
Expand Down Expand Up @@ -354,26 +429,20 @@ class ParamikoBackgroundCommand(BackgroundCommand):
"""
:mod:`paramiko`-based background command.
"""
def __init__(self, conn, chan, pid, as_root, cmd, stdin, stdout, stderr, redirect_thread):
super().__init__(conn=conn)
def __init__(self, conn, data_dir, cmd, as_root, chan, pid, stdin, stdout, stderr, redirect_thread):
super().__init__(
conn=conn,
data_dir=data_dir,
cmd=cmd,
as_root=as_root,
)

self.chan = chan
self.as_root = as_root
self._pid = pid
self._stdin = stdin
self._stdout = stdout
self._stderr = stderr
self.redirect_thread = redirect_thread
self.cmd = cmd

def _send_signal(self, sig):
# If the command has already completed, we don't want to send a signal
# to another process that might have gotten that PID in the meantime.
if self.poll() is not None:
return
# Use -PGID to target a process group rather than just the process
# itself
cmd = _kill_pgid_cmd(self.pid, sig, self.conn.busybox)
self.conn.execute(cmd, as_root=self.as_root)

@property
def pid(self):
Expand Down Expand Up @@ -517,18 +586,16 @@ class AdbBackgroundCommand(BackgroundCommand):
``adb``-based background command.
"""

def __init__(self, conn, adb_popen, pid, as_root):
super().__init__(conn=conn)
self.as_root = as_root
def __init__(self, conn, data_dir, cmd, as_root, adb_popen, pid):
super().__init__(
conn=conn,
data_dir=data_dir,
cmd=cmd,
as_root=as_root,
)
self.adb_popen = adb_popen
self._pid = pid

def _send_signal(self, sig):
self.conn.execute(
_kill_pgid_cmd(self.pid, sig, self.conn.busybox),
as_root=self.as_root,
)

@property
def stdin(self):
return self.adb_popen.stdin
Expand Down Expand Up @@ -638,7 +705,7 @@ def cancel(self):


class PopenTransferHandle(TransferHandleBase):
def __init__(self, bg_cmd, dest, direction, *args, **kwargs):
def __init__(self, popen, dest, direction, *args, **kwargs):
super().__init__(*args, **kwargs)

if direction == 'push':
Expand All @@ -650,7 +717,7 @@ def __init__(self, bg_cmd, dest, direction, *args, **kwargs):

self.sample_size = lambda: sample_size(dest)

self.bg_cmd = bg_cmd
self.popen = popen
self.last_sample = 0

@staticmethod
Expand All @@ -671,7 +738,7 @@ def _push_dest_size(self, dest):
return int(out.split()[0])

def cancel(self):
self.bg_cmd.cancel()
self.popen.terminate()

def isactive(self):
try:
Expand Down
Loading