Skip to content

Commit

Permalink
Add flag to handle running processes automatically (#954)
Browse files Browse the repository at this point in the history
With this commit we add a new command line flag `--kill-running-processes` that
allows to control Rally's behavior when leftover Rally processes are encountered
on the machine.

Closes #922
  • Loading branch information
bartier authored Apr 8, 2020
1 parent 763aebf commit 97eaa37
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 6 deletions.
9 changes: 9 additions & 0 deletions docs/command_line_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,15 @@ The following invocation will list the 50 most recent races::

Suppresses some output on the command line.

``kill-running-processes``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Rally attempts to generate benchmark results that are not skewed unintentionally. Consequently, if a benchmark is already running, Rally will not allow you to start another one. Instead, you should stop the current benchmark and then start the new one manually. Add this flag to have Rally handle this process automatically for you.

Only one Rally benchmark is allowed to run at a time. If this flag is set and other Rally processes are running, Rally kills them and then continues to run the new benchmark.

The default value is ``false``.

``offline``
~~~~~~~~~~~

Expand Down
37 changes: 31 additions & 6 deletions esrally/rally.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,12 @@ def runtime_jdk(v):
help="Suppress as much as output as possible (default: false).",
default=False,
action="store_true")
p.add_argument(
"--kill-running-processes",
action="store_true",
default=False,
help="If any processes is running, it is going to kill them and allow Rally to continue to run."
)

return parser

Expand Down Expand Up @@ -611,12 +617,30 @@ def print_help_on_errors():


def race(cfg):
    """
    Runs a benchmark after making sure no other Rally process gets in the way.

    The behavior depends on the ``kill.running.processes`` option in the ``system`` section of ``cfg``:

    * truthy: other Rally instances are killed (best effort) and the benchmark is started regardless of
      whether the cleanup succeeded.
    * falsy: if other Rally processes are found, the benchmark is not started and an error is raised.

    :param cfg: The config object. It is queried via ``cfg.opts(...)`` and handed on to the actor system.
    :raises exceptions.RallyError: If other Rally processes are running and ``kill.running.processes`` is not set.
    """
    logger = logging.getLogger(__name__)

    # The flag must be read *before* any check for other processes; an unconditional check up front
    # would raise and make the flag ineffective.
    kill_running_processes = cfg.opts("system", "kill.running.processes")

    if kill_running_processes:
        logger.info("Killing running Rally processes")

        # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a singleton on this machine
        # noinspection PyBroadException
        try:
            process.kill_running_rally_instances()
        except Exception:
            # Deliberately best effort: log and carry on. ``Exception`` (rather than ``BaseException``) is
            # caught so that Ctrl-C / interpreter shutdown requests are not swallowed here.
            logger.exception(
                "Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")
    else:
        other_rally_processes = process.find_all_other_rally_processes()
        if other_rally_processes:
            pids = [p.pid for p in other_rally_processes]

            msg = f"There are other Rally processes running on this machine (PIDs: {pids}) but only one Rally " \
                  "benchmark is allowed to run at the same time.\n\nYou can use --kill-running-processes flag " \
                  "to kill running processes automatically and allow Rally to continue to run a new benchmark. " \
                  "Otherwise, you need to manually kill them."
            raise exceptions.RallyError(msg)

    with_actor_system(racecontrol.run, cfg)

Expand Down Expand Up @@ -774,6 +798,7 @@ def main():
cfg.add(config.Scope.applicationOverride, "system", "race.id", args.race_id)
cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)
cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
cfg.add(config.Scope.applicationOverride, "system", "kill.running.processes", args.kill_running_processes)

# Local config per node
cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
Expand Down
11 changes: 11 additions & 0 deletions esrally/utils/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,14 @@ def for_all_other_processes(predicate, action):
action(p)
except (psutil.ZombieProcess, psutil.AccessDenied, psutil.NoSuchProcess):
pass


def kill_running_rally_instances():
    """
    Hands a predicate that recognizes Rally benchmark processes to ``kill_all``.

    A process matches if it is named ``esrally`` or ``rally``, or if it is a Python interpreter whose
    command line mentions ``esrally`` but not ``esrallyd``.
    """
    def rally_process(p):
        proc_name = p.name()
        # Rally started via one of its entry point scripts
        if proc_name in ("esrally", "rally"):
            return True
        # Anything else only qualifies when it is a Python interpreter ...
        if not proc_name.lower().startswith("python"):
            return False
        # ... that runs esrally; the esrallyd daemon is explicitly left alone.
        args = p.cmdline()
        mentions_rally = any("esrally" in arg for arg in args)
        return mentions_rally and not any("esrallyd" in arg for arg in args)

    kill_all(rally_process)
49 changes: 49 additions & 0 deletions tests/utils/process_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,52 @@ def test_find_no_other_rally_process_running(self, process_iter):
process_iter.return_value = [ metrics_store_process, random_python]

self.assertEqual(0, len(process.find_all_other_rally_processes()))

@mock.patch("psutil.process_iter")
def test_kills_only_rally_processes(self, process_iter):
    """
    Checks that ``process.kill_running_rally_instances()`` kills exactly the Rally benchmark processes
    among a mixed process list and spares everything else, including the current process.
    """
    new_process = ProcessTests.Process

    # Elasticsearch nodes (benchmark candidates and the metrics store) must survive
    rally_es_5_process = new_process(100, "java",
                                     ["/usr/lib/jvm/java-8-oracle/bin/java", "-Xms2g", "-Xmx2g", "-Enode.name=rally-node0",
                                      "org.elasticsearch.bootstrap.Elasticsearch"])
    rally_es_1_process = new_process(101, "java",
                                     ["/usr/lib/jvm/java-8-oracle/bin/java", "-Xms2g", "-Xmx2g", "-Des.node.name=rally-node0",
                                      "org.elasticsearch.bootstrap.Elasticsearch"])
    metrics_store_process = new_process(102, "java",
                                        ["/usr/lib/jvm/java-8-oracle/bin/java", "-Xms2g", "-Xmx2g",
                                         "-Des.path.home=~/rally/metrics/",
                                         "org.elasticsearch.bootstrap.Elasticsearch"])
    # Unrelated processes must survive as well
    random_python = new_process(103, "python3", ["/some/django/app"])
    other_process = new_process(104, "init", ["/usr/sbin/init"])
    night_rally_process = new_process(110, "Python", ["/Python.app/Contents/MacOS/Python", "~/.local/bin/night_rally"])
    # fake own process by determining our pid
    own_rally_process = new_process(os.getpid(), "Python", ["/Python.app/Contents/MacOS/Python", "~/.local/bin/esrally"])

    # Rally itself, under every process name / interpreter variant we recognize
    rally_process_p = new_process(105, "python3", ["/usr/bin/python3", "~/.local/bin/esrally"])
    rally_process_r = new_process(106, "rally", ["/usr/bin/python3", "~/.local/bin/esrally"])
    rally_process_e = new_process(107, "esrally", ["/usr/bin/python3", "~/.local/bin/esrally"])
    rally_process_mac = new_process(108, "Python", ["/Python.app/Contents/MacOS/Python", "~/.local/bin/esrally"])

    process_iter.return_value = [
        rally_es_1_process,
        rally_es_5_process,
        metrics_store_process,
        random_python,
        other_process,
        rally_process_p,
        rally_process_r,
        rally_process_e,
        rally_process_mac,
        own_rally_process,
        night_rally_process,
    ]

    process.kill_running_rally_instances()

    for spared in (rally_es_5_process, rally_es_1_process, metrics_store_process, random_python, other_process):
        self.assertFalse(spared.killed)
    for terminated in (rally_process_p, rally_process_r, rally_process_e, rally_process_mac):
        self.assertTrue(terminated.killed)
    for spared in (own_rally_process, night_rally_process):
        self.assertFalse(spared.killed)

0 comments on commit 97eaa37

Please sign in to comment.