Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add flag to handle running processes automatically #954

Merged
merged 5 commits into from
Apr 8, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions docs/command_line_reference.rst
Original file line number Diff line number Diff line change
Expand Up @@ -710,6 +710,15 @@ The following invocation will list the 50 most recent races::

Suppresses some output on the command line.

``kill-running-processes``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Rally attempts to generate benchmark results that are not skewed unintentionally. Consequently, if a benchmark is already running, Rally will not allow you to start another one. Instead, you have to stop the current benchmark and start the new one manually. Add this flag to let Rally handle this process automatically for you.

Only one Rally benchmark is allowed to run at the same time. If any other Rally processes are running when this flag is set, Rally kills them and then continues to run the new benchmark.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we could add an explanation why we want that (in order to ensure that benchmark results are not skewed due to unintentionally running multiple benchmarks at the same time).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added more information in the docs about why we want that. Can you check again?


The default value is ``false``.

``offline``
~~~~~~~~~~~

Expand Down
37 changes: 31 additions & 6 deletions esrally/rally.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,12 @@ def runtime_jdk(v):
help="Suppress as much as output as possible (default: false).",
default=False,
action="store_true")
p.add_argument(
"--kill-running-processes",
action="store_true",
default=False,
help="If any processes is running, it is going to kill them and allow Rally to continue to run."
)

return parser

Expand Down Expand Up @@ -611,12 +617,30 @@ def print_help_on_errors():


def race(cfg):
    """
    Starts a benchmark after dealing with other Rally processes on this machine.

    If the ``kill.running.processes`` option in the ``system`` section is set, other Rally processes are
    terminated on a best-effort basis: a failure to do so is logged and the benchmark is attempted anyway.
    If the option is not set and other Rally processes are found, the benchmark is not started.

    :param cfg: The config object. ``cfg.opts("system", "kill.running.processes")`` is read from it and the
                object is passed on to ``racecontrol.run`` via ``with_actor_system``.
    :raises exceptions.RallyError: If other Rally processes are running and ``kill.running.processes`` is not set.
    """
    logger = logging.getLogger(__name__)

    if cfg.opts("system", "kill.running.processes"):
        logger.info("Killing running Rally processes")

        # Kill any lingering Rally processes before attempting to continue - the actor system needs to be a
        # singleton on this machine.
        #
        # Only ``Exception`` is caught on purpose: ``KeyboardInterrupt`` and ``SystemExit`` must still abort
        # the run instead of being logged and ignored.
        # noinspection PyBroadException
        try:
            process.kill_running_rally_instances()
        except Exception:
            logger.exception(
                "Could not terminate potentially running Rally instances correctly. Attempting to go on anyway.")
    else:
        other_rally_processes = process.find_all_other_rally_processes()
        if other_rally_processes:
            pids = [p.pid for p in other_rally_processes]

            # Only the first fragment interpolates a value; the remaining ones are plain literals.
            msg = (
                f"There are other Rally processes running on this machine (PIDs: {pids}) but only one Rally "
                "benchmark is allowed to run at the same time.\n\nYou can use --kill-running-processes flag "
                "to kill running processes automatically and allow Rally to continue to run a new benchmark. "
                "Otherwise, you need to manually kill them."
            )
            raise exceptions.RallyError(msg)

    with_actor_system(racecontrol.run, cfg)

Expand Down Expand Up @@ -774,6 +798,7 @@ def main():
cfg.add(config.Scope.applicationOverride, "system", "race.id", args.race_id)
cfg.add(config.Scope.applicationOverride, "system", "quiet.mode", args.quiet)
cfg.add(config.Scope.applicationOverride, "system", "offline.mode", args.offline)
cfg.add(config.Scope.applicationOverride, "system", "kill.running.processes", args.kill_running_processes)

# Local config per node
cfg.add(config.Scope.application, "node", "rally.root", paths.rally_root())
Expand Down
11 changes: 11 additions & 0 deletions esrally/utils/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,14 @@ def for_all_other_processes(predicate, action):
action(p)
except (psutil.ZombieProcess, psutil.AccessDenied, psutil.NoSuchProcess):
pass


def kill_running_rally_instances():
    """
    Passes a predicate that recognizes Rally processes to ``kill_all``.

    A process is considered a Rally process if it is named ``esrally`` or ``rally``, or if it is a Python
    interpreter whose command line mentions ``esrally`` but not ``esrallyd``.

    NOTE(review): which processes are actually iterated (e.g. whether the current process is excluded) is
    decided by ``kill_all`` - confirm there.
    """
    def rally_process(p):
        # Query the name once; the original asked the process up to three times for it.
        name = p.name()
        if name in ("esrally", "rally"):
            return True
        if not name.lower().startswith("python"):
            return False
        # Only inspect the command line for Python interpreters and read it a single time so that both
        # checks below see the same snapshot.
        cmdline = p.cmdline()
        return any("esrally" in e for e in cmdline) and not any("esrallyd" in e for e in cmdline)

    kill_all(rally_process)
49 changes: 49 additions & 0 deletions tests/utils/process_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,52 @@ def test_find_no_other_rally_process_running(self, process_iter):
process_iter.return_value = [ metrics_store_process, random_python]

self.assertEqual(0, len(process.find_all_other_rally_processes()))

@mock.patch("psutil.process_iter")
def test_kills_only_rally_processes(self, process_iter):
    # Verifies that kill_running_rally_instances() kills exactly the Rally processes among a mixed set of
    # fake processes and leaves everything else (including the process running this test) untouched.
    new_process = ProcessTests.Process
    java = "/usr/lib/jvm/java-8-oracle/bin/java"
    mac_python = "/Python.app/Contents/MacOS/Python"
    es_main_class = "org.elasticsearch.bootstrap.Elasticsearch"

    # Elasticsearch nodes (benchmark candidates and the metrics store) - none of them is a Rally process
    es_5_node = new_process(100, "java", [java, "-Xms2g", "-Xmx2g", "-Enode.name=rally-node0", es_main_class])
    es_1_node = new_process(101, "java", [java, "-Xms2g", "-Xmx2g", "-Des.node.name=rally-node0", es_main_class])
    metrics_store = new_process(102, "java", [java, "-Xms2g", "-Xmx2g", "-Des.path.home=~/rally/metrics/", es_main_class])

    # unrelated processes
    django_app = new_process(103, "python3", ["/some/django/app"])
    init = new_process(104, "init", ["/usr/sbin/init"])

    # Rally processes under the different names they may show up with
    rally_via_python = new_process(105, "python3", ["/usr/bin/python3", "~/.local/bin/esrally"])
    rally_via_rally = new_process(106, "rally", ["/usr/bin/python3", "~/.local/bin/esrally"])
    rally_via_esrally = new_process(107, "esrally", ["/usr/bin/python3", "~/.local/bin/esrally"])
    rally_on_mac = new_process(108, "Python", [mac_python, "~/.local/bin/esrally"])

    # fake own process by determining our pid
    own_rally = new_process(os.getpid(), "Python", [mac_python, "~/.local/bin/esrally"])
    night_rally = new_process(110, "Python", [mac_python, "~/.local/bin/night_rally"])

    process_iter.return_value = [
        es_1_node,
        es_5_node,
        metrics_store,
        django_app,
        init,
        rally_via_python,
        rally_via_rally,
        rally_via_esrally,
        rally_on_mac,
        own_rally,
        night_rally,
    ]

    process.kill_running_rally_instances()

    must_survive = [es_5_node, es_1_node, metrics_store, django_app, init]
    for survivor in must_survive:
        self.assertFalse(survivor.killed)

    must_be_killed = [rally_via_python, rally_via_rally, rally_via_esrally, rally_on_mac]
    for victim in must_be_killed:
        self.assertTrue(victim.killed)

    # neither the current process nor the similarly named night_rally may be touched
    self.assertFalse(own_rally.killed)
    self.assertFalse(night_rally.killed)