diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index c06d2f732..bfd4056b2 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -11,6 +11,6 @@ jobs: with: python-version: '3.8' - name: Install Tox - run: pip install tox + run: pip install tox==3.28.0 - name: Run Tox run: tox -e style,py38,cover diff --git a/ducktape/tests/runner.py b/ducktape/tests/runner.py index 47da2d02b..8d1f0b33f 100644 --- a/ducktape/tests/runner.py +++ b/ducktape/tests/runner.py @@ -203,11 +203,15 @@ def run_all_tests(self): event = self.receiver.recv(timeout=self.session_context.test_runner_timeout) self._handle(event) except Exception as e: - err_str = "Exception receiving message: %s: %s, active_tests: \n %s \n" % (str(type(e)), str(e), self.active_tests_debug()) + err_str = "Exception receiving message: %s: %s" % (str(type(e)), str(e)) + err_str += ", active_tests: \n %s \n" % (self.active_tests_debug()) err_str += "\n" + traceback.format_exc(limit=16) self._log(logging.ERROR, err_str) # All processes are on the same machine, so treat communication failure as a fatal error + for proc in self._client_procs.values(): + proc.terminate() + self._client_procs = {} raise except KeyboardInterrupt: # If SIGINT is received, stop triggering new tests, and let the currently running tests finish diff --git a/systests/cluster/test_runner_operations.py b/systests/cluster/test_runner_operations.py new file mode 100644 index 000000000..956723a28 --- /dev/null +++ b/systests/cluster/test_runner_operations.py @@ -0,0 +1,51 @@ +# Copyright 2022 Confluent Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from ducktape.services.service import Service +from ducktape.tests.test import Test +from ducktape.mark.resource import cluster +import time + + +class SimpleEchoService(Service): + """Simple service that allocates one node for performing tests of RemoteAccount functionality""" + logs = { + "my_log": { + "path": "/tmp/log", + "collect_default": True + }, + } + + def __init__(self, context): + super(SimpleEchoService, self).__init__(context, num_nodes=1) + self.count = 0 + + def echo(self): + self.nodes[0].account.ssh("echo {} >> /tmp/log".format(self.count)) + self.count += 1 + + +class SimpleRunnerTest(Test): + def setup(self): + self.service = SimpleEchoService(self.test_context) + + @cluster(num_nodes=1) + def timeout_test(self): + """ + a simple longer running test to test special run flags agaisnt. + """ + self.service.start() + + while self.service.count < 100000000: + self.service.echo() + time.sleep(.2) diff --git a/tests/runner/check_runner.py b/tests/runner/check_runner.py index 8c40b82cc..fbe880ca3 100644 --- a/tests/runner/check_runner.py +++ b/tests/runner/check_runner.py @@ -14,6 +14,7 @@ from unittest.mock import patch +import pytest from ducktape.tests.runner_client import RunnerClient from ducktape.tests.test import TestContext from ducktape.tests.runner import TestRunner @@ -27,7 +28,7 @@ from .resources.test_thingy import ClusterTestThingy, TestThingy from .resources.test_failing_tests import FailingTest from ducktape.tests.reporter import JUnitReporter - +from ducktape.errors import TimeoutError from mock import Mock import os @@ -199,3 +200,18 @@ def check_run_failure_with_bad_cluster_allocation(self): assert results.num_failed == 1 assert results.num_passed == 1 assert results.num_ignored == 0 + + def check_runner_timeout(self): + """Check process cleanup and error handling in a parallel runner client run.""" + mock_cluster = LocalhostCluster(num_nodes=1000) + session_context = tests.ducktape_mock.session_context(max_parallel=1000, test_runner_timeout=1) + + test_methods = [TestThingy.test_delayed, TestThingy.test_failure] + ctx_list = self._do_expand(test_file=TEST_THINGY_FILE, test_class=TestThingy, test_methods=test_methods, + cluster=mock_cluster, session_context=session_context) + runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1) + + with pytest.raises(TimeoutError): + runner.run_all_tests() + + assert not runner._client_procs diff --git a/tests/runner/resources/test_thingy.py b/tests/runner/resources/test_thingy.py index 45ff1b7b2..79104cce3 100644 --- a/tests/runner/resources/test_thingy.py +++ b/tests/runner/resources/test_thingy.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. +from time import time from ducktape.cluster.cluster_spec import ClusterSpec from ducktape.tests.test import Test from ducktape.mark import ignore, parametrize @@ -28,6 +29,9 @@ def min_cluster_spec(self): def test_pi(self): return {"data": 3.14159} + def test_delayed(self): + time.sleep(1) + @ignore def test_ignore1(self): pass