From d5fa0e4349f86f3ec7fd1cf32f698a781456ae81 Mon Sep 17 00:00:00 2001
From: vp-elitnet <93337942+vp-elitnet@users.noreply.github.com>
Date: Wed, 8 Dec 2021 02:20:21 +0200
Subject: [PATCH 01/83] Fix test file discovery regression (#285)

* Fix test file discovery regression

Do not subject explicitly provided test files to default pattern, i.e.
restore the behaviour prior to 2036577e.

* Update styling

Co-authored-by: Ian McDonald <ian_mcdonald@rocketmail.com>

Co-authored-by: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Co-authored-by: Ian McDonald <ian_mcdonald@rocketmail.com>
---
 docs/run_tests.rst                                        | 6 +++---
 ducktape/tests/loader.py                                  | 5 +----
 tests/loader/check_loader.py                              | 8 ++++++--
 tests/loader/resources/a.py                               | 1 +
 .../loader_test_directory/name_does_not_match_pattern.py  | 8 ++++++++
 5 files changed, 19 insertions(+), 9 deletions(-)
 create mode 120000 tests/loader/resources/a.py
 create mode 100644 tests/loader/resources/loader_test_directory/name_does_not_match_pattern.py

diff --git a/docs/run_tests.rst b/docs/run_tests.rst
index 3ea89b335..a9941f956 100644
--- a/docs/run_tests.rst
+++ b/docs/run_tests.rst
@@ -8,9 +8,9 @@ Running Tests
 =============
 
 ducktape discovers and runs tests in the path(s) provided.
-You can specify a folder with tests, a specific test file or even a specific class or test method, via absolute or
-relative paths. You can optionally specify a specific set of parameters
-for tests with ``@parametrize`` or ``@matrix`` annotations::
+You can specify a folder with tests (all tests in Python modules named with "test\_" prefix or "_test" suffix will be
+run), a specific test file (with any name) or even a specific class or test method, via absolute or relative paths.
+You can optionally specify a specific set of parameters for tests with ``@parametrize`` or ``@matrix`` annotations::
 
     ducktape <relative_path_to_testdirectory>                   # e.g. ducktape dir/tests
     ducktape <relative_path_to_file>                            # e.g. ducktape dir/tests/my_test.py
diff --git a/ducktape/tests/loader.py b/ducktape/tests/loader.py
index a7419be92..be3d4ed21 100644
--- a/ducktape/tests/loader.py
+++ b/ducktape/tests/loader.py
@@ -397,10 +397,7 @@ def _find_test_files(self, path_or_glob):
                 raise LoaderException('Path {} does not exist'.format(path))
             self.logger.debug('Checking {}'.format(path))
             if os.path.isfile(path):
-                if self._is_test_file(path):
-                    test_files.append(os.path.abspath(path))
-                else:
-                    self.logger.debug("Skipping {} because it isn't a test file".format(path))
+                test_files.append(os.path.abspath(path))
             elif os.path.isdir(path):
                 for pwd, dirs, files in os.walk(path):
                     if "__init__.py" not in files:
diff --git a/tests/loader/check_loader.py b/tests/loader/check_loader.py
index af41cb277..aa5f66f9b 100644
--- a/tests/loader/check_loader.py
+++ b/tests/loader/check_loader.py
@@ -195,10 +195,14 @@ def check_test_loader_with_directory(self):
         tests = loader.load([discover_dir()])
         assert len(tests) == num_tests_in_dir(discover_dir())
 
-    def check_test_loader_with_file(self):
+    @pytest.mark.parametrize(['dir_', 'file_name'], [
+        pytest.param(discover_dir(), 'test_a.py'),
+        pytest.param(resources_dir(), 'a.py')
+    ])
+    def check_test_loader_with_file(self, dir_, file_name):
         """Check discovery on a file. """
         loader = TestLoader(self.SESSION_CONTEXT, logger=Mock())
-        module_path = os.path.join(discover_dir(), "test_a.py")
+        module_path = os.path.join(dir_, file_name)
 
         tests = loader.load([module_path])
         assert len(tests) == num_tests_in_file(module_path)
diff --git a/tests/loader/resources/a.py b/tests/loader/resources/a.py
new file mode 120000
index 000000000..bc2ea9ccd
--- /dev/null
+++ b/tests/loader/resources/a.py
@@ -0,0 +1 @@
+loader_test_directory/test_a.py
\ No newline at end of file
diff --git a/tests/loader/resources/loader_test_directory/name_does_not_match_pattern.py b/tests/loader/resources/loader_test_directory/name_does_not_match_pattern.py
new file mode 100644
index 000000000..ebd954920
--- /dev/null
+++ b/tests/loader/resources/loader_test_directory/name_does_not_match_pattern.py
@@ -0,0 +1,8 @@
+from ducktape.tests.test import Test
+
+
+class TestNotLoaded(Test):
+    """Loader should not discover this - module name does not match default pattern."""
+
+    def test_a(self):
+        pass

From 4331f44ef1ef922bcb2333919510279b8842bce0 Mon Sep 17 00:00:00 2001
From: imcdo <imcdonald@confluent.io>
Date: Tue, 7 Dec 2021 16:27:33 -0800
Subject: [PATCH 02/83] Bump version to 0.8.9

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index a02276f75..462a7c581 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.8'
+__version__ = '0.8.9'

From 74ba71a4924a4231c25d0889a6845c7dcf287968 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Wed, 26 Jan 2022 15:32:58 -0800
Subject: [PATCH 03/83] Add SSH failure callbacks (#287)

* ssh checkers

* debugs

* log non connection errors:

* include socket errors

* clean up debugging

* clean up

* add checker test

* fix mock

* add docs

* update checks and error messages

* add better description

* fix style

* style fix
---
 docs/debug_tests.rst                      | 10 +++++++
 ducktape/cluster/json.py                  | 12 ++++-----
 ducktape/cluster/linux_remoteaccount.py   |  5 ++--
 ducktape/cluster/localhost.py             |  4 ++-
 ducktape/cluster/remoteaccount.py         | 33 +++++++++++++++++++++--
 ducktape/cluster/vagrant.py               |  6 ++---
 ducktape/cluster/windows_remoteaccount.py |  5 ++--
 ducktape/command_line/main.py             |  8 +++++-
 ducktape/command_line/parse_args.py       |  5 ++++
 ducktape/utils/util.py                    | 10 +++++++
 tests/cluster/check_remoteaccount.py      | 28 +++++++++++++++++++
 tests/ducktape_mock.py                    |  4 +--
 12 files changed, 109 insertions(+), 21 deletions(-)

diff --git a/docs/debug_tests.rst b/docs/debug_tests.rst
index 71df91d6c..7b17066dc 100644
--- a/docs/debug_tests.rst
+++ b/docs/debug_tests.rst
@@ -97,3 +97,13 @@ Tools for Managing Logs
 =======================
 
 Analyzing and matching up logs from a distributed service could be time consuming. There are many good tools for working with logs. Examples include http://lnav.org/, http://list.xmodulo.com/multitail.html, and http://glogg.bonnefon.org/.
+
+Validating SSH Issues
+=======================
+
+Ducktape supports running custom validators when an SSH error occurs, allowing you to run your own validation against a host.
+This is done by running ducktape with the ``--ssh-checker-function`` flag, followed by the module path to your function, for instance::
+    
+    ducktape my-test.py --ssh-checker-function my.module.validator.validate_ssh
+
+This function receives the SSH error that was raised as its first argument and the remote account object as its second.
diff --git a/ducktape/cluster/json.py b/ducktape/cluster/json.py
index 8f4b72d63..8e03b503d 100644
--- a/ducktape/cluster/json.py
+++ b/ducktape/cluster/json.py
@@ -90,7 +90,9 @@ def __init__(self, cluster_json=None, *args, **kwargs):
                     "Cluster json has a node without a ssh_config field: %s\n Cluster json: %s" % (ninfo, cluster_json)
 
                 ssh_config = RemoteAccountSSHConfig(**ninfo.get("ssh_config", {}))
-                remote_account = JsonCluster.make_remote_account(ssh_config, ninfo.get("externally_routable_ip"))
+                remote_account = \
+                    JsonCluster.make_remote_account(ssh_config, ninfo.get("externally_routable_ip"),
+                                                    ssh_exception_checks=kwargs.get("ssh_exception_checks"))
                 if remote_account.externally_routable_ip is None:
                     remote_account.externally_routable_ip = self._externally_routable_ip(remote_account)
                 self._available_accounts.add_node(remote_account)
@@ -100,15 +102,13 @@ def __init__(self, cluster_json=None, *args, **kwargs):
         self._id_supplier = 0
 
     @staticmethod
-    def make_remote_account(ssh_config, externally_routable_ip=None):
+    def make_remote_account(ssh_config, *args, **kwargs):
         """Factory function for creating the correct RemoteAccount implementation."""
 
         if ssh_config.host and WINDOWS in ssh_config.host:
-            return WindowsRemoteAccount(ssh_config=ssh_config,
-                                        externally_routable_ip=externally_routable_ip)
+            return WindowsRemoteAccount(ssh_config, *args, **kwargs)
         else:
-            return LinuxRemoteAccount(ssh_config=ssh_config,
-                                      externally_routable_ip=externally_routable_ip)
+            return LinuxRemoteAccount(ssh_config, *args, **kwargs)
 
     def do_alloc(self, cluster_spec):
         allocated_accounts = self._available_accounts.remove_spec(cluster_spec)
diff --git a/ducktape/cluster/linux_remoteaccount.py b/ducktape/cluster/linux_remoteaccount.py
index 22bf3778a..64783e387 100644
--- a/ducktape/cluster/linux_remoteaccount.py
+++ b/ducktape/cluster/linux_remoteaccount.py
@@ -18,9 +18,8 @@
 
 class LinuxRemoteAccount(RemoteAccount):
 
-    def __init__(self, ssh_config, externally_routable_ip=None, logger=None):
-        super(LinuxRemoteAccount, self).__init__(ssh_config, externally_routable_ip=externally_routable_ip,
-                                                 logger=logger)
+    def __init__(self, *args, **kwargs):
+        super(LinuxRemoteAccount, self).__init__(*args, **kwargs)
         self._ssh_client = None
         self._sftp_client = None
         self.os = LINUX
diff --git a/ducktape/cluster/localhost.py b/ducktape/cluster/localhost.py
index b76c76673..b502a5624 100644
--- a/ducktape/cluster/localhost.py
+++ b/ducktape/cluster/localhost.py
@@ -32,7 +32,9 @@ def __init__(self, *args, **kwargs):
         self._available_nodes = NodeContainer()
         for i in range(num_nodes):
             ssh_config = RemoteAccountSSHConfig("localhost%d" % i, hostname="localhost", port=22)
-            self._available_nodes.add_node(ClusterNode(LinuxRemoteAccount(ssh_config)))
+            self._available_nodes.add_node(ClusterNode(
+                LinuxRemoteAccount(ssh_config,
+                                   ssh_exception_checks=kwargs.get("ssh_exception_checks"))))
         self._in_use_nodes = NodeContainer()
 
     def do_alloc(self, cluster_spec):
diff --git a/ducktape/cluster/remoteaccount.py b/ducktape/cluster/remoteaccount.py
index 470248436..4f5ad7040 100644
--- a/ducktape/cluster/remoteaccount.py
+++ b/ducktape/cluster/remoteaccount.py
@@ -16,6 +16,7 @@
 import logging
 import os
 from paramiko import SSHClient, SSHConfig, MissingHostKeyPolicy
+from paramiko.ssh_exception import SSHException, NoValidConnectionsError
 import shutil
 import signal
 import socket
@@ -28,6 +29,20 @@
 from ducktape.errors import DucktapeError
 
 
+def check_ssh(method):
+    def wrapper(self, *args, **kwargs):
+        try:
+            return method(self, *args, **kwargs)
+        except (SSHException, NoValidConnectionsError, socket.error) as e:
+            if self._custom_ssh_exception_checks:
+                self._log(logging.DEBUG, "starting ssh checks:")
+                self._log(logging.DEBUG, "\n".join(repr(f) for f in self._custom_ssh_exception_checks))
+                for func in self._custom_ssh_exception_checks:
+                    func(e, self)
+            raise
+    return wrapper
+
+
 class RemoteAccountSSHConfig(object):
     def __init__(self, host=None, hostname=None, user=None, port=None, password=None, identityfile=None, **kwargs):
         """Wrapper for ssh configs used by ducktape to connect to remote machines.
@@ -120,7 +135,7 @@ class RemoteAccount(HttpMixin):
     Each operating system has its own RemoteAccount implementation.
     """
 
-    def __init__(self, ssh_config, externally_routable_ip=None, logger=None):
+    def __init__(self, ssh_config, externally_routable_ip=None, logger=None, ssh_exception_checks=[]):
         # Instance of RemoteAccountSSHConfig - use this instead of a dict, because we need the entire object to
         # be hashable
         self.ssh_config = ssh_config
@@ -139,6 +154,7 @@ def __init__(self, ssh_config, externally_routable_ip=None, logger=None):
         self.os = None
         self._ssh_client = None
         self._sftp_client = None
+        self._custom_ssh_exception_checks = ssh_exception_checks
 
     @property
     def operating_system(self):
@@ -159,6 +175,7 @@ def _log(self, level, msg, *args, **kwargs):
         msg = "%s: %s" % (str(self), msg)
         self.logger.log(level, msg, *args, **kwargs)
 
+    @check_ssh
     def _set_ssh_client(self):
         client = SSHClient()
         client.set_missing_host_key_policy(IgnoreMissingHostKeyPolicy())
@@ -250,6 +267,7 @@ def _can_ping_url(self, url, headers):
         except Exception:
             return False
 
+    @check_ssh
     def ssh(self, cmd, allow_fail=False):
         """Run the given command on the remote host, and block until the command has finished running.
 
@@ -283,6 +301,7 @@ def ssh(self, cmd, allow_fail=False):
 
         return exit_status
 
+    @check_ssh
     def ssh_capture(self, cmd, allow_fail=False, callback=None, combine_stderr=True, timeout_sec=None):
         """Run the given command asynchronously via ssh, and return an SSHOutputIter object.
 
@@ -336,6 +355,7 @@ def output_generator():
 
         return SSHOutputIter(output_generator, stdout)
 
+    @check_ssh
     def ssh_output(self, cmd, allow_fail=False, combine_stderr=True, timeout_sec=None):
         """Runs the command via SSH and captures the output, returning it as a string.
 
@@ -487,6 +507,7 @@ def _re_anchor_basename(self, path, directory):
 
         return os.path.join(directory, path_basename)
 
+    @check_ssh
     def copy_from(self, src, dest):
         if os.path.isdir(dest):
             # dest is an existing directory, so assuming src looks like path/to/src_name,
@@ -513,6 +534,7 @@ def scp_to(self, src, dest, recursive=False):
         warnings.warn("scp_to is now deprecated. Please use copy_to")
         self.copy_to(src, dest)
 
+    @check_ssh
     def copy_to(self, src, dest):
 
         if self.isdir(dest):
@@ -537,6 +559,7 @@ def copy_to(self, src, dest):
                     # TODO what about uncopyable file types?
                     pass
 
+    @check_ssh
     def islink(self, path):
         try:
             # stat should follow symlinks
@@ -545,6 +568,7 @@ def islink(self, path):
         except Exception:
             return False
 
+    @check_ssh
     def isdir(self, path):
         try:
             # stat should follow symlinks
@@ -553,6 +577,7 @@ def isdir(self, path):
         except Exception:
             return False
 
+    @check_ssh
     def exists(self, path):
         """Test that the path exists, but don't follow symlinks."""
         try:
@@ -562,6 +587,7 @@ def exists(self, path):
         except IOError:
             return False
 
+    @check_ssh
     def isfile(self, path):
         """Imitates semantics of os.path.isfile
 
@@ -578,6 +604,7 @@ def isfile(self, path):
     def open(self, path, mode='r'):
         return self.sftp_client.open(path, mode)
 
+    @check_ssh
     def create_file(self, path, contents):
         """Create file at path, with the given contents.
 
@@ -585,12 +612,14 @@ def create_file(self, path, contents):
         """
         # TODO: what should semantics be if path exists? what actually happens if it already exists?
         # TODO: what happens if the base part of the path does not exist?
+
         with self.sftp_client.open(path, "w") as f:
             f.write(contents)
 
     _DEFAULT_PERMISSIONS = int('755', 8)
-    def mkdir(self, path, mode=_DEFAULT_PERMISSIONS):
 
+    @check_ssh
+    def mkdir(self, path, mode=_DEFAULT_PERMISSIONS):
         self.sftp_client.mkdir(path, mode)
 
     def mkdirs(self, path, mode=_DEFAULT_PERMISSIONS):
diff --git a/ducktape/cluster/vagrant.py b/ducktape/cluster/vagrant.py
index 462131a1a..0074cad5b 100644
--- a/ducktape/cluster/vagrant.py
+++ b/ducktape/cluster/vagrant.py
@@ -36,7 +36,7 @@ class VagrantCluster(JsonCluster):
     def __init__(self, *args, **kwargs):
         self._is_aws = None
         is_read_from_file = False
-
+        self.ssh_exception_checks = kwargs.get("ssh_exception_checks")
         cluster_file = kwargs.get("cluster_file")
         if cluster_file is not None:
             try:
@@ -51,7 +51,7 @@ def __init__(self, *args, **kwargs):
                 "nodes": self._get_nodes_from_vagrant()
             }
 
-        super(VagrantCluster, self).__init__(cluster_json)
+        super(VagrantCluster, self).__init__(cluster_json, *args, **kwargs)
 
         # If cluster file is specified but the cluster info is not read from it, write the cluster info into the file
         if not is_read_from_file and cluster_file is not None:
@@ -82,7 +82,7 @@ def _get_nodes_from_vagrant(self):
 
             account = None
             try:
-                account = JsonCluster.make_remote_account(ssh_config)
+                account = JsonCluster.make_remote_account(ssh_config, ssh_exception_checks=self.ssh_exception_checks)
                 externally_routable_ip = account.fetch_externally_routable_ip(self.is_aws)
             finally:
                 if account:
diff --git a/ducktape/cluster/windows_remoteaccount.py b/ducktape/cluster/windows_remoteaccount.py
index 68c7197ec..b57b6d730 100644
--- a/ducktape/cluster/windows_remoteaccount.py
+++ b/ducktape/cluster/windows_remoteaccount.py
@@ -37,9 +37,8 @@ class WindowsRemoteAccount(RemoteAccount):
 
     WINRM_USERNAME = "Administrator"
 
-    def __init__(self, ssh_config, externally_routable_ip=None, logger=None):
-        super(WindowsRemoteAccount, self).__init__(ssh_config, externally_routable_ip=externally_routable_ip,
-                                                   logger=logger)
+    def __init__(self, *args, **kwargs):
+        super(WindowsRemoteAccount, self).__init__(*args, **kwargs)
         self.os = WINDOWS
         self._winrm_client = None
 
diff --git a/ducktape/command_line/main.py b/ducktape/command_line/main.py
index dd88754f9..aa04a4a80 100644
--- a/ducktape/command_line/main.py
+++ b/ducktape/command_line/main.py
@@ -34,6 +34,7 @@
 from ducktape.tests.session import SessionContext, SessionLoggerMaker
 from ducktape.tests.session import generate_session_id, generate_results_dir
 from ducktape.utils.local_filesystem_utils import mkdir_p
+from ducktape.utils.util import load_function
 
 
 def extend_import_paths(paths):
@@ -181,7 +182,12 @@ def main():
         (cluster_mod_name, cluster_class_name) = args_dict["cluster"].rsplit('.', 1)
         cluster_mod = importlib.import_module(cluster_mod_name)
         cluster_class = getattr(cluster_mod, cluster_class_name)
-        cluster = cluster_class(cluster_file=args_dict["cluster_file"])
+
+        cluster_kwargs = {"cluster_file": args_dict["cluster_file"]}
+        checkers = [load_function(func_path) for func_path in args_dict["ssh_checker_function"]]
+        if checkers:
+            cluster_kwargs['ssh_exception_checks'] = checkers
+        cluster = cluster_class(**cluster_kwargs)
         for ctx in tests:
             # Note that we're attaching a reference to cluster
             # only after test context objects have been instantiated
diff --git a/ducktape/command_line/parse_args.py b/ducktape/command_line/parse_args.py
index c3e19a241..13ba9d0e5 100644
--- a/ducktape/command_line/parse_args.py
+++ b/ducktape/command_line/parse_args.py
@@ -77,6 +77,11 @@ def create_ducktape_parser():
     parser.add_argument("--test-runner-timeout", action="store", type=int, default=1800000,
                         help="Amount of time in milliseconds between test communicating between the test runner"
                              " before a timeout error occurs. Default is 30 minutes")
+    parser.add_argument("--ssh-checker-function", action="store", type=str, nargs="+",
+                        help="Python module path(s) to a function that takes an exception and a remote account"
+                        " that will be called when an ssh error occurs, this can give some "
+                        "validation or better logging when an ssh error occurs. Specify any "
+                        "number of module paths after this flag to be called.")
     return parser
 
 
diff --git a/ducktape/utils/util.py b/ducktape/utils/util.py
index f01d99d39..85d100818 100644
--- a/ducktape/utils/util.py
+++ b/ducktape/utils/util.py
@@ -53,3 +53,13 @@ def package_is_installed(package_name):
 def ducktape_version():
     """Return string representation of current ducktape version."""
     return __ducktape_version__
+
+
+def load_function(func_module_path):
+    """Loads and returns a function from a module path seperated by '.'s"""
+    module, function = func_module_path.rsplit(".", 1)
+    try:
+        return getattr(importlib.import_module(module), function)
+    except AttributeError:
+        raise Exception("Function could not be loaded from the module path {}, "
+                        "verify that it is '.' seperated".format(func_module_path))
diff --git a/tests/cluster/check_remoteaccount.py b/tests/cluster/check_remoteaccount.py
index 57c1c2f6a..e08b39f34 100644
--- a/tests/cluster/check_remoteaccount.py
+++ b/tests/cluster/check_remoteaccount.py
@@ -17,6 +17,7 @@
 from tests.test_utils import find_available_port
 from ducktape.cluster.remoteaccount import RemoteAccount
 from ducktape.cluster.remoteaccount import RemoteAccountSSHConfig
+import pytest
 
 import logging
 from threading import Thread
@@ -26,6 +27,18 @@
 import time
 
 
+class DummyException(Exception):
+    pass
+
+
+def raise_error_checker(error, remote_account):
+    raise DummyException("dummy raise: {}\nfrom: {}".format(error, remote_account))
+
+
+def raise_no_error_checker(error, remote_account):
+    pass
+
+
 class SimpleServer(object):
     """Helper class which starts a simple server listening on localhost at the specified port
     """
@@ -86,6 +99,21 @@ def check_wait_for_http_timeout(self):
             actual_timeout = time.time() - start
             assert abs(actual_timeout - timeout) / timeout < 1
 
+    @pytest.mark.parametrize("checkers", [[raise_error_checker],
+                                          [raise_no_error_checker, raise_error_checker],
+                                          [raise_error_checker, raise_no_error_checker]])
+    def check_ssh_checker(self, checkers):
+        self.server.start()
+        self.account = RemoteAccount(RemoteAccountSSHConfig.from_string(
+            """
+        Host dummy_host.com
+            Hostname dummy_host.name.com
+            Port 22
+            User dummy
+        """), ssh_exception_checks=checkers)
+        with pytest.raises(DummyException):
+            self.account.ssh('echo test')
+
     def teardown(self):
         self.server.stop()
 
diff --git a/tests/ducktape_mock.py b/tests/ducktape_mock.py
index f7cfa7675..b11f7c407 100644
--- a/tests/ducktape_mock.py
+++ b/tests/ducktape_mock.py
@@ -89,11 +89,11 @@ def __init__(self):
 class MockAccount(LinuxRemoteAccount):
     """Mock node.account object. It's Linux because tests are run in Linux."""
 
-    def __init__(self):
+    def __init__(self, **kwargs):
         ssh_config = RemoteAccountSSHConfig(
             host="localhost",
             user=None,
             hostname="localhost",
             port=22)
 
-        super(MockAccount, self).__init__(ssh_config, externally_routable_ip="localhost", logger=None)
+        super(MockAccount, self).__init__(ssh_config, externally_routable_ip="localhost", logger=None, **kwargs)

From 5b776fbc75d4470fb1b42819eb8c184dee4c94e1 Mon Sep 17 00:00:00 2001
From: imcdo <imcdonald@confluent.io>
Date: Thu, 27 Jan 2022 11:07:26 -0800
Subject: [PATCH 04/83] Bump version to 0.7.15

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index f2911567b..4c93f0e6c 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.7.14'
+__version__ = '0.7.15'

From 889d3ce81755840cf2fe0c302483162327973938 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Thu, 27 Jan 2022 13:28:14 -0800
Subject: [PATCH 05/83] Add SSH failure callbacks (#287) (#290)

* ssh checkers

* debugs

* log non connection errors:

* include socket errors

* clean up debugging

* clean up

* add checker test

* fix mock

* add docs

* update checks and error messages

* add better description

* fix style

* style fix
---
 docs/debug_tests.rst                      | 10 +++++++
 ducktape/cluster/json.py                  | 12 ++++-----
 ducktape/cluster/linux_remoteaccount.py   |  5 ++--
 ducktape/cluster/localhost.py             |  4 ++-
 ducktape/cluster/remoteaccount.py         | 32 +++++++++++++++++++++--
 ducktape/cluster/vagrant.py               |  6 ++---
 ducktape/cluster/windows_remoteaccount.py |  5 ++--
 ducktape/command_line/main.py             |  8 +++++-
 ducktape/command_line/parse_args.py       |  5 ++++
 ducktape/utils/util.py                    | 10 +++++++
 tests/cluster/check_remoteaccount.py      | 28 ++++++++++++++++++++
 tests/ducktape_mock.py                    |  4 +--
 12 files changed, 108 insertions(+), 21 deletions(-)

diff --git a/docs/debug_tests.rst b/docs/debug_tests.rst
index 71df91d6c..7b17066dc 100644
--- a/docs/debug_tests.rst
+++ b/docs/debug_tests.rst
@@ -97,3 +97,13 @@ Tools for Managing Logs
 =======================
 
 Analyzing and matching up logs from a distributed service could be time consuming. There are many good tools for working with logs. Examples include http://lnav.org/, http://list.xmodulo.com/multitail.html, and http://glogg.bonnefon.org/.
+
+Validating SSH Issues
+=======================
+
+Ducktape supports running custom validators when an SSH error occurs, allowing you to run your own validation against a host.
+This is done by running ducktape with the ``--ssh-checker-function`` flag, followed by the module path to your function, for instance::
+    
+    ducktape my-test.py --ssh-checker-function my.module.validator.validate_ssh
+
+This function receives the SSH error that was raised as its first argument and the remote account object as its second.
diff --git a/ducktape/cluster/json.py b/ducktape/cluster/json.py
index 8f4b72d63..8e03b503d 100644
--- a/ducktape/cluster/json.py
+++ b/ducktape/cluster/json.py
@@ -90,7 +90,9 @@ def __init__(self, cluster_json=None, *args, **kwargs):
                     "Cluster json has a node without a ssh_config field: %s\n Cluster json: %s" % (ninfo, cluster_json)
 
                 ssh_config = RemoteAccountSSHConfig(**ninfo.get("ssh_config", {}))
-                remote_account = JsonCluster.make_remote_account(ssh_config, ninfo.get("externally_routable_ip"))
+                remote_account = \
+                    JsonCluster.make_remote_account(ssh_config, ninfo.get("externally_routable_ip"),
+                                                    ssh_exception_checks=kwargs.get("ssh_exception_checks"))
                 if remote_account.externally_routable_ip is None:
                     remote_account.externally_routable_ip = self._externally_routable_ip(remote_account)
                 self._available_accounts.add_node(remote_account)
@@ -100,15 +102,13 @@ def __init__(self, cluster_json=None, *args, **kwargs):
         self._id_supplier = 0
 
     @staticmethod
-    def make_remote_account(ssh_config, externally_routable_ip=None):
+    def make_remote_account(ssh_config, *args, **kwargs):
         """Factory function for creating the correct RemoteAccount implementation."""
 
         if ssh_config.host and WINDOWS in ssh_config.host:
-            return WindowsRemoteAccount(ssh_config=ssh_config,
-                                        externally_routable_ip=externally_routable_ip)
+            return WindowsRemoteAccount(ssh_config, *args, **kwargs)
         else:
-            return LinuxRemoteAccount(ssh_config=ssh_config,
-                                      externally_routable_ip=externally_routable_ip)
+            return LinuxRemoteAccount(ssh_config, *args, **kwargs)
 
     def do_alloc(self, cluster_spec):
         allocated_accounts = self._available_accounts.remove_spec(cluster_spec)
diff --git a/ducktape/cluster/linux_remoteaccount.py b/ducktape/cluster/linux_remoteaccount.py
index 22bf3778a..64783e387 100644
--- a/ducktape/cluster/linux_remoteaccount.py
+++ b/ducktape/cluster/linux_remoteaccount.py
@@ -18,9 +18,8 @@
 
 class LinuxRemoteAccount(RemoteAccount):
 
-    def __init__(self, ssh_config, externally_routable_ip=None, logger=None):
-        super(LinuxRemoteAccount, self).__init__(ssh_config, externally_routable_ip=externally_routable_ip,
-                                                 logger=logger)
+    def __init__(self, *args, **kwargs):
+        super(LinuxRemoteAccount, self).__init__(*args, **kwargs)
         self._ssh_client = None
         self._sftp_client = None
         self.os = LINUX
diff --git a/ducktape/cluster/localhost.py b/ducktape/cluster/localhost.py
index b76c76673..b502a5624 100644
--- a/ducktape/cluster/localhost.py
+++ b/ducktape/cluster/localhost.py
@@ -32,7 +32,9 @@ def __init__(self, *args, **kwargs):
         self._available_nodes = NodeContainer()
         for i in range(num_nodes):
             ssh_config = RemoteAccountSSHConfig("localhost%d" % i, hostname="localhost", port=22)
-            self._available_nodes.add_node(ClusterNode(LinuxRemoteAccount(ssh_config)))
+            self._available_nodes.add_node(ClusterNode(
+                LinuxRemoteAccount(ssh_config,
+                                   ssh_exception_checks=kwargs.get("ssh_exception_checks"))))
         self._in_use_nodes = NodeContainer()
 
     def do_alloc(self, cluster_spec):
diff --git a/ducktape/cluster/remoteaccount.py b/ducktape/cluster/remoteaccount.py
index a15ded583..8fcf9b2a3 100644
--- a/ducktape/cluster/remoteaccount.py
+++ b/ducktape/cluster/remoteaccount.py
@@ -16,6 +16,7 @@
 import logging
 import os
 from paramiko import SSHClient, SSHConfig, MissingHostKeyPolicy
+from paramiko.ssh_exception import SSHException, NoValidConnectionsError
 import shutil
 import signal
 import socket
@@ -28,6 +29,20 @@
 from ducktape.errors import DucktapeError
 
 
+def check_ssh(method):
+    def wrapper(self, *args, **kwargs):
+        try:
+            return method(self, *args, **kwargs)
+        except (SSHException, NoValidConnectionsError, socket.error) as e:
+            if self._custom_ssh_exception_checks:
+                self._log(logging.DEBUG, "starting ssh checks:")
+                self._log(logging.DEBUG, "\n".join(repr(f) for f in self._custom_ssh_exception_checks))
+                for func in self._custom_ssh_exception_checks:
+                    func(e, self)
+            raise
+    return wrapper
+
+
 class RemoteAccountSSHConfig(object):
     def __init__(self, host=None, hostname=None, user=None, port=None, password=None, identityfile=None, **kwargs):
         """Wrapper for ssh configs used by ducktape to connect to remote machines.
@@ -120,7 +135,7 @@ class RemoteAccount(HttpMixin):
     Each operating system has its own RemoteAccount implementation.
     """
 
-    def __init__(self, ssh_config, externally_routable_ip=None, logger=None):
+    def __init__(self, ssh_config, externally_routable_ip=None, logger=None, ssh_exception_checks=[]):
         # Instance of RemoteAccountSSHConfig - use this instead of a dict, because we need the entire object to
         # be hashable
         self.ssh_config = ssh_config
@@ -139,6 +154,7 @@ def __init__(self, ssh_config, externally_routable_ip=None, logger=None):
         self.os = None
         self._ssh_client = None
         self._sftp_client = None
+        self._custom_ssh_exception_checks = ssh_exception_checks
 
     @property
     def operating_system(self):
@@ -159,6 +175,7 @@ def _log(self, level, msg, *args, **kwargs):
         msg = "%s: %s" % (str(self), msg)
         self.logger.log(level, msg, *args, **kwargs)
 
+    @check_ssh
     def _set_ssh_client(self):
         client = SSHClient()
         client.set_missing_host_key_policy(IgnoreMissingHostKeyPolicy())
@@ -250,6 +267,7 @@ def _can_ping_url(self, url, headers):
         except Exception:
             return False
 
+    @check_ssh
     def ssh(self, cmd, allow_fail=False):
         """Run the given command on the remote host, and block until the command has finished running.
 
@@ -283,6 +301,7 @@ def ssh(self, cmd, allow_fail=False):
 
         return exit_status
 
+    @check_ssh
     def ssh_capture(self, cmd, allow_fail=False, callback=None, combine_stderr=True, timeout_sec=None):
         """Run the given command asynchronously via ssh, and return an SSHOutputIter object.
 
@@ -336,6 +355,7 @@ def output_generator():
 
         return SSHOutputIter(output_generator, stdout)
 
+    @check_ssh
     def ssh_output(self, cmd, allow_fail=False, combine_stderr=True, timeout_sec=None):
         """Runs the command via SSH and captures the output, returning it as a string.
 
@@ -487,6 +507,7 @@ def _re_anchor_basename(self, path, directory):
 
         return os.path.join(directory, path_basename)
 
+    @check_ssh
     def copy_from(self, src, dest):
         if os.path.isdir(dest):
             # dest is an existing directory, so assuming src looks like path/to/src_name,
@@ -513,6 +534,7 @@ def scp_to(self, src, dest, recursive=False):
         warnings.warn("scp_to is now deprecated. Please use copy_to")
         self.copy_to(src, dest)
 
+    @check_ssh
     def copy_to(self, src, dest):
 
         if self.isdir(dest):
@@ -537,6 +559,7 @@ def copy_to(self, src, dest):
                     # TODO what about uncopyable file types?
                     pass
 
+    @check_ssh
     def islink(self, path):
         try:
             # stat should follow symlinks
@@ -545,6 +568,7 @@ def islink(self, path):
         except Exception:
             return False
 
+    @check_ssh
     def isdir(self, path):
         try:
             # stat should follow symlinks
@@ -553,6 +577,7 @@ def isdir(self, path):
         except Exception:
             return False
 
+    @check_ssh
     def exists(self, path):
         """Test that the path exists, but don't follow symlinks."""
         try:
@@ -562,6 +587,7 @@ def exists(self, path):
         except IOError:
             return False
 
+    @check_ssh
     def isfile(self, path):
         """Imitates semantics of os.path.isfile
 
@@ -578,6 +604,7 @@ def isfile(self, path):
     def open(self, path, mode='r'):
         return self.sftp_client.open(path, mode)
 
+    @check_ssh
     def create_file(self, path, contents):
         """Create file at path, with the given contents.
 
@@ -585,13 +612,14 @@ def create_file(self, path, contents):
         """
         # TODO: what should semantics be if path exists? what actually happens if it already exists?
         # TODO: what happens if the base part of the path does not exist?
+
         with self.sftp_client.open(path, "w") as f:
             f.write(contents)
 
     _DEFAULT_PERMISSIONS = int('755', 8)
 
+    @check_ssh
     def mkdir(self, path, mode=_DEFAULT_PERMISSIONS):
-
         self.sftp_client.mkdir(path, mode)
 
     def mkdirs(self, path, mode=_DEFAULT_PERMISSIONS):
diff --git a/ducktape/cluster/vagrant.py b/ducktape/cluster/vagrant.py
index 462131a1a..0074cad5b 100644
--- a/ducktape/cluster/vagrant.py
+++ b/ducktape/cluster/vagrant.py
@@ -36,7 +36,7 @@ class VagrantCluster(JsonCluster):
     def __init__(self, *args, **kwargs):
         self._is_aws = None
         is_read_from_file = False
-
+        self.ssh_exception_checks = kwargs.get("ssh_exception_checks")
         cluster_file = kwargs.get("cluster_file")
         if cluster_file is not None:
             try:
@@ -51,7 +51,7 @@ def __init__(self, *args, **kwargs):
                 "nodes": self._get_nodes_from_vagrant()
             }
 
-        super(VagrantCluster, self).__init__(cluster_json)
+        super(VagrantCluster, self).__init__(cluster_json, *args, **kwargs)
 
         # If cluster file is specified but the cluster info is not read from it, write the cluster info into the file
         if not is_read_from_file and cluster_file is not None:
@@ -82,7 +82,7 @@ def _get_nodes_from_vagrant(self):
 
             account = None
             try:
-                account = JsonCluster.make_remote_account(ssh_config)
+                account = JsonCluster.make_remote_account(ssh_config, ssh_exception_checks=self.ssh_exception_checks)
                 externally_routable_ip = account.fetch_externally_routable_ip(self.is_aws)
             finally:
                 if account:
diff --git a/ducktape/cluster/windows_remoteaccount.py b/ducktape/cluster/windows_remoteaccount.py
index 68c7197ec..b57b6d730 100644
--- a/ducktape/cluster/windows_remoteaccount.py
+++ b/ducktape/cluster/windows_remoteaccount.py
@@ -37,9 +37,8 @@ class WindowsRemoteAccount(RemoteAccount):
 
     WINRM_USERNAME = "Administrator"
 
-    def __init__(self, ssh_config, externally_routable_ip=None, logger=None):
-        super(WindowsRemoteAccount, self).__init__(ssh_config, externally_routable_ip=externally_routable_ip,
-                                                   logger=logger)
+    def __init__(self, *args, **kwargs):
+        super(WindowsRemoteAccount, self).__init__(*args, **kwargs)
         self.os = WINDOWS
         self._winrm_client = None
 
diff --git a/ducktape/command_line/main.py b/ducktape/command_line/main.py
index d31e51657..6069e17af 100644
--- a/ducktape/command_line/main.py
+++ b/ducktape/command_line/main.py
@@ -33,6 +33,7 @@
 from ducktape.tests.session import generate_session_id, generate_results_dir
 from ducktape.utils.local_filesystem_utils import mkdir_p
 from ducktape.utils import persistence
+from ducktape.utils.util import load_function
 
 
 def get_user_defined_globals(globals_str):
@@ -160,7 +161,12 @@ def main():
         (cluster_mod_name, cluster_class_name) = args_dict["cluster"].rsplit('.', 1)
         cluster_mod = importlib.import_module(cluster_mod_name)
         cluster_class = getattr(cluster_mod, cluster_class_name)
-        cluster = cluster_class(cluster_file=args_dict["cluster_file"])
+
+        cluster_kwargs = {"cluster_file": args_dict["cluster_file"]}
+        checkers = [load_function(func_path) for func_path in args_dict["ssh_checker_function"]]
+        if checkers:
+            cluster_kwargs['ssh_exception_checks'] = checkers
+        cluster = cluster_class(**cluster_kwargs)
         for ctx in tests:
             # Note that we're attaching a reference to cluster
             # only after test context objects have been instantiated
diff --git a/ducktape/command_line/parse_args.py b/ducktape/command_line/parse_args.py
index 7728c65ce..9902d2276 100644
--- a/ducktape/command_line/parse_args.py
+++ b/ducktape/command_line/parse_args.py
@@ -81,6 +81,11 @@ def create_ducktape_parser():
     parser.add_argument("--test-runner-timeout", action="store", type=int, default=1800000,
                         help="Amount of time in milliseconds between test communicating between the test runner"
                              " before a timeout error occurs. Default is 30 minutes")
+    parser.add_argument("--ssh-checker-function", action="store", type=str, nargs="+",
+                        help="Python module path(s) to a function that takes an exception and a remote account"
+                        " that will be called when an ssh error occurs, this can give some "
+                        "validation or better logging when an ssh error occurs. Specify any "
+                        "number of module paths after this flag to be called.")
     return parser
 
 
diff --git a/ducktape/utils/util.py b/ducktape/utils/util.py
index d96c51250..6bf274472 100644
--- a/ducktape/utils/util.py
+++ b/ducktape/utils/util.py
@@ -70,3 +70,13 @@ def package_is_installed(package_name):
 def ducktape_version():
     """Return string representation of current ducktape version."""
     return __ducktape_version__
+
+
+def load_function(func_module_path):
+    """Loads and returns a function from a module path separated by '.'s"""
+    module, function = func_module_path.rsplit(".", 1)
+    try:
+        return getattr(importlib.import_module(module), function)
+    except AttributeError:
+        raise Exception("Function could not be loaded from the module path {}, "
+                        "verify that it is '.' seperated".format(func_module_path))
diff --git a/tests/cluster/check_remoteaccount.py b/tests/cluster/check_remoteaccount.py
index 57c1c2f6a..e08b39f34 100644
--- a/tests/cluster/check_remoteaccount.py
+++ b/tests/cluster/check_remoteaccount.py
@@ -17,6 +17,7 @@
 from tests.test_utils import find_available_port
 from ducktape.cluster.remoteaccount import RemoteAccount
 from ducktape.cluster.remoteaccount import RemoteAccountSSHConfig
+import pytest
 
 import logging
 from threading import Thread
@@ -26,6 +27,18 @@
 import time
 
 
+class DummyException(Exception):
+    pass
+
+
+def raise_error_checker(error, remote_account):
+    raise DummyException("dummy raise: {}\nfrom: {}".format(error, remote_account))
+
+
+def raise_no_error_checker(error, remote_account):
+    pass
+
+
 class SimpleServer(object):
     """Helper class which starts a simple server listening on localhost at the specified port
     """
@@ -86,6 +99,21 @@ def check_wait_for_http_timeout(self):
             actual_timeout = time.time() - start
             assert abs(actual_timeout - timeout) / timeout < 1
 
+    @pytest.mark.parametrize("checkers", [[raise_error_checker],
+                                          [raise_no_error_checker, raise_error_checker],
+                                          [raise_error_checker, raise_no_error_checker]])
+    def check_ssh_checker(self, checkers):
+        self.server.start()
+        self.account = RemoteAccount(RemoteAccountSSHConfig.from_string(
+            """
+        Host dummy_host.com
+            Hostname dummy_host.name.com
+            Port 22
+            User dummy
+        """), ssh_exception_checks=checkers)
+        with pytest.raises(DummyException):
+            self.account.ssh('echo test')
+
     def teardown(self):
         self.server.stop()
 
diff --git a/tests/ducktape_mock.py b/tests/ducktape_mock.py
index f7cfa7675..b11f7c407 100644
--- a/tests/ducktape_mock.py
+++ b/tests/ducktape_mock.py
@@ -89,11 +89,11 @@ def __init__(self):
 class MockAccount(LinuxRemoteAccount):
     """Mock node.account object. It's Linux because tests are run in Linux."""
 
-    def __init__(self):
+    def __init__(self, **kwargs):
         ssh_config = RemoteAccountSSHConfig(
             host="localhost",
             user=None,
             hostname="localhost",
             port=22)
 
-        super(MockAccount, self).__init__(ssh_config, externally_routable_ip="localhost", logger=None)
+        super(MockAccount, self).__init__(ssh_config, externally_routable_ip="localhost", logger=None, **kwargs)

From 96660aeb5886198e96f069d29692480090fc0b0b Mon Sep 17 00:00:00 2001
From: imcdo <imcdonald@confluent.io>
Date: Thu, 27 Jan 2022 13:41:26 -0800
Subject: [PATCH 06/83] Bump version to 0.8.10

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 462a7c581..c2c91763b 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.9'
+__version__ = '0.8.10'

From 4ad00e9d4e00349db600c6f8f08d545ad77d4afc Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Fri, 28 Jan 2022 18:28:07 -0800
Subject: [PATCH 07/83] locked two more requirements to versions that support
 python 2

---
 setup.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 2ee4e99a2..c6c0c6a92 100644
--- a/setup.py
+++ b/setup.py
@@ -59,7 +59,11 @@ def run_tests(self):
                         'pycryptodome==3.8.2',
                         'more-itertools==5.0.0',
                         'tox==3.13.2',
-                        'six==1.12.0'],
+                        'six==1.12.0',
+                        # for the following packages these are the last versions supporting python 2
+                        'pynacl==1.4.0',
+                        'filelock==3.2.1'
+                        ],
       tests_require=['pytest==4.6.5',
                      'mock==3.0.5',
                      'psutil==5.6.3',

From d8666abd17837b6ab8b37cf5c1ec0df099216912 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Mon, 31 Jan 2022 18:16:00 -0800
Subject: [PATCH 08/83] Bump version to 0.7.16

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 4c93f0e6c..84e2e5a42 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.7.15'
+__version__ = '0.7.16'

From b2a841fba6ce74fea362d1e2bc901dabbedf0248 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 2 Feb 2022 15:57:16 -0800
Subject: [PATCH 09/83] removed tox and locked cryptography version

---
 setup.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index c6c0c6a92..2be4ad818 100644
--- a/setup.py
+++ b/setup.py
@@ -58,11 +58,11 @@ def run_tests(self):
                         'pyzmq==18.1.0',
                         'pycryptodome==3.8.2',
                         'more-itertools==5.0.0',
-                        'tox==3.13.2',
                         'six==1.12.0',
                         # for the following packages these are the last versions supporting python 2
                         'pynacl==1.4.0',
-                        'filelock==3.2.1'
+                        'filelock==3.2.1',
+                        'cryptography==3.3.2'
                         ],
       tests_require=['pytest==4.6.5',
                      'mock==3.0.5',

From 5c89870e6b1c1eb089d8812e0dd47ff44c16eb58 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 2 Feb 2022 17:10:11 -0800
Subject: [PATCH 10/83] fixed the null pointer

---
 ducktape/command_line/main.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ducktape/command_line/main.py b/ducktape/command_line/main.py
index aa04a4a80..4b7bb7b3f 100644
--- a/ducktape/command_line/main.py
+++ b/ducktape/command_line/main.py
@@ -184,9 +184,11 @@ def main():
         cluster_class = getattr(cluster_mod, cluster_class_name)
 
         cluster_kwargs = {"cluster_file": args_dict["cluster_file"]}
-        checkers = [load_function(func_path) for func_path in args_dict["ssh_checker_function"]]
-        if checkers:
-            cluster_kwargs['ssh_exception_checks'] = checkers
+        checker_function_names = args_dict['ssh_checker_function']
+        if checker_function_names:
+            checkers = [load_function(func_path) for func_path in checker_function_names]
+            if checkers:
+                cluster_kwargs['ssh_exception_checks'] = checkers
         cluster = cluster_class(**cluster_kwargs)
         for ctx in tests:
             # Note that we're attaching a reference to cluster

From 3235b9c142ccf70213378d40cb5b8852f4d66667 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 2 Feb 2022 17:59:29 -0800
Subject: [PATCH 11/83] Bump version to 0.7.17

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 84e2e5a42..8589f6793 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.7.16'
+__version__ = '0.7.17'

From 47b445db3a231652c336d317bdc60db2aeaf5f28 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 9 Mar 2022 18:32:31 -0800
Subject: [PATCH 12/83] add summary column to the report html

---
 ducktape/templates/report/report.html | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ducktape/templates/report/report.html b/ducktape/templates/report/report.html
index 49446dd69..c06c1d6be 100644
--- a/ducktape/templates/report/report.html
+++ b/ducktape/templates/report/report.html
@@ -87,6 +87,7 @@ <h1>
               <td colSpan='5' align='center'><pre>{this.props.test.description}</pre></td>
               <td colSpan='5' align='center'><pre>{this.props.test.run_time}</pre></td>
               <td colSpan='5' align='center'><pre>{this.props.test.data}</pre></td>
+              <td colSpan='5' align='center'><pre>{this.props.test.summary}</pre></td>
               {detailCol}
             </tr>
           );
@@ -105,6 +106,7 @@ <h1>
                   <th colSpan='5' align='center'>Description</th>
                   <th colSpan='5' align='center'>Time</th>
                   <th colSpan='5' align='center'>Data</th>
+                  <th colSpan='5' align='center'>Summary</th>
                   <th colSpan='5' align='center'>Detail</th>
                 </tr>
               </thead>

From 5185b105f51b5c91bc043bb18d2335c52fc55e01 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 9 Mar 2022 18:50:33 -0800
Subject: [PATCH 13/83] updated html reporter

---
 ducktape/tests/reporter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ducktape/tests/reporter.py b/ducktape/tests/reporter.py
index a461e1677..c25a50b7c 100644
--- a/ducktape/tests/reporter.py
+++ b/ducktape/tests/reporter.py
@@ -239,6 +239,7 @@ def format_result(self, result):
             "run_time": format_time(result.run_time_seconds),
             "data": "" if result.data is None else json.dumps(result.data, sort_keys=True,
                                                               indent=2, separators=(',', ': ')),
+            "summary": result.summary,
             "test_log": self.test_results_dir(result)
         }
         return result_json

From bcac06026981624f7596878e077caf5e42672043 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Fri, 11 Mar 2022 00:31:37 -0800
Subject: [PATCH 14/83] Bump version to 0.7.18

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 8589f6793..4f25e8a08 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.7.17'
+__version__ = '0.7.18'

From 1788117469cd3c811b08eec6dce4e90f89b528df Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Fri, 11 Mar 2022 00:44:38 -0800
Subject: [PATCH 15/83] Bump version to 0.8.11

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index c2c91763b..2863f6867 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.10'
+__version__ = '0.8.11'

From 8dfba8440e76c3aedf652cdbc181d3ccc9a1a023 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 22 Apr 2022 18:07:53 -0700
Subject: [PATCH 16/83] wrap long stack trace lines + update vagrantfile to
 work with latest vagrant and vbox on monterey (#300)

---
 Vagrantfile                           | 2 +-
 ducktape/templates/report/report.css  | 7 +++++++
 ducktape/templates/report/report.html | 2 +-
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/Vagrantfile b/Vagrantfile
index 1191624ac..8f524551e 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -51,7 +51,7 @@ Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
     name = "ducktape" + i.to_s
     config.vm.define name do |worker|
       worker.vm.hostname = name
-      worker.vm.network :private_network, ip: "192.168.50." + (150 + i).to_s
+      worker.vm.network :private_network, ip: "192.168.56." + (150 + i).to_s
     end
   }
 
diff --git a/ducktape/templates/report/report.css b/ducktape/templates/report/report.css
index fe5018278..57daacda1 100644
--- a/ducktape/templates/report/report.css
+++ b/ducktape/templates/report/report.css
@@ -33,6 +33,13 @@ h1, h2, h3, h4, h5, h6 {
     padding: 2px;
 }
 
+.pre_stack_trace {
+    white-space: pre-wrap;       /* Since CSS 2.1 */
+    white-space: -moz-pre-wrap;  /* Mozilla, since 1999 */
+    white-space: -o-pre-wrap;    /* Opera 7 */
+    word-wrap: break-word;       /* Internet Explorer 5.5+ */
+}
+
 .header_row {
     font-weight: bold;
     color: white;
diff --git a/ducktape/templates/report/report.html b/ducktape/templates/report/report.html
index c06c1d6be..3823968e8 100644
--- a/ducktape/templates/report/report.html
+++ b/ducktape/templates/report/report.html
@@ -87,7 +87,7 @@ <h1>
               <td colSpan='5' align='center'><pre>{this.props.test.description}</pre></td>
               <td colSpan='5' align='center'><pre>{this.props.test.run_time}</pre></td>
               <td colSpan='5' align='center'><pre>{this.props.test.data}</pre></td>
-              <td colSpan='5' align='center'><pre>{this.props.test.summary}</pre></td>
+              <td colSpan='5' align='center'><pre className="pre_stack_trace">{this.props.test.summary}</pre></td>
               {detailCol}
             </tr>
           );

From 7ef6259cda19ddc62e546d4465f9a2899ce14cea Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Sun, 24 Apr 2022 23:56:19 -0700
Subject: [PATCH 17/83] Print failed test specs (#301)

* Fix test file discovery regression (#285)

* Fix test file discovery regression

Do not subject explicitly provided test files to default pattern, i.e.
restore the behaviour prior to 2036577e.

* Update styling

Co-authored-by: Ian McDonald <ian_mcdonald@rocketmail.com>

Co-authored-by: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Co-authored-by: Ian McDonald <ian_mcdonald@rocketmail.com>

* print failed test specs

* better output

* Update ducktape/tests/reporter.py

Co-authored-by: Ian McDonald <imcdonald@confluent.io>

* renamed some params and added a simple unit test

* style

Co-authored-by: Nikesh <nshettipalli@gmail.com>
Co-authored-by: imcdo <imcdonald@confluent.io>
Co-authored-by: vp-elitnet <93337942+vp-elitnet@users.noreply.github.com>
Co-authored-by: Ian McDonald <ian_mcdonald@rocketmail.com>
---
 ducktape/command_line/main.py           |  5 +--
 ducktape/tests/reporter.py              | 42 +++++++++++++++++++++++++
 ducktape/tests/result.py                |  1 +
 systests/cluster/test_remote_account.py | 28 +++++++++++++++++
 tests/reporter/check_symbol_reporter.py | 17 ++++++++++
 5 files changed, 91 insertions(+), 2 deletions(-)
 create mode 100644 tests/reporter/check_symbol_reporter.py

diff --git a/ducktape/command_line/main.py b/ducktape/command_line/main.py
index 36108e5c8..9946d9755 100644
--- a/ducktape/command_line/main.py
+++ b/ducktape/command_line/main.py
@@ -27,7 +27,7 @@
 from ducktape.tests.loader import TestLoader, LoaderException
 from ducktape.tests.loggermaker import close_logger
 from ducktape.tests.reporter import SimpleStdoutSummaryReporter, SimpleFileSummaryReporter, \
-    HTMLSummaryReporter, JSONReporter, JUnitReporter
+    HTMLSummaryReporter, JSONReporter, JUnitReporter, FailedTestSymbolReporter
 from ducktape.tests.runner import TestRunner
 from ducktape.tests.session import SessionContext, SessionLoggerMaker
 from ducktape.tests.session import generate_session_id, generate_results_dir
@@ -188,7 +188,8 @@ def main():
         SimpleFileSummaryReporter(test_results),
         HTMLSummaryReporter(test_results),
         JSONReporter(test_results),
-        JUnitReporter(test_results)
+        JUnitReporter(test_results),
+        FailedTestSymbolReporter(test_results)
     ]
 
     for r in reporters:
diff --git a/ducktape/tests/reporter.py b/ducktape/tests/reporter.py
index c25a50b7c..afb0cabb6 100644
--- a/ducktape/tests/reporter.py
+++ b/ducktape/tests/reporter.py
@@ -15,6 +15,9 @@
 from __future__ import print_function
 
 import json
+from pathlib import Path
+
+import yaml
 import os
 import shutil
 import xml.etree.ElementTree as ET
@@ -304,3 +307,42 @@ def format_report(self):
 
     def report(self):
         self.format_report()
+
+
+class FailedTestSymbolReporter(SummaryReporter):
+
+    def __init__(self, results):
+        super().__init__(results)
+        self.separator = "=" * self.width
+
+    @staticmethod
+    def to_symbol(result):
+        line = f'{result.file_name}::{result.cls_name}.{result.function_name}'
+        if result.injected_args:
+            injected_args_str = json.dumps(result.injected_args, separators=(',', ':'))
+            line += f'@{injected_args_str}'
+        return line
+
+    def dump_test_suite(self, lines):
+        print(self.separator)
+        print('FAILED TEST SUITE')
+        suite = {self.results.session_context.session_id: lines}
+        file_path = Path(self.results.session_context.results_dir) / "rerun-failed.yml"
+        with file_path.open('w') as fp:
+            print(f'Test suite to rerun failed tests: {file_path}')
+            yaml.dump(suite, stream=fp, indent=4)
+
+    def print_test_symbols_string(self, lines):
+        print(self.separator)
+        print('FAILED TEST SYMBOLS')
+        print('Pass the test symbols below to your ducktape run')
+        # quote the symbol because json parameters will be processed by shell otherwise, making it not copy-pasteable
+        print(' '.join([f"'{line}'" for line in lines]))
+
+    def report(self):
+        symbols = [self.to_symbol(result) for result in self.results if result.test_status == FAIL]
+        if not symbols:
+            return
+
+        self.dump_test_suite(symbols)
+        self.print_test_symbols_string(symbols)
diff --git a/ducktape/tests/result.py b/ducktape/tests/result.py
index d4035fdec..81413bbbc 100644
--- a/ducktape/tests/result.py
+++ b/ducktape/tests/result.py
@@ -64,6 +64,7 @@ def __init__(self,
         self.test_status = test_status
         self.summary = summary
         self.data = data
+        self.file_name = test_context.file
 
         self.base_results_dir = session_context.results_dir
         if not self.results_dir.endswith(os.path.sep):
diff --git a/systests/cluster/test_remote_account.py b/systests/cluster/test_remote_account.py
index 4b9787a15..9fb655438 100644
--- a/systests/cluster/test_remote_account.py
+++ b/systests/cluster/test_remote_account.py
@@ -16,6 +16,7 @@
 from ducktape.tests.test import Test
 from ducktape.errors import TimeoutError
 from ducktape.mark.resource import cluster
+from ducktape.mark import matrix, parametrize
 
 import os
 import pytest
@@ -110,6 +111,33 @@ def under_utilized_test(self):
         assert self.test_context.cluster.max_used() == 2
 
 
+class FailingTest(Test):
+    """
+    The purpose of this test is to validate reporters. Some of them are intended to fail.
+    """
+    def setup(self):
+        self.service = GenericService(self.test_context, 1)
+
+    @cluster(num_nodes=1)
+    @matrix(string_param=['success-first', 'fail-second', 'fail-third'], int_param=[10, 20, -30])
+    def matrix_test(self, string_param, int_param):
+        assert not string_param.startswith('fail') and int_param > 0
+
+    @cluster(num_nodes=1)
+    @parametrize(string_param=['success-first', 'fail-second'])
+    @parametrize(int_param=[10, -10])
+    def parametrized_test(self, string_param, int_param):
+        assert not string_param.startswith('fail') and int_param > 0
+
+    @cluster(num_nodes=1)
+    def failing_test(self):
+        assert False
+
+    @cluster(num_nodes=1)
+    def successful_test(self):
+        assert True
+
+
 class FileSystemTest(Test):
     """
     Note that in an attempt to isolate the file system methods, validation should be done with ssh/shell commands.
diff --git a/tests/reporter/check_symbol_reporter.py b/tests/reporter/check_symbol_reporter.py
new file mode 100644
index 000000000..779eff542
--- /dev/null
+++ b/tests/reporter/check_symbol_reporter.py
@@ -0,0 +1,17 @@
+from unittest.mock import Mock
+
+from ducktape.tests.reporter import FailedTestSymbolReporter
+
+
+def check_to_symbol_no_args():
+    result = Mock(file_name='test_folder/test_file', cls_name='TestClass', function_name='test_func',
+                  injected_args=None)
+
+    assert FailedTestSymbolReporter.to_symbol(result) == 'test_folder/test_file::TestClass.test_func'
+
+
+def check_to_symbol_with_args():
+    result = Mock(file_name='test_folder/test_file', cls_name='TestClass', function_name='test_func',
+                  injected_args={'arg': 'val'})
+
+    assert FailedTestSymbolReporter.to_symbol(result) == 'test_folder/test_file::TestClass.test_func@{"arg":"val"}'

From bc4a5cc4dfe57ee83a393e7e41fa908de866c704 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Mon, 25 Apr 2022 16:01:35 -0700
Subject: [PATCH 18/83] Bump version to 0.8.12

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 2863f6867..0a5ca3c08 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.11'
+__version__ = '0.8.12'

From 206ad0327d5b6bb074df9f7a2da716a4b281bd0c Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Wed, 27 Apr 2022 17:08:57 -0700
Subject: [PATCH 19/83] readthedocs configs (new style) (#302)

* readthedocs configs (new style)

* removed comments
---
 .readthedocs.yaml | 18 ++++++++++++++++++
 README.md         |  3 +++
 2 files changed, 21 insertions(+)
 create mode 100644 .readthedocs.yaml

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
new file mode 100644
index 000000000..b76cb3095
--- /dev/null
+++ b/.readthedocs.yaml
@@ -0,0 +1,18 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+# Required
+version: 2
+
+build:
+  os: ubuntu-20.04
+  tools:
+    python: "3.7"
+
+sphinx:
+  configuration: docs/conf.py
+
+python:
+  install:
+    - requirements: docs/requirements.txt
diff --git a/README.md b/README.md
index 2ad3345de..5cd75327e 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
+[![Documentation Status](https://readthedocs.org/projects/ducktape-docs/badge/?version=latest)](https://ducktape-docs.readthedocs.io/en/latest/?badge=latest)
+
+
 Distributed System Integration & Performance Testing Library
 ============================================================
 

From d9b3fe7020910203805b3f266c6f19dfdb9febfb Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Thu, 28 Apr 2022 15:47:20 -0500
Subject: [PATCH 20/83] readme: update readthedocs link (#296)

* Fix test file discovery regression (#285)

* Fix test file discovery regression

Do not subject explicitly provided test files to default pattern, i.e.
restore the behaviour prior to 2036577e.

* Update styling

Co-authored-by: Ian McDonald <ian_mcdonald@rocketmail.com>

Co-authored-by: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Co-authored-by: Ian McDonald <ian_mcdonald@rocketmail.com>

* readme: update readthedocs link

Co-authored-by: Nikesh <nshettipalli@gmail.com>
Co-authored-by: Stanislav Vodetskyi <stan@confluent.io>
Co-authored-by: imcdo <imcdonald@confluent.io>
Co-authored-by: vp-elitnet <93337942+vp-elitnet@users.noreply.github.com>
Co-authored-by: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Co-authored-by: Ian McDonald <ian_mcdonald@rocketmail.com>
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5cd75327e..8c1b3c94c 100644
--- a/README.md
+++ b/README.md
@@ -20,7 +20,7 @@ Ducktape contains tools for running system integration and performance tests. It
 Documentation
 -------------
 
-For detailed documentation on how to install, run, create new tests please refer to: http://ducktape-docs.readthedocs.io/
+For detailed documentation on how to install, run, create new tests please refer to: http://ducktape.readthedocs.io/
 
 Contribute
 ----------

From ea51045769abb228419f35698d0c490b8f2716a2 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Fri, 6 May 2022 11:36:05 -0700
Subject: [PATCH 21/83] Add deflake feature to Ducktape (#299)

* basic flaky

* run tests with deflake option

* fix tests

* pr comments, fixes

* warning, account for multiple registries.

* store destroyed services separately

* track destroyed state

* wrap long stack trace lines + update vagrantfile to work with latest vagrant and vbox on monterey (#300)

* add separator

* simplify service registry changes

* remove registry

* formatting

* pr comments

* clear services after loop

Co-authored-by: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
---
 ducktape/command_line/main.py           |   6 +-
 ducktape/command_line/parse_args.py     |   7 +-
 ducktape/services/service.py            |  33 ++++-
 ducktape/services/service_registry.py   |  14 +-
 ducktape/templates/report/report.css    |   4 +
 ducktape/templates/report/report.html   |   6 +
 ducktape/tests/reporter.py              |  39 +++--
 ducktape/tests/result.py                |  12 +-
 ducktape/tests/runner.py                |   7 +-
 ducktape/tests/runner_client.py         | 188 ++++++++++++++----------
 ducktape/tests/status.py                |   1 +
 setup.py                                |  16 +-
 systests/cluster/test_remote_account.py |   4 +
 tests/runner/check_runner.py            |  39 +++--
 tests/runner/check_runner_memory.py     |   2 +-
 tests/runner/resources/test_thingy.py   |   8 +
 16 files changed, 258 insertions(+), 128 deletions(-)

diff --git a/ducktape/command_line/main.py b/ducktape/command_line/main.py
index 4b7bb7b3f..1dbe1558b 100644
--- a/ducktape/command_line/main.py
+++ b/ducktape/command_line/main.py
@@ -200,7 +200,11 @@ def main():
         sys.exit(1)
 
     # Run the tests
-    runner = TestRunner(cluster, session_context, session_logger, tests)
+    deflake_num = args_dict['deflake']
+    if deflake_num < 1:
+        session_logger.warning("specified number of deflake runs specified to be less than 1, running without deflake.")
+    deflake_num = max(1, deflake_num)
+    runner = TestRunner(cluster, session_context, session_logger, tests, deflake_num)
     test_results = runner.run_all_tests()
 
     # Report results
diff --git a/ducktape/command_line/parse_args.py b/ducktape/command_line/parse_args.py
index 13ba9d0e5..b91b01f2e 100644
--- a/ducktape/command_line/parse_args.py
+++ b/ducktape/command_line/parse_args.py
@@ -81,7 +81,12 @@ def create_ducktape_parser():
                         help="Python module path(s) to a function that takes an exception and a remote account"
                         " that will be called when an ssh error occurs, this can give some "
                         "validation or better logging when an ssh error occurs. Specify any "
-                        "number of module paths after this flag to be called.")
+                        "number of module paths after this flag to be called."),
+    parser.add_argument("--deflake", action="store", type=int, default=1,
+                        help=
+                        "the number of times a failed test should be ran total (including its initial run) to determin flakyness,"
+                        "when not present, deflake will not be used, and a test will be marked as either passed or failed, when enabled"
+                        "tests will be marked as flaky if it passes any of the reruns")
     return parser
 
 
diff --git a/ducktape/services/service.py b/ducktape/services/service.py
index 3452586fe..d44a67f9b 100644
--- a/ducktape/services/service.py
+++ b/ducktape/services/service.py
@@ -23,6 +23,31 @@
 import time
 
 
+class ServiceIdFactory:
+    def generate_service_id(self, service):
+        return "{service_name}-{service_number}-{service_id}".format(
+            service_name=service.__class__.__name__,
+            service_number=service._order,
+            service_id=id(service)
+        )
+
+
+class MultiRunServiceIdFactory:
+    def __init__(self, run_number=1):
+        self.run_number = run_number
+
+    def generate_service_id(self, service):
+        return "{run_number}-{service_name}-{service_number}-{service_id}".format(
+            run_number=self.run_number,
+            service_name=service.__class__.__name__,
+            service_number=service._order,
+            service_id=id(service)
+        )
+
+
+service_id_factory = ServiceIdFactory()
+
+
 class Service(TemplateRenderer):
     """Service classes know how to deploy a service onto a set of nodes and then clean up after themselves.
 
@@ -72,6 +97,7 @@ def __init__(self, context, num_nodes=None, cluster_spec=None, *args, **kwargs):
         self._clean_time = -1
 
         self._initialized = False
+        self.service_id_factory = service_id_factory
         self.cluster_spec = Service.setup_cluster_spec(num_nodes=num_nodes, cluster_spec=cluster_spec)
         self.context = context
 
@@ -123,7 +149,7 @@ def local_scratch_dir(self):
     @property
     def service_id(self):
         """Human-readable identifier (almost certainly) unique within a test run."""
-        return "%s-%d-%d" % (self.__class__.__name__, self._order, id(self))
+        return self.service_id_factory.generate_service_id(self)
 
     @property
     def _order(self):
@@ -302,13 +328,12 @@ def clean_node(self, node):
 
     def free(self):
         """Free each node. This 'deallocates' the nodes so the cluster can assign them to other services."""
-        for node in self.nodes:
+        while self.nodes:
+            node = self.nodes.pop()
             self.logger.info("%s: freeing node" % self.who_am_i(node))
             node.account.logger = None
             self.cluster.free(node)
 
-        self.nodes = []
-
     def run(self):
         """Helper that executes run(), wait(), and stop() in sequence."""
         self.start()
diff --git a/ducktape/services/service_registry.py b/ducktape/services/service_registry.py
index ce2960018..24d3d315a 100644
--- a/ducktape/services/service_registry.py
+++ b/ducktape/services/service_registry.py
@@ -38,7 +38,7 @@ def append(self, service):
         self._nodes[id(service)] = [str(n.account) for n in service.nodes]
 
     def to_json(self):
-        return [self._services[k].to_json() for k in self._services]
+        return [service.to_json() for service in self._services.values()]
 
     def stop_all(self):
         """Stop all currently registered services in the reverse of the order in which they were added.
@@ -84,6 +84,8 @@ def free_all(self):
 
         if keyboard_interrupt is not None:
             raise keyboard_interrupt
+        self._services.clear()
+        self._nodes.clear()
 
     def min_cluster_spec(self):
         """
@@ -99,8 +101,8 @@ def errors(self):
         """
         Gets a printable string containing any errors produced by the services.
         """
-        all_errors = []
-        for service in self._services.values():
-            if hasattr(service, 'error') and service.error:
-                all_errors.append("%s: %s" % (service.who_am_i(), service.error))
-        return '\n\n'.join(all_errors)
+        return '\n\n'.join(
+            "{}: {}".format(service.who_am_i(), service.error)
+            for service in self._services.values()
+            if hasattr(service, 'error') and service.error
+        )
diff --git a/ducktape/templates/report/report.css b/ducktape/templates/report/report.css
index 57daacda1..c8a9060da 100644
--- a/ducktape/templates/report/report.css
+++ b/ducktape/templates/report/report.css
@@ -66,6 +66,10 @@ h1, h2, h3, h4, h5, h6 {
     background-color: #6c6;
 }
 
+.flaky {
+    background-color: #dd2;
+}
+
 .fail {
     background-color: #c60; 
 }
diff --git a/ducktape/templates/report/report.html b/ducktape/templates/report/report.html
index 3823968e8..e3bfb7677 100644
--- a/ducktape/templates/report/report.html
+++ b/ducktape/templates/report/report.html
@@ -11,6 +11,7 @@
     <div id="color_key_panel"></div>
     <div id="failed_test_panel"></div>
     <div id="ignored_test_panel"></div>
+    <div id="flaky_test_panel"></div>
     <div id="passed_test_panel"></div>
     <script type="text/jsx">
       /* This small block makes it possible to use React dev tools in the Chrome browser */
@@ -38,6 +39,7 @@ <h1>
             <tr>
               <td colSpan='5' align='center'>{this.props.summary_prop.tests}</td>
               <td colSpan='5' align='center'>{this.props.summary_prop.passes}</td>
+              <td colSpan='5' align='center'>{this.props.summary_prop.flaky}</td>
               <td colSpan='5' align='center'>{this.props.summary_prop.failures}</td>
               <td colSpan='5' align='center'>{this.props.summary_prop.ignored}</td>
               <td colSpan='5' align='center'>{this.props.summary_prop.run_time}</td>
@@ -54,6 +56,7 @@ <h1>
                 <tr id="summary_header_row">
                   <th colSpan='5' align='center'>Tests</th>
                   <th colSpan='5' align='center'>Passes</th>
+                  <th colSpan='5' align='center'>Flaky</th>
                   <th colSpan='5' align='center'>Failures</th>
                   <th colSpan='5' align='center'>Ignored</th>
                   <th colSpan='5' align='center'>Time</th>
@@ -177,6 +180,7 @@ <h2>{this.props.title}</h2>
       SUMMARY=[{
         "tests": %(num_tests)d,
         "passes": %(num_passes)d,
+        "flaky": %(num_flaky)d,
         "failures": %(num_failures)d,
         "ignored": %(num_ignored)d,
         "run_time": '%(run_time)s'
@@ -190,6 +194,7 @@ <h2>{this.props.title}</h2>
       COLOR_KEYS=[%(test_status_names)s];
 
       PASSED_TESTS=[%(passed_tests)s];
+      FLAKY_TESTS=[%(flaky_tests)s];
       FAILED_TESTS=[%(failed_tests)s];
       IGNORED_TESTS=[%(ignored_tests)s];
 
@@ -198,6 +203,7 @@ <h2>{this.props.title}</h2>
       React.render(<SummaryPanel summary_props={SUMMARY}/>, document.getElementById('summary_panel'));
       React.render(<TestPanel title="Failed Tests" tests={FAILED_TESTS}/>, document.getElementById('failed_test_panel'));
       React.render(<TestPanel title="Ignored Tests" tests={IGNORED_TESTS}/>, document.getElementById('ignored_test_panel'));
+      React.render(<TestPanel title="Flaky Tests" tests={FLAKY_TESTS}/>, document.getElementById('flaky_test_panel'));
       React.render(<TestPanel title="Passed Tests" tests={PASSED_TESTS}/>, document.getElementById('passed_test_panel'));
     </script>
   </body>
diff --git a/ducktape/tests/reporter.py b/ducktape/tests/reporter.py
index c25a50b7c..984cacdc5 100644
--- a/ducktape/tests/reporter.py
+++ b/ducktape/tests/reporter.py
@@ -23,7 +23,7 @@
 
 from ducktape.utils.terminal_size import get_terminal_size
 from ducktape.utils.util import ducktape_version
-from ducktape.tests.status import PASS, FAIL, IGNORE
+from ducktape.tests.status import PASS, FAIL, IGNORE, FLAKY
 from ducktape.json_serializable import DucktapeJSONEncoder
 
 
@@ -109,6 +109,7 @@ def header_string(self):
             "run time:         %s" % format_time(self.results.run_time_seconds),
             "tests run:        %d" % len(self.results),
             "passed:           %d" % self.results.num_passed,
+            "flaky:            %d" % self.results.num_flaky,
             "failed:           %d" % self.results.num_failed,
             "ignored:          %d" % self.results.num_ignored,
             "=" * self.width
@@ -177,7 +178,7 @@ def report(self):
             elif result.test_status == IGNORE:
                 testsuite['skipped'] += 1
 
-        total = self.results.num_failed + self.results.num_ignored + self.results.num_passed
+        total = self.results.num_failed + self.results.num_ignored + self.results.num_passed + self.results.num_flaky
         # Now start building XML document
         root = ET.Element('testsuites', attrib=dict(
             name="ducktape", time=str(self.results.run_time_seconds),
@@ -261,35 +262,43 @@ def format_report(self):
 
         num_tests = len(self.results)
         num_passes = 0
-        failed_result_string = ""
-        passed_result_string = ""
-        ignored_result_string = ""
+        failed_result_string = []
+        passed_result_string = []
+        ignored_result_string = []
+        flaky_result_string = []
 
         for result in self.results:
             json_string = json.dumps(self.format_result(result))
             if result.test_status == PASS:
                 num_passes += 1
-                passed_result_string += json_string
-                passed_result_string += ","
+                passed_result_string.append(json_string)
+                passed_result_string.append(",")
             elif result.test_status == FAIL:
-                failed_result_string += json_string
-                failed_result_string += ","
+                failed_result_string.append(json_string)
+                failed_result_string.append(",")
+            elif result.test_status == IGNORE:
+                ignored_result_string.append(json_string)
+                ignored_result_string.append(",")
+            elif result.test_status == FLAKY:
+                flaky_result_string.append(json_string)
+                flaky_result_string.append(",")
             else:
-                ignored_result_string += json_string
-                ignored_result_string += ","
+                raise Exception("Unknown test status in report: {}".format(result.test_status.to_json()))
 
         args = {
             'ducktape_version': ducktape_version(),
             'num_tests': num_tests,
             'num_passes': self.results.num_passed,
+            'num_flaky': self.results.num_flaky,
             'num_failures': self.results.num_failed,
             'num_ignored': self.results.num_ignored,
             'run_time': format_time(self.results.run_time_seconds),
             'session': self.results.session_context.session_id,
-            'passed_tests': passed_result_string,
-            'failed_tests': failed_result_string,
-            'ignored_tests': ignored_result_string,
-            'test_status_names': ",".join(["\'%s\'" % str(status) for status in [PASS, FAIL, IGNORE]])
+            'passed_tests': "".join(passed_result_string),
+            'flaky_tests': "".join(flaky_result_string),
+            'failed_tests': "".join(failed_result_string),
+            'ignored_tests': "".join(ignored_result_string),
+            'test_status_names': ",".join(["\'%s\'" % str(status) for status in [PASS, FAIL, IGNORE, FLAKY]])
         }
 
         html = template % args
diff --git a/ducktape/tests/result.py b/ducktape/tests/result.py
index d4035fdec..c8eb98b14 100644
--- a/ducktape/tests/result.py
+++ b/ducktape/tests/result.py
@@ -21,7 +21,7 @@
 from ducktape.tests.reporter import SingleResultFileReporter
 from ducktape.utils.local_filesystem_utils import mkdir_p
 from ducktape.utils.util import ducktape_version
-from ducktape.tests.status import PASS, FAIL, IGNORE
+from ducktape.tests.status import FLAKY, PASS, FAIL, IGNORE
 
 
 class TestResult(object):
@@ -161,6 +161,10 @@ def num_failed(self):
     def num_ignored(self):
         return len([r for r in self._results if r.test_status == IGNORE])
 
+    @property
+    def num_flaky(self):
+        return len([r for r in self._results if r.test_status == FLAKY])
+
     @property
     def run_time_seconds(self):
         if self.start_time < 0:
@@ -203,8 +207,7 @@ def to_json(self):
             cluster_utilization = (1.0 / len(self.cluster)) * (1.0 / self.run_time_seconds) * \
                 sum([r.nodes_used * r.run_time_seconds for r in self])
             parallelism = sum([r.run_time_seconds for r in self._results]) / self.run_time_seconds
-
-        return {
+        result = {
             "ducktape_version": ducktape_version(),
             "session_context": self.session_context,
             "run_time_seconds": self.run_time_seconds,
@@ -221,3 +224,6 @@ def to_json(self):
             "parallelism": parallelism,
             "results": [r for r in self._results]
         }
+        if self.num_flaky:
+            result['num_flaky'] = self.num_flaky
+        return result
diff --git a/ducktape/tests/runner.py b/ducktape/tests/runner.py
index c7931ed9e..93bfa24a5 100644
--- a/ducktape/tests/runner.py
+++ b/ducktape/tests/runner.py
@@ -84,7 +84,7 @@ class TestRunner(object):
     # When set to True, the test runner will finish running/cleaning the current test, but it will not run any more
     stop_testing = False
 
-    def __init__(self, cluster, session_context, session_logger, tests,
+    def __init__(self, cluster, session_context, session_logger, tests, deflake_num,
                  min_port=ConsoleDefaults.TEST_DRIVER_MIN_PORT,
                  max_port=ConsoleDefaults.TEST_DRIVER_MAX_PORT):
 
@@ -101,6 +101,8 @@ def __init__(self, cluster, session_context, session_logger, tests,
         self.hostname = "localhost"
         self.receiver = Receiver(min_port, max_port)
 
+        self.deflake_num = deflake_num
+
         self.session_context = session_context
         self.max_parallel = session_context.max_parallel
         self.results = TestResults(self.session_context, self.cluster)
@@ -241,7 +243,8 @@ def _run_single_test(self, test_context):
                 TestContext.logger_name(test_context, current_test_counter),
                 TestContext.results_dir(test_context, current_test_counter),
                 self.session_context.debug,
-                self.session_context.fail_bad_cluster_utilization
+                self.session_context.fail_bad_cluster_utilization,
+                self.deflake_num
             ])
 
         self._client_procs[test_key] = proc
diff --git a/ducktape/tests/runner_client.py b/ducktape/tests/runner_client.py
index ae3358842..9dc9ce80f 100644
--- a/ducktape/tests/runner_client.py
+++ b/ducktape/tests/runner_client.py
@@ -20,10 +20,13 @@
 import zmq
 
 from six import iteritems
+from ducktape.services.service import MultiRunServiceIdFactory, service_id_factory
+from ducktape.services.service_registry import ServiceRegistry
 
 from ducktape.tests.event import ClientEventFactory
 from ducktape.tests.loader import TestLoader
 from ducktape.tests.serde import SerDe
+from ducktape.tests.status import FLAKY
 from ducktape.tests.test import test_logger, TestContext
 
 from ducktape.tests.result import TestResult, IGNORE, PASS, FAIL
@@ -39,7 +42,7 @@ class RunnerClient(object):
     """Run a single test"""
 
     def __init__(self, server_hostname, server_port, test_id,
-                 test_index, logger_name, log_dir, debug, fail_bad_cluster_utilization):
+                 test_index, logger_name, log_dir, debug, fail_bad_cluster_utilization, deflake_num):
         signal.signal(signal.SIGTERM, self._sigterm_handler)  # register a SIGTERM handler
 
         self.serde = SerDe()
@@ -58,6 +61,8 @@ def __init__(self, server_hostname, server_port, test_id,
         self.test_metadata = ready_reply["test_metadata"]
         self.cluster = ready_reply["cluster"]
 
+        self.deflake_num = deflake_num
+
         # Wait to instantiate the test object until running the test
         self.test = None
         self.test_context = None
@@ -102,82 +107,108 @@ def run(self):
 
         start_time = -1
         stop_time = -1
-        test_status = PASS
-        summary = ""
+        test_status = FAIL
+        summary = []
         data = None
-
-        try:
-            # Results from this test, as well as logs will be dumped here
-            mkdir_p(TestContext.results_dir(self.test_context, self.test_index))
-            # Instantiate test
-            self.test = self.test_context.cls(self.test_context)
-
-            self.log(logging.DEBUG, "Checking if there are enough nodes...")
-            min_cluster_spec = self.test.min_cluster_spec()
-            os_to_num_nodes = {}
-            for node_spec in min_cluster_spec:
-                if not os_to_num_nodes.get(node_spec.operating_system):
-                    os_to_num_nodes[node_spec.operating_system] = 1
-                else:
-                    os_to_num_nodes[node_spec.operating_system] = os_to_num_nodes[node_spec.operating_system] + 1
-            for (operating_system, node_count) in iteritems(os_to_num_nodes):
-                num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
-                if node_count > num_avail:
-                    raise RuntimeError(
-                        "There are not enough nodes available in the cluster to run this test. "
-                        "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
-                        (operating_system, num_avail, node_count, self.test_context.services))
-
-            # Run the test unit
-            start_time = time.time()
-            self.setup_test()
-
-            data = self.run_test()
-
-            test_status = PASS
-            self.log(logging.INFO, "PASS")
-
-        except BaseException as e:
-            # mark the test as failed before doing anything else
-            test_status = FAIL
-            err_trace = self._exc_msg(e)
-            summary += err_trace
-            self.log(logging.INFO, "FAIL: " + err_trace)
-
-        finally:
-            self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
-
-            stop_time = time.time()
-
-            if hasattr(self, "services"):
-                service_errors = self.test_context.services.errors()
-                if service_errors:
-                    summary += "\n\n" + service_errors
-
-            test_status, summary = self._check_cluster_utilization(test_status, summary)
-
-            result = TestResult(
-                self.test_context,
-                self.test_index,
-                self.session_context,
-                test_status,
-                summary,
-                data,
-                start_time,
-                stop_time)
-
-            self.log(logging.INFO, "Summary: %s" % str(result.summary))
-            self.log(logging.INFO, "Data: %s" % str(result.data))
-
-            result.report()
-            # Tell the server we are finished
-            self._do_safely(lambda: self.send(self.message.finished(result=result)),
-                            "Problem sending FINISHED message for " + str(self.test_metadata) + ":\n")
-            # Release test_context resources only after creating the result and finishing logging activity
-            # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
-            self.test_context.close()
-            self.test_context = None
-            self.test = None
+        all_services = ServiceRegistry()
+
+        sid_factory = service_id_factory
+
+        num_runs = 1
+        while test_status == FAIL and num_runs <= self.deflake_num:
+            if self.deflake_num > 1:
+                sid_factory = MultiRunServiceIdFactory(num_runs)
+            self.log(logging.INFO, "on run {}/{}".format(num_runs, self.deflake_num))
+            try:
+                # Results from this test, as well as logs will be dumped here
+                mkdir_p(TestContext.results_dir(self.test_context, self.test_index))
+                # Instantiate test
+                self.test = self.test_context.cls(self.test_context)
+
+                self.log(logging.DEBUG, "Checking if there are enough nodes...")
+                min_cluster_spec = self.test.min_cluster_spec()
+                os_to_num_nodes = {}
+                for node_spec in min_cluster_spec:
+                    if not os_to_num_nodes.get(node_spec.operating_system):
+                        os_to_num_nodes[node_spec.operating_system] = 1
+                    else:
+                        os_to_num_nodes[node_spec.operating_system] = os_to_num_nodes[node_spec.operating_system] + 1
+                for (operating_system, node_count) in iteritems(os_to_num_nodes):
+                    num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
+                    if node_count > num_avail:
+                        raise RuntimeError(
+                            "There are not enough nodes available in the cluster to run this test. "
+                            "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
+                            (operating_system, num_avail, node_count, self.test_context.services))
+
+                # Run the test unit
+                start_time = time.time()
+                self.setup_test()
+
+                data = self.run_test()
+
+                test_status = PASS if num_runs == 1 else FLAKY
+                self.log(logging.INFO, "{} TEST".format(test_status.to_json()))
+
+            except BaseException as e:
+                # mark the test as failed before doing anything else
+                test_status = FAIL
+                err_trace = self._exc_msg(e)
+                summary.append(err_trace)
+                if num_runs != self.deflake_num:
+                    summary.append("~" * max(len(l) for l in err_trace.split('\n')) + "\n")
+                self.log(logging.INFO, "FAIL: " + err_trace)
+
+            finally:
+                for service in self.test_context.services:
+                    service.service_id_factory = sid_factory
+                    all_services.append(service)
+
+                self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
+
+                stop_time = time.time()
+
+                if hasattr(self.test_context, "services"):
+                    service_errors = self.test_context.services.errors()
+                    if service_errors:
+                        summary.extend(["\n\n", service_errors])
+
+                # free nodes
+                if self.test:
+                    self.log(logging.DEBUG, "Freeing nodes...")
+                    self._do_safely(self.test.free_nodes, "Error freeing nodes:")
+
+                num_runs += 1
+
+        summary = "".join(summary)
+        test_status, summary = self._check_cluster_utilization(test_status, summary)
+
+        if num_runs > 1:
+            # for reporting purposes report all services
+            self.test_context.services = all_services
+        # for flaky tests, we report the start and end time of the successful run, and not the whole run period
+        result = TestResult(
+            self.test_context,
+            self.test_index,
+            self.session_context,
+            test_status,
+            summary,
+            data,
+            start_time,
+            stop_time)
+
+        self.log(logging.INFO, "Summary: %s" % str(result.summary))
+        self.log(logging.INFO, "Data: %s" % str(result.data))
+
+        result.report()
+        # Tell the server we are finished
+        self._do_safely(lambda: self.send(self.message.finished(result=result)),
+                        "Problem sending FINISHED message for " + str(self.test_metadata) + ":\n")
+        # Release test_context resources only after creating the result and finishing logging activity
+        # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
+        self.test_context.close()
+        self.test_context = None
+        self.test = None
 
     def _check_cluster_utilization(self, result, summary):
         """Checks if the number of nodes used by a test is less than the number of
@@ -191,7 +222,7 @@ def _check_cluster_utilization(self, result, summary):
             message = "Test requested %d nodes, used only %d" % (total, max_used)
             if self.fail_bad_cluster_utilization:
                 # only check node utilization on test pass
-                if result == PASS:
+                if result == PASS or result == FLAKY:
                     self.log(logging.INFO, "FAIL: " + message)
 
                 result = FAIL
@@ -252,9 +283,6 @@ def teardown_test(self, teardown_services=True, test_status=None):
             self.log(logging.DEBUG, "Cleaning up services...")
             self._do_safely(services.clean_all, "Error cleaning services:")
 
-        self.log(logging.DEBUG, "Freeing nodes...")
-        self._do_safely(self.test.free_nodes, "Error freeing nodes:")
-
     def log(self, log_level, msg, *args, **kwargs):
         """Log to the service log and the test log of the current test."""
 
diff --git a/ducktape/tests/status.py b/ducktape/tests/status.py
index 3db594e57..111d52087 100644
--- a/ducktape/tests/status.py
+++ b/ducktape/tests/status.py
@@ -28,5 +28,6 @@ def to_json(self):
 
 
 PASS = TestStatus("pass")
+FLAKY = TestStatus("flaky")
 FAIL = TestStatus("fail")
 IGNORE = TestStatus("ignore")
diff --git a/setup.py b/setup.py
index 2be4ad818..5fe45d754 100644
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,14 @@ def run_tests(self):
         self.run_command('flake8')
         sys.exit(errno)
 
+test_req = [
+    'pytest==4.6.5',
+    'mock==3.0.5',
+    'psutil==5.6.3',
+    'memory_profiler==0.55',
+    'statistics==1.0.3.5',
+    'requests-testadapter==0.3.0'
+]
 
 setup(name="ducktape",
       version=version,
@@ -64,12 +72,8 @@ def run_tests(self):
                         'filelock==3.2.1',
                         'cryptography==3.3.2'
                         ],
-      tests_require=['pytest==4.6.5',
-                     'mock==3.0.5',
-                     'psutil==5.6.3',
-                     'memory_profiler==0.55',
-                     'statistics==1.0.3.5',
-                     'requests-testadapter==0.3.0'],
+      tests_require=test_req,
+      extras_require={'test': test_req},
       setup_requires=['flake8==3.7.8'],
       cmdclass={'test': PyTest},
       )
diff --git a/systests/cluster/test_remote_account.py b/systests/cluster/test_remote_account.py
index 56d84a80c..45ad1600a 100644
--- a/systests/cluster/test_remote_account.py
+++ b/systests/cluster/test_remote_account.py
@@ -427,6 +427,10 @@ def __init__(self, test_context):
     def setup(self):
         self.account_service.start()
 
+    @cluster(num_nodes=1)
+    def test_flaky(self):
+        assert random.choice([True, False, False])
+
     @cluster(num_nodes=1)
     def test_ssh_capture_combine_stderr(self):
         """Test that ssh_capture correctly captures stderr and stdout from remote process.
diff --git a/tests/runner/check_runner.py b/tests/runner/check_runner.py
index 148f6bbbb..f2754638d 100644
--- a/tests/runner/check_runner.py
+++ b/tests/runner/check_runner.py
@@ -54,7 +54,7 @@ def check_insufficient_cluster_resources(self):
 
         test_context = TestContext(session_context=session_context, module=None, cls=TestThingy,
                                    function=TestThingy.test_pi, file=TEST_THINGY_FILE, cluster=mock_cluster)
-        runner = TestRunner(mock_cluster, session_context, Mock(), [test_context])
+        runner = TestRunner(mock_cluster, session_context, Mock(), [test_context], 1)
 
         # Even though the cluster is too small, the test runner should this handle gracefully without raising an error
         results = runner.run_all_tests()
@@ -80,10 +80,11 @@ def check_simple_run(self):
         test_methods = [TestThingy.test_pi, TestThingy.test_ignore1, TestThingy.test_ignore2, TestThingy.test_failure]
         ctx_list = self._do_expand(test_file=TEST_THINGY_FILE, test_class=TestThingy, test_methods=test_methods,
                                    cluster=mock_cluster, session_context=session_context)
-        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1)
 
         results = runner.run_all_tests()
         assert len(results) == 4
+        assert results.num_flaky == 0
         assert results.num_failed == 1
         assert results.num_passed == 1
         assert results.num_ignored == 2
@@ -91,6 +92,23 @@ def check_simple_run(self):
         result_with_data = [r for r in results if r.data is not None][0]
         assert result_with_data.data == {"data": 3.14159}
 
+    def check_deflake_run(self):
+        """Check that with deflake enabled, a test that passes only on rerun is reported as flaky."""
+        mock_cluster = LocalhostCluster(num_nodes=1000)
+        session_context = tests.ducktape_mock.session_context()
+
+        test_methods = [TestThingy.test_flaky, TestThingy.test_failure]
+        ctx_list = self._do_expand(test_file=TEST_THINGY_FILE, test_class=TestThingy, test_methods=test_methods,
+                                   cluster=mock_cluster, session_context=session_context)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 2)
+
+        results = runner.run_all_tests()
+        assert len(results) == 2
+        assert results.num_flaky == 1
+        assert results.num_failed == 1
+        assert results.num_passed == 0
+        assert results.num_ignored == 0
+
     def check_runner_report_junit(self):
         """Check we can serialize results into a xunit xml format. Also ensures that the XML report
         adheres to the Junit spec using xpath queries"""
@@ -99,7 +117,7 @@ def check_runner_report_junit(self):
         test_methods = [TestThingy.test_pi, TestThingy.test_ignore1, TestThingy.test_ignore2, TestThingy.test_failure]
         ctx_list = self._do_expand(test_file=TEST_THINGY_FILE, test_class=TestThingy, test_methods=test_methods,
                                    cluster=mock_cluster, session_context=session_context)
-        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1)
 
         results = runner.run_all_tests()
         JUnitReporter(results).report()
@@ -138,7 +156,7 @@ def check_exit_first(self):
         test_methods = [FailingTest.test_fail]
         ctx_list = self._do_expand(test_file=FAILING_TEST_FILE, test_class=FailingTest, test_methods=test_methods,
                                    cluster=mock_cluster, session_context=session_context)
-        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1)
         results = runner.run_all_tests()
         assert len(ctx_list) > 1
         assert len(results) == 1
@@ -156,10 +174,11 @@ def check_exits_if_failed_to_initialize(self):
                                         test_methods=[FailsToInitInSetupTest.test_nothing],
                                         cluster=mock_cluster, session_context=session_context))
 
-        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1)
         results = runner.run_all_tests()
         # These tests fail to initialize, each class has two test methods, so should have 4 results, all failed
         assert len(results) == 4
+        assert results.num_flaky == 0
         assert results.num_failed == 4
         assert results.num_passed == 0
         assert results.num_ignored == 0
@@ -180,12 +199,13 @@ def check_sends_result_when_internal_error(self, exc_msg_mock, mkdir_p_mock):
         test_methods = [TestThingy.test_pi, TestThingy.test_ignore1, TestThingy.test_ignore2, TestThingy.test_failure]
         ctx_list = self._do_expand(test_file=TEST_THINGY_FILE, test_class=TestThingy, test_methods=test_methods,
                                    cluster=mock_cluster, session_context=session_context)
-        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1)
 
         results = runner.run_all_tests()
         assert len(results) == 4
-        assert results.num_failed == 2
-        assert results.num_passed == 0
+        assert results.num_flaky == 0
+        assert results.num_failed == 1
+        assert results.num_passed == 1
         assert results.num_ignored == 2
 
     def check_run_failure_with_bad_cluster_allocation(self):
@@ -198,11 +218,12 @@ def check_run_failure_with_bad_cluster_allocation(self):
         ctx_list = self._do_expand(test_file=TEST_THINGY_FILE, test_class=ClusterTestThingy,
                                    test_methods=test_methods, cluster=mock_cluster,
                                    session_context=session_context)
-        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1)
 
         results = runner.run_all_tests()
 
         assert len(results) == 2
+        assert results.num_flaky == 0
         assert results.num_failed == 1
         assert results.num_passed == 1
         assert results.num_ignored == 0
diff --git a/tests/runner/check_runner_memory.py b/tests/runner/check_runner_memory.py
index 5958d2d05..c1ca80479 100644
--- a/tests/runner/check_runner_memory.py
+++ b/tests/runner/check_runner_memory.py
@@ -87,7 +87,7 @@ def check_for_inter_test_memory_leak(self):
         assert len(ctx_list) == N_TEST_CASES  # Sanity check
 
         q = queue.Queue()
-        runner = InstrumentedTestRunner(self.cluster, self.session_context, Mock(), ctx_list, queue=q)
+        runner = InstrumentedTestRunner(self.cluster, self.session_context, Mock(), ctx_list, 1, queue=q)
         runner.run_all_tests()
 
         measurements = []
diff --git a/tests/runner/resources/test_thingy.py b/tests/runner/resources/test_thingy.py
index 45ff1b7b2..36c2866e8 100644
--- a/tests/runner/resources/test_thingy.py
+++ b/tests/runner/resources/test_thingy.py
@@ -18,6 +18,9 @@
 from ducktape.mark.resource import cluster
 
 
+_flake = False
+
+
 class TestThingy(Test):
     """Fake ducktape test class"""
 
@@ -40,6 +43,11 @@ def test_ignore2(self, x=2):
     def test_failure(self):
         raise Exception("This failed")
 
+    def test_flaky(self):
+        global _flake
+        flake, _flake = _flake, not _flake
+        assert flake
+
 
 class ClusterTestThingy(Test):
     """Fake ducktape test class"""

From 01bc09d32917f2bb7a76f65e8734b07ff1805be3 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Wed, 11 May 2022 22:00:21 -0700
Subject: [PATCH 22/83] Fix runner client (#309)

* fix runner client exception handling

* updated unit test

* fixed style violations
---
 ducktape/command_line/parse_args.py |   8 +-
 ducktape/tests/runner_client.py     | 193 +++++++++++++++-------------
 setup.py                            |   1 +
 tests/runner/check_runner.py        |   4 +-
 4 files changed, 109 insertions(+), 97 deletions(-)

diff --git a/ducktape/command_line/parse_args.py b/ducktape/command_line/parse_args.py
index b91b01f2e..2e8882488 100644
--- a/ducktape/command_line/parse_args.py
+++ b/ducktape/command_line/parse_args.py
@@ -83,10 +83,10 @@ def create_ducktape_parser():
                         "validation or better logging when an ssh error occurs. Specify any "
                         "number of module paths after this flag to be called."),
     parser.add_argument("--deflake", action="store", type=int, default=1,
-                        help=
-                        "the number of times a failed test should be ran total (including its initial run) to determin flakyness,"
-                        "when not present, deflake will not be used, and a test will be marked as either passed or failed, when enabled"
-                        "tests will be marked as flaky if it passes any of the reruns")
+                        help="the number of times a failed test should be ran in total (including its initial run) "
+                             "to determine flakyness. When not present, deflake will not be used, "
+                             "and a test will be marked as either passed or failed. "
+                             "When enabled tests will be marked as flaky if it passes on any of the reruns")
     return parser
 
 
diff --git a/ducktape/tests/runner_client.py b/ducktape/tests/runner_client.py
index 9dc9ce80f..de7b38b1c 100644
--- a/ducktape/tests/runner_client.py
+++ b/ducktape/tests/runner_client.py
@@ -66,6 +66,7 @@ def __init__(self, server_hostname, server_port, test_id,
         # Wait to instantiate the test object until running the test
         self.test = None
         self.test_context = None
+        self.all_services = None
 
     def send(self, event):
         return self.sender.send(event)
@@ -110,105 +111,115 @@ def run(self):
         test_status = FAIL
         summary = []
         data = None
-        all_services = ServiceRegistry()
-
-        sid_factory = service_id_factory
-
-        num_runs = 1
-        while test_status == FAIL and num_runs <= self.deflake_num:
-            if self.deflake_num > 1:
-                sid_factory = MultiRunServiceIdFactory(num_runs)
-            self.log(logging.INFO, "on run {}/{}".format(num_runs, self.deflake_num))
-            try:
-                # Results from this test, as well as logs will be dumped here
-                mkdir_p(TestContext.results_dir(self.test_context, self.test_index))
-                # Instantiate test
-                self.test = self.test_context.cls(self.test_context)
-
-                self.log(logging.DEBUG, "Checking if there are enough nodes...")
-                min_cluster_spec = self.test.min_cluster_spec()
-                os_to_num_nodes = {}
-                for node_spec in min_cluster_spec:
-                    if not os_to_num_nodes.get(node_spec.operating_system):
-                        os_to_num_nodes[node_spec.operating_system] = 1
-                    else:
-                        os_to_num_nodes[node_spec.operating_system] = os_to_num_nodes[node_spec.operating_system] + 1
-                for (operating_system, node_count) in iteritems(os_to_num_nodes):
-                    num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
-                    if node_count > num_avail:
-                        raise RuntimeError(
-                            "There are not enough nodes available in the cluster to run this test. "
-                            "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
-                            (operating_system, num_avail, node_count, self.test_context.services))
-
-                # Run the test unit
-                start_time = time.time()
-                self.setup_test()
-
-                data = self.run_test()
+        self.all_services = ServiceRegistry()
 
-                test_status = PASS if num_runs == 1 else FLAKY
-                self.log(logging.INFO, "{} TEST".format(test_status.to_json()))
+        num_runs = 0
 
-            except BaseException as e:
-                # mark the test as failed before doing anything else
-                test_status = FAIL
-                err_trace = self._exc_msg(e)
-                summary.append(err_trace)
-                if num_runs != self.deflake_num:
-                    summary.append("~" * max(len(l) for l in err_trace.split('\n')) + "\n")
-                self.log(logging.INFO, "FAIL: " + err_trace)
-
-            finally:
-                for service in self.test_context.services:
-                    service.service_id_factory = sid_factory
-                    all_services.append(service)
+        try:
+            while test_status == FAIL and num_runs < self.deflake_num:
+                num_runs += 1
+                self.log(logging.INFO, "on run {}/{}".format(num_runs, self.deflake_num))
+                start_time = time.time()
+                test_status, summary, data = self._do_run(num_runs)
+        finally:
+            stop_time = time.time()
+            if test_status == PASS and num_runs > 1:
+                test_status = FLAKY
+            summary = "".join(summary)
+            test_status, summary = self._check_cluster_utilization(test_status, summary)
+
+            if num_runs > 1:
+                # for reporting purposes report all services
+                self.test_context.services = self.all_services
+            # for flaky tests, we report the start and end time of the successful run, and not the whole run period
+            result = TestResult(
+                self.test_context,
+                self.test_index,
+                self.session_context,
+                test_status,
+                summary,
+                data,
+                start_time,
+                stop_time)
+
+            self.log(logging.INFO, "Summary: %s" % str(result.summary))
+            self.log(logging.INFO, "Data: %s" % str(result.data))
 
-                self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
+            result.report()
+            # Tell the server we are finished
+            self._do_safely(lambda: self.send(self.message.finished(result=result)),
+                            "Problem sending FINISHED message for " + str(self.test_metadata) + ":\n")
+            # Release test_context resources only after creating the result and finishing logging activity
+            # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
+            self.test_context.close()
+            self.all_services = None
+            self.test_context = None
+            self.test = None
 
-                stop_time = time.time()
+    def _do_run(self, num_runs):
+        test_status = FAIL
+        summary = []
+        data = None
+        sid_factory = MultiRunServiceIdFactory(num_runs) if self.deflake_num > 1 else service_id_factory
+        try:
+            # Results from this test, as well as logs will be dumped here
+            mkdir_p(TestContext.results_dir(self.test_context, self.test_index))
+            # Instantiate test
+            self.test = self.test_context.cls(self.test_context)
+            # Check if there are enough nodes
+            self._check_min_cluster_spec()
+            # Run the test unit
 
-                if hasattr(self.test_context, "services"):
-                    service_errors = self.test_context.services.errors()
-                    if service_errors:
-                        summary.extend(["\n\n", service_errors])
+            self.setup_test()
 
-                # free nodes
-                if self.test:
-                    self.log(logging.DEBUG, "Freeing nodes...")
-                    self._do_safely(self.test.free_nodes, "Error freeing nodes:")
+            data = self.run_test()
 
-                num_runs += 1
+            test_status = PASS
+            self.log(logging.INFO, "{} TEST".format(test_status.to_json()))
 
-        summary = "".join(summary)
-        test_status, summary = self._check_cluster_utilization(test_status, summary)
-
-        if num_runs > 1:
-            # for reporting purposes report all services
-            self.test_context.services = all_services
-        # for flaky tests, we report the start and end time of the successful run, and not the whole run period
-        result = TestResult(
-            self.test_context,
-            self.test_index,
-            self.session_context,
-            test_status,
-            summary,
-            data,
-            start_time,
-            stop_time)
-
-        self.log(logging.INFO, "Summary: %s" % str(result.summary))
-        self.log(logging.INFO, "Data: %s" % str(result.data))
-
-        result.report()
-        # Tell the server we are finished
-        self._do_safely(lambda: self.send(self.message.finished(result=result)),
-                        "Problem sending FINISHED message for " + str(self.test_metadata) + ":\n")
-        # Release test_context resources only after creating the result and finishing logging activity
-        # The Sender object uses the same logger, so we postpone closing until after the finished message is sent
-        self.test_context.close()
-        self.test_context = None
-        self.test = None
+        except BaseException as e:
+            # mark the test as failed before doing anything else
+            test_status = FAIL
+            err_trace = self._exc_msg(e)
+            summary.append(err_trace)
+            if num_runs != self.deflake_num:
+                summary.append("~" * max(len(l) for l in err_trace.split('\n')) + "\n")
+            self.log(logging.INFO, "FAIL: " + err_trace)
+
+        finally:
+            for service in self.test_context.services:
+                service.service_id_factory = sid_factory
+                self.all_services.append(service)
+
+            self.teardown_test(teardown_services=not self.session_context.no_teardown, test_status=test_status)
+
+            if hasattr(self.test_context, "services"):
+                service_errors = self.test_context.services.errors()
+                if service_errors:
+                    summary.extend(["\n\n", service_errors])
+
+            # free nodes
+            if self.test:
+                self.log(logging.DEBUG, "Freeing nodes...")
+                self._do_safely(self.test.free_nodes, "Error freeing nodes:")
+            return test_status, summary, data
+
+    def _check_min_cluster_spec(self):
+        self.log(logging.DEBUG, "Checking if there are enough nodes...")
+        min_cluster_spec = self.test.min_cluster_spec()
+        os_to_num_nodes = {}
+        for node_spec in min_cluster_spec:
+            if not os_to_num_nodes.get(node_spec.operating_system):
+                os_to_num_nodes[node_spec.operating_system] = 1
+            else:
+                os_to_num_nodes[node_spec.operating_system] = os_to_num_nodes[node_spec.operating_system] + 1
+        for (operating_system, node_count) in iteritems(os_to_num_nodes):
+            num_avail = len(list(self.cluster.all().nodes.elements(operating_system=operating_system)))
+            if node_count > num_avail:
+                raise RuntimeError(
+                    "There are not enough nodes available in the cluster to run this test. "
+                    "Cluster size for %s: %d, Need at least: %d. Services currently registered: %s" %
+                    (operating_system, num_avail, node_count, self.test_context.services))
 
     def _check_cluster_utilization(self, result, summary):
         """Checks if the number of nodes used by a test is less than the number of
diff --git a/setup.py b/setup.py
index 5fe45d754..0120359cf 100644
--- a/setup.py
+++ b/setup.py
@@ -31,6 +31,7 @@ def run_tests(self):
         self.run_command('flake8')
         sys.exit(errno)
 
+
 test_req = [
     'pytest==4.6.5',
     'mock==3.0.5',
diff --git a/tests/runner/check_runner.py b/tests/runner/check_runner.py
index f2754638d..67398bc4d 100644
--- a/tests/runner/check_runner.py
+++ b/tests/runner/check_runner.py
@@ -204,8 +204,8 @@ def check_sends_result_when_internal_error(self, exc_msg_mock, mkdir_p_mock):
         results = runner.run_all_tests()
         assert len(results) == 4
         assert results.num_flaky == 0
-        assert results.num_failed == 1
-        assert results.num_passed == 1
+        assert results.num_failed == 2
+        assert results.num_passed == 0
         assert results.num_ignored == 2
 
     def check_run_failure_with_bad_cluster_allocation(self):

From 51d9df0ce87cd7d6bd1e38ea1d2aeb87210d85c1 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 11 May 2022 22:13:29 -0700
Subject: [PATCH 23/83] Bump version to 0.7.19

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 4f25e8a08..276cdf144 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.7.18'
+__version__ = '0.7.19'

From f34b8c0a9a0aba448e40c7281e6f07a84e7d4fd9 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 11 May 2022 22:20:46 -0700
Subject: [PATCH 24/83] Bump version to 0.8.13

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 0a5ca3c08..ab26e3de0 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.12'
+__version__ = '0.8.13'

From 469850808d92f987298f133aea64acf1ada371dd Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 13 May 2022 16:16:29 -0700
Subject: [PATCH 25/83] fixed #284 properly (#310)

---
 ducktape/tests/loader.py | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/ducktape/tests/loader.py b/ducktape/tests/loader.py
index be3d4ed21..df2d1314f 100644
--- a/ducktape/tests/loader.py
+++ b/ducktape/tests/loader.py
@@ -389,15 +389,22 @@ def _find_test_files(self, path_or_glob):
         """
         test_files = []
         self.logger.debug('Looking for test files in {}'.format(path_or_glob))
+        # glob is safe to be called on non-glob path - it would just return that same path wrapped in a list
         expanded_glob = glob.glob(path_or_glob)
         self.logger.debug('Expanded {} into {}'.format(path_or_glob, expanded_glob))
-        # glob is safe to be called on non-glob path - it would just return that same path wrapped in a list
+
+        def maybe_add_test_file(f):
+            if self._is_test_file(f):
+                test_files.append(f)
+            else:
+                self.logger.debug("Skipping {} because it isn't a test file".format(f))
+
         for path in expanded_glob:
             if not os.path.exists(path):
                 raise LoaderException('Path {} does not exist'.format(path))
             self.logger.debug('Checking {}'.format(path))
             if os.path.isfile(path):
-                test_files.append(os.path.abspath(path))
+                maybe_add_test_file(path)
             elif os.path.isdir(path):
                 for pwd, dirs, files in os.walk(path):
                     if "__init__.py" not in files:
@@ -405,10 +412,7 @@ def _find_test_files(self, path_or_glob):
                         continue
                     for f in files:
                         file_path = os.path.abspath(os.path.join(pwd, f))
-                        if self._is_test_file(file_path):
-                            test_files.append(file_path)
-                        else:
-                            self.logger.debug("Skipping {} because it isn't a test file".format(file_path))
+                        maybe_add_test_file(file_path)
             else:
                 raise LoaderException("Got a path that we don't understand: " + path)
 
@@ -556,7 +560,13 @@ def _load_test_contexts(self, test_discovery_symbols, base_dir=None):
             path_or_glob = os.path.abspath(path_or_glob)
 
             # TODO: consider adding a check to ensure glob or dir is not used together with cls_name and method
-            test_files = self._find_test_files(path_or_glob)
+            test_files = []
+            if os.path.isfile(path_or_glob):
+                # if it is a single file, just add it directly - https://github.com/confluentinc/ducktape/issues/284
+                test_files = [path_or_glob]
+            else:
+                # otherwise, when dealing with a dir or a glob, apply pattern matching rules
+                test_files = self._find_test_files(path_or_glob)
 
             self._add_top_level_dirs_to_sys_path(test_files)
 

From fc4c6455bca6607afff79a2100171cacbab2b969 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 13 May 2022 17:08:11 -0700
Subject: [PATCH 26/83] Requirements txt (#305)

* use requirements.txt instead of inline reqs

* updated setup.py

* updated setup.py

* make tox also read from requirements-test.txt
---
 requirements-test.txt |  9 +++++++++
 requirements.txt      | 19 +++++++++++++++++++
 setup.py              | 30 ++----------------------------
 tox.ini               | 10 +---------
 4 files changed, 31 insertions(+), 37 deletions(-)
 create mode 100644 requirements-test.txt
 create mode 100644 requirements.txt

diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 000000000..1421af453
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,9 @@
+pytest==4.6.5
+mock==3.0.5
+psutil==5.6.3
+memory_profiler==0.55
+statistics==1.0.3.5
+requests-testadapter==0.3.0
+flake8~=3.7.9
+pytest-cov~=2.6.1
+pytest-xdist~=1.34.0
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..8ce97b6ee
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,19 @@
+jinja2==2.10.1
+boto3==1.9.217
+# jinja2 pulls in MarkupSafe with a > constraint, but we need to constrain it for compatibility
+MarkupSafe<2.0.0
+pyparsing<3.0.0
+zipp<2.0.0
+pywinrm==0.2.2
+requests==2.22.0
+bcrypt==3.1.7
+paramiko~=2.3.2
+pysistence==0.4.1
+pyzmq==18.1.0
+pycryptodome==3.8.2
+more-itertools==5.0.0
+six==1.12.0
+# for the following packages these are the last versions supporting python 2
+pynacl==1.4.0
+filelock==3.2.1
+cryptography==3.3.2
diff --git a/setup.py b/setup.py
index 0120359cf..a6e3b1607 100644
--- a/setup.py
+++ b/setup.py
@@ -32,14 +32,7 @@ def run_tests(self):
         sys.exit(errno)
 
 
-test_req = [
-    'pytest==4.6.5',
-    'mock==3.0.5',
-    'psutil==5.6.3',
-    'memory_profiler==0.55',
-    'statistics==1.0.3.5',
-    'requests-testadapter==0.3.0'
-]
+test_req = open('requirements-test.txt').read()
 
 setup(name="ducktape",
       version=version,
@@ -53,26 +46,7 @@ def run_tests(self):
       url="http://github.com/confluentinc/ducktape",
       packages=find_packages(),
       package_data={'ducktape': ['templates/report/*']},
-      install_requires=['jinja2==2.10.1',
-                        'boto3==1.9.217',
-                        # jinja2 pulls in MarkupSafe with a > constraint, but we need to constrain it for compatibility
-                        'MarkupSafe<2.0.0',
-                        'pyparsing<3.0.0',
-                        'zipp<2.0.0',
-                        'pywinrm==0.2.2',
-                        'requests==2.22.0',
-                        'bcrypt==3.1.7',
-                        'paramiko~=2.3.2',
-                        'pysistence==0.4.1',
-                        'pyzmq==18.1.0',
-                        'pycryptodome==3.8.2',
-                        'more-itertools==5.0.0',
-                        'six==1.12.0',
-                        # for the following packages these are the last versions supporting python 2
-                        'pynacl==1.4.0',
-                        'filelock==3.2.1',
-                        'cryptography==3.3.2'
-                        ],
+      install_requires=open('requirements.txt').read(),
       tests_require=test_req,
       extras_require={'test': test_req},
       setup_requires=['flake8==3.7.8'],
diff --git a/tox.ini b/tox.ini
index fb4c09d60..8c5e716b3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,15 +6,7 @@ envlist = py27, py36, py37, cover, style
 # have a single env to work with, which makes debugging easier (like which env?).
 # Not as clean but easier to work with during development, which is better.
 deps =
-    flake8==3.7.*
-    mock==2.0.*
-    pytest==4.4.*
-    pytest-cov==2.6.*
-    pytest-xdist==1.28.*
-    psutil==4.1.0
-    memory_profiler==0.41
-    statistics==1.0.3.5
-    requests-testadapter==0.3.0
+    -r requirements-test.txt
 install_command =
     pip install -U {packages}
 recreate = False

From 38071b4acc3ee96dfb86d787d710c3e73c5e2719 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 13 May 2022 17:09:19 -0700
Subject: [PATCH 27/83] Requirements txt - 0.8.x edition (#306)

* use requirements.txt instead of inline reqs

* updated setup.py

* work in progress - update deps and tox

* fixed #284 properly

* uncomment a test and fix the style violation
---
 ducktape/tests/runner_client.py |  2 +-
 requirements-test.txt           | 10 ++++++++++
 requirements.txt                | 15 +++++++++++++++
 setup.py                        | 28 +++-------------------------
 tox.ini                         | 10 +---------
 5 files changed, 30 insertions(+), 35 deletions(-)
 create mode 100644 requirements-test.txt
 create mode 100644 requirements.txt

diff --git a/ducktape/tests/runner_client.py b/ducktape/tests/runner_client.py
index de7b38b1c..b8cef9f7f 100644
--- a/ducktape/tests/runner_client.py
+++ b/ducktape/tests/runner_client.py
@@ -183,7 +183,7 @@ def _do_run(self, num_runs):
             err_trace = self._exc_msg(e)
             summary.append(err_trace)
             if num_runs != self.deflake_num:
-                summary.append("~" * max(len(l) for l in err_trace.split('\n')) + "\n")
+                summary.append("~" * max(len(line) for line in err_trace.split('\n')) + "\n")
             self.log(logging.INFO, "FAIL: " + err_trace)
 
         finally:
diff --git a/requirements-test.txt b/requirements-test.txt
new file mode 100644
index 000000000..3c37e6fbe
--- /dev/null
+++ b/requirements-test.txt
@@ -0,0 +1,10 @@
+pytest~=6.2.0
+# 4.0 drops py27 support
+mock==4.0.2
+psutil==5.7.2
+memory_profiler==0.57
+statistics==1.0.3.5
+requests-testadapter==0.3.0
+flake8~=4.0.0
+pytest-cov~=3.0
+pytest-xdist~=2.5
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..31239060d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,15 @@
+jinja2==2.11.2
+boto3==1.15.9
+# jinja2 pulls in MarkupSafe with a > constraint, but we need to constrain it for compatibility
+MarkupSafe<2.0.0
+pyparsing<3.0.0
+zipp<2.0.0
+pywinrm==0.2.2
+requests==2.24.0
+paramiko~=2.7.2
+pyzmq==19.0.2
+pycryptodome==3.9.8
+more-itertools==5.0.0
+tox==3.20.0
+six==1.15.0
+PyYAML==5.3.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 835a5f2dc..6c3389b28 100644
--- a/setup.py
+++ b/setup.py
@@ -32,15 +32,8 @@ def run_tests(self):
         sys.exit(errno)
 
 
-test_req = [
-    'pytest==6.1.0',
-    # 4.0 drops py27 support
-    'mock==4.0.2',
-    'psutil==5.7.2',
-    'memory_profiler==0.57',
-    'statistics==1.0.3.5',
-    'requests-testadapter==0.3.0'
-]
+test_req = open('requirements-test.txt').read()
+
 
 setup(name="ducktape",
       version=version,
@@ -55,22 +48,7 @@ def run_tests(self):
       packages=find_packages(),
       package_data={'ducktape': ['templates/report/*']},
       python_requires='>= 3.6',
-      install_requires=['jinja2==2.11.2',
-                        'boto3==1.15.9',
-                        # jinja2 pulls in MarkupSafe with a > constraint, but we need to constrain it for compatibility
-                        'MarkupSafe<2.0.0',
-                        'pyparsing<3.0.0',
-                        'zipp<2.0.0',
-                        'pywinrm==0.2.2',
-                        'requests==2.24.0',
-                        'paramiko~=2.7.2',
-                        'pyzmq==19.0.2',
-                        'pycryptodome==3.9.8',
-                        # > 5.0 drops py27 support
-                        'more-itertools==5.0.0',
-                        'tox==3.20.0',
-                        'six==1.15.0',
-                        'PyYAML==5.3.1'],
+      install_requires=open('requirements.txt').read(),
       tests_require=test_req,
       extras_require={'test': test_req},
       setup_requires=['flake8==3.8.3'],
diff --git a/tox.ini b/tox.ini
index fad0e430c..0e75cf442 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,15 +6,7 @@ envlist = py36, py37, py38, cover, style
 # have a single env to work with, which makes debugging easier (like which env?).
 # Not as clean but easier to work with during development, which is better.
 deps =
-    flake8==3.7.*
-    mock==2.0.*
-    pytest==4.4.*
-    pytest-cov==2.6.*
-    pytest-xdist==1.28.*
-    psutil==4.1.0
-    memory_profiler==0.41
-    statistics==1.0.3.5
-    requests-testadapter==0.3.0
+    -r requirements-test.txt
 install_command =
     pip install -U {packages}
 recreate = False

From 65824355a97d1f6fcc4f762bbfb074655f35da22 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 13 May 2022 20:30:47 -0700
Subject: [PATCH 28/83] relative paths when printing tests to rerun (#308)

* relative paths when printing tests to rerun

* fixed #284 properly

* fix tests

* unskip skipped test
---
 ducktape/tests/reporter.py              |  7 ++++---
 systests/cluster/test_remote_account.py |  4 ++--
 tests/reporter/check_symbol_reporter.py | 22 +++++++++++++++++-----
 3 files changed, 23 insertions(+), 10 deletions(-)

diff --git a/ducktape/tests/reporter.py b/ducktape/tests/reporter.py
index 3d0c37cf6..8163d1756 100644
--- a/ducktape/tests/reporter.py
+++ b/ducktape/tests/reporter.py
@@ -322,11 +322,12 @@ class FailedTestSymbolReporter(SummaryReporter):
 
     def __init__(self, results):
         super().__init__(results)
+        self.working_dir = Path().absolute()
         self.separator = "=" * self.width
 
-    @staticmethod
-    def to_symbol(result):
-        line = f'{result.file_name}::{result.cls_name}.{result.function_name}'
+    def to_symbol(self, result):
+        p = Path(result.file_name).relative_to(self.working_dir)
+        line = f'{p}::{result.cls_name}.{result.function_name}'
         if result.injected_args:
             injected_args_str = json.dumps(result.injected_args, separators=(',', ':'))
             line += f'@{injected_args_str}'
diff --git a/systests/cluster/test_remote_account.py b/systests/cluster/test_remote_account.py
index cb8bb3aa1..cf5d4f25c 100644
--- a/systests/cluster/test_remote_account.py
+++ b/systests/cluster/test_remote_account.py
@@ -124,8 +124,8 @@ def matrix_test(self, string_param, int_param):
         assert not string_param.startswith('fail') and int_param > 0
 
     @cluster(num_nodes=1)
-    @parametrize(string_param=['success-first', 'fail-second'])
-    @parametrize(int_param=[10, -10])
+    @parametrize(string_param='success-first', int_param=10)
+    @parametrize(string_param='fail-second', int_param=-10)
     def parametrized_test(self, string_param, int_param):
         assert not string_param.startswith('fail') and int_param > 0
 
diff --git a/tests/reporter/check_symbol_reporter.py b/tests/reporter/check_symbol_reporter.py
index 779eff542..b78a604cb 100644
--- a/tests/reporter/check_symbol_reporter.py
+++ b/tests/reporter/check_symbol_reporter.py
@@ -1,17 +1,29 @@
+from pathlib import Path
 from unittest.mock import Mock
 
 from ducktape.tests.reporter import FailedTestSymbolReporter
 
 
-def check_to_symbol_no_args():
-    result = Mock(file_name='test_folder/test_file', cls_name='TestClass', function_name='test_func',
+def check_to_symbol_no_args(tmp_path):
+    result = Mock(file_name='/test_folder/test_file', cls_name='TestClass', function_name='test_func',
                   injected_args=None)
+    reporter = FailedTestSymbolReporter(Mock())
+    reporter.working_dir = Path('/')
+    assert reporter.to_symbol(result) == 'test_folder/test_file::TestClass.test_func'
 
-    assert FailedTestSymbolReporter.to_symbol(result) == 'test_folder/test_file::TestClass.test_func'
+
+def check_to_symbol_relative_path(tmp_path):
+    result = Mock(file_name='/test_folder/test_file', cls_name='TestClass', function_name='test_func',
+                  injected_args=None)
+    reporter = FailedTestSymbolReporter(Mock())
+    reporter.working_dir = Path('/test_folder')
+    assert reporter.to_symbol(result) == 'test_file::TestClass.test_func'
 
 
 def check_to_symbol_with_args():
-    result = Mock(file_name='test_folder/test_file', cls_name='TestClass', function_name='test_func',
+    result = Mock(file_name='/test_folder/test_file', cls_name='TestClass', function_name='test_func',
                   injected_args={'arg': 'val'})
 
-    assert FailedTestSymbolReporter.to_symbol(result) == 'test_folder/test_file::TestClass.test_func@{"arg":"val"}'
+    reporter = FailedTestSymbolReporter(Mock())
+    reporter.working_dir = Path('/')
+    assert reporter.to_symbol(result) == 'test_folder/test_file::TestClass.test_func@{"arg":"val"}'

From 8a8279467ce31c6d64a1eebef5814fe607607d77 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Wed, 25 May 2022 13:12:15 -0700
Subject: [PATCH 29/83] cleaner logging for a passing test (#313)

* cleaner logging for a passing test

* revamped logs again

* code review comment
---
 ducktape/tests/runner_client.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/ducktape/tests/runner_client.py b/ducktape/tests/runner_client.py
index de7b38b1c..c066af2eb 100644
--- a/ducktape/tests/runner_client.py
+++ b/ducktape/tests/runner_client.py
@@ -121,11 +121,21 @@ def run(self):
                 self.log(logging.INFO, "on run {}/{}".format(num_runs, self.deflake_num))
                 start_time = time.time()
                 test_status, summary, data = self._do_run(num_runs)
+
+                if test_status == PASS and num_runs > 1:
+                    test_status = FLAKY
+
+                msg = str(test_status.to_json())
+                if summary:
+                    msg += ": {}".format(summary)
+                if num_runs != self.deflake_num:
+                    msg += "\n" + "~" * max(len(l) for l in summary.split('\n'))
+
+                self.log(logging.INFO, msg)
+
         finally:
             stop_time = time.time()
-            if test_status == PASS and num_runs > 1:
-                test_status = FLAKY
-            summary = "".join(summary)
+
             test_status, summary = self._check_cluster_utilization(test_status, summary)
 
             if num_runs > 1:
@@ -142,7 +152,6 @@ def run(self):
                 start_time,
                 stop_time)
 
-            self.log(logging.INFO, "Summary: %s" % str(result.summary))
             self.log(logging.INFO, "Data: %s" % str(result.data))
 
             result.report()
@@ -175,16 +184,12 @@ def _do_run(self, num_runs):
             data = self.run_test()
 
             test_status = PASS
-            self.log(logging.INFO, "{} TEST".format(test_status.to_json()))
 
         except BaseException as e:
             # mark the test as failed before doing anything else
             test_status = FAIL
             err_trace = self._exc_msg(e)
             summary.append(err_trace)
-            if num_runs != self.deflake_num:
-                summary.append("~" * max(len(l) for l in err_trace.split('\n')) + "\n")
-            self.log(logging.INFO, "FAIL: " + err_trace)
 
         finally:
             for service in self.test_context.services:
@@ -202,7 +207,7 @@ def _do_run(self, num_runs):
             if self.test:
                 self.log(logging.DEBUG, "Freeing nodes...")
                 self._do_safely(self.test.free_nodes, "Error freeing nodes:")
-            return test_status, summary, data
+            return test_status, "".join(summary), data
 
     def _check_min_cluster_spec(self):
         self.log(logging.DEBUG, "Checking if there are enough nodes...")

From 66251d35cc16e17f06dc81417f807b8c6ee9214a Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Wed, 25 May 2022 15:49:26 -0700
Subject: [PATCH 30/83] merge conflict resolution (#315)

* merge conflict resolution

* missed changes from merge

* fix style - ambiguous var name
---
 ducktape/tests/runner_client.py | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/ducktape/tests/runner_client.py b/ducktape/tests/runner_client.py
index b8cef9f7f..b39f721ab 100644
--- a/ducktape/tests/runner_client.py
+++ b/ducktape/tests/runner_client.py
@@ -121,11 +121,21 @@ def run(self):
                 self.log(logging.INFO, "on run {}/{}".format(num_runs, self.deflake_num))
                 start_time = time.time()
                 test_status, summary, data = self._do_run(num_runs)
+
+                if test_status == PASS and num_runs > 1:
+                    test_status = FLAKY
+
+                msg = str(test_status.to_json())
+                if summary:
+                    msg += ": {}".format(summary)
+                if num_runs != self.deflake_num:
+                    msg += "\n" + "~" * max(len(line) for line in summary.split('\n'))
+
+                self.log(logging.INFO, msg)
+
         finally:
             stop_time = time.time()
-            if test_status == PASS and num_runs > 1:
-                test_status = FLAKY
-            summary = "".join(summary)
+
             test_status, summary = self._check_cluster_utilization(test_status, summary)
 
             if num_runs > 1:
@@ -142,7 +152,6 @@ def run(self):
                 start_time,
                 stop_time)
 
-            self.log(logging.INFO, "Summary: %s" % str(result.summary))
             self.log(logging.INFO, "Data: %s" % str(result.data))
 
             result.report()
@@ -175,16 +184,12 @@ def _do_run(self, num_runs):
             data = self.run_test()
 
             test_status = PASS
-            self.log(logging.INFO, "{} TEST".format(test_status.to_json()))
 
         except BaseException as e:
             # mark the test as failed before doing anything else
             test_status = FAIL
             err_trace = self._exc_msg(e)
             summary.append(err_trace)
-            if num_runs != self.deflake_num:
-                summary.append("~" * max(len(line) for line in err_trace.split('\n')) + "\n")
-            self.log(logging.INFO, "FAIL: " + err_trace)
 
         finally:
             for service in self.test_context.services:
@@ -202,7 +207,7 @@ def _do_run(self, num_runs):
             if self.test:
                 self.log(logging.DEBUG, "Freeing nodes...")
                 self._do_safely(self.test.free_nodes, "Error freeing nodes:")
-            return test_status, summary, data
+            return test_status, "".join(summary), data
 
     def _check_min_cluster_spec(self):
         self.log(logging.DEBUG, "Checking if there are enough nodes...")

From ea21afa82c690274aa2762d6a3e4b28938a548e9 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 25 May 2022 16:48:24 -0700
Subject: [PATCH 31/83] Bump version to 0.7.20

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 276cdf144..fc588e507 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.7.19'
+__version__ = '0.7.20'

From 7f2a9d110a80a7d98cc791a6ad2f1100197d8acc Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Wed, 25 May 2022 17:01:57 -0700
Subject: [PATCH 32/83] Bump version to 0.8.14

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index ab26e3de0..1f0fbdbce 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.13'
+__version__ = '0.8.14'

From b4624ecf63b792fc149cd1478f7c96cc45707b97 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 27 May 2022 12:20:21 -0700
Subject: [PATCH 33/83] Print ssh error (#319)

* print ssh exception before running ssh checker

* print ssh exception before running ssh checker
---
 ducktape/cluster/remoteaccount.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ducktape/cluster/remoteaccount.py b/ducktape/cluster/remoteaccount.py
index 4f5ad7040..2242b9470 100644
--- a/ducktape/cluster/remoteaccount.py
+++ b/ducktape/cluster/remoteaccount.py
@@ -35,6 +35,7 @@ def wrapper(self, *args, **kwargs):
             return method(self, *args, **kwargs)
         except (SSHException, NoValidConnectionsError, socket.error) as e:
             if self._custom_ssh_exception_checks:
+                self._log(logging.DEBUG, "caught ssh error", exc_info=True)
                 self._log(logging.DEBUG, "starting ssh checks:")
                 self._log(logging.DEBUG, "\n".join(repr(f) for f in self._custom_ssh_exception_checks))
                 for func in self._custom_ssh_exception_checks:

From 4dfd9ba9ad1cf7781c213b4e4acb3680ec9822a1 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Tue, 31 May 2022 23:10:41 -0700
Subject: [PATCH 34/83] lock certifi requirements (#318)

---
 requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/requirements.txt b/requirements.txt
index 8ce97b6ee..0fe64dd7a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,3 +17,4 @@ six==1.12.0
 pynacl==1.4.0
 filelock==3.2.1
 cryptography==3.3.2
+certifi==2020.04.05.1

From 5f2dba5e09b9ef70e3933f348f63f7526e3b1739 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Tue, 7 Jun 2022 15:49:50 -0700
Subject: [PATCH 35/83] command to print total number of nodes (#320)

* command to print total number of nodes

* change to a single command-line flag --collect-num-nodes

* use expected_num_nodes
---
 ducktape/command_line/main.py           |  5 +++++
 ducktape/command_line/parse_args.py     |  2 ++
 systests/cluster/test_remote_account.py | 14 +++++++++++++-
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/ducktape/command_line/main.py b/ducktape/command_line/main.py
index 1dbe1558b..a7b666540 100644
--- a/ducktape/command_line/main.py
+++ b/ducktape/command_line/main.py
@@ -165,6 +165,11 @@ def main():
             print("    " + str(test))
         sys.exit(0)
 
+    if args_dict["collect_num_nodes"]:
+        total_nodes = sum(test.expected_num_nodes for test in tests)
+        print(total_nodes)
+        sys.exit(0)
+
     if args_dict["sample"]:
         print("Running a sample of %d tests" % args_dict["sample"])
         try:
diff --git a/ducktape/command_line/parse_args.py b/ducktape/command_line/parse_args.py
index 2e8882488..108f5c235 100644
--- a/ducktape/command_line/parse_args.py
+++ b/ducktape/command_line/parse_args.py
@@ -28,6 +28,8 @@ def create_ducktape_parser():
     parser.add_argument('test_path', metavar='test_path', type=str, nargs='*', default=[os.getcwd()],
                         help='one or more space-delimited strings indicating where to search for tests.')
     parser.add_argument("--collect-only", action="store_true", help="display collected tests, but do not run.")
+    parser.add_argument("--collect-num-nodes", action="store_true",
+                        help="display total number of nodes requested by all tests, but do not run anything.")
     parser.add_argument("--debug", action="store_true", help="pipe more verbose test output to stdout.")
     parser.add_argument("--config-file", action="store", default=ConsoleDefaults.USER_CONFIG_FILE,
                         help="path to project-specific configuration file.")
diff --git a/systests/cluster/test_remote_account.py b/systests/cluster/test_remote_account.py
index 45ad1600a..2b1d3cf0a 100644
--- a/systests/cluster/test_remote_account.py
+++ b/systests/cluster/test_remote_account.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from ducktape.cluster.cluster_spec import ClusterSpec
+from ducktape.cluster.cluster_spec import ClusterSpec, WINDOWS, LINUX, NodeSpec
 from ducktape.services.service import Service
 from ducktape.tests.test import Test
 from ducktape.errors import TimeoutError
@@ -418,6 +418,18 @@ def test_create_two_node_service(self):
         for node in self.service.nodes:
             node.account.ssh("echo hi")
 
+    @cluster(cluster_spec=ClusterSpec.from_nodes(
+        [
+            NodeSpec(operating_system=WINDOWS),
+            NodeSpec(operating_system=LINUX),
+            NodeSpec()  # this one is also linux
+        ]
+    ))
+    def three_nodes_test(self):
+        self.service = GenericService(self.test_context, 3)
+        for node in self.service.nodes:
+            node.account.ssh("echo hi")
+
 
 class RemoteAccountTest(Test):
     def __init__(self, test_context):

From 6946eb449d611408a50e06e3cc145be58da038e9 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Wed, 15 Jun 2022 17:24:28 -0700
Subject: [PATCH 36/83] pin xmltodict (#326)

---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index 0fe64dd7a..f6a869120 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,3 +18,5 @@ pynacl==1.4.0
 filelock==3.2.1
 cryptography==3.3.2
 certifi==2020.04.05.1
+# xmltodict is required for pywinrm, but as they didn't pin their python2 version, we have to
+xmltodict==0.12.0

From a214102db4b85adaafe102f2715879c34a2fc4c5 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Wed, 15 Jun 2022 17:28:56 -0700
Subject: [PATCH 37/83] Terminate processes when experiencing a fatal error in
 ducktape runner (#323)

* update test runner

* update docstring

* readd newline

* add a simple test to run against

* fix formatting
---
 ducktape/tests/runner.py                   |  3 ++
 systests/cluster/test_runner_operations.py | 51 ++++++++++++++++++++++
 tests/runner/check_runner.py               | 18 +++++++-
 tests/runner/resources/test_thingy.py      |  4 ++
 4 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 systests/cluster/test_runner_operations.py

diff --git a/ducktape/tests/runner.py b/ducktape/tests/runner.py
index 93bfa24a5..5c6037919 100644
--- a/ducktape/tests/runner.py
+++ b/ducktape/tests/runner.py
@@ -210,6 +210,9 @@ def run_all_tests(self):
                         self._log(logging.ERROR, err_str)
 
                         # All processes are on the same machine, so treat communication failure as a fatal error
+                        for proc in self._client_procs.values():
+                            proc.terminate()
+                        self._client_procs = {}
                         raise
             except KeyboardInterrupt:
                 # If SIGINT is received, stop triggering new tests, and let the currently running tests finish
diff --git a/systests/cluster/test_runner_operations.py b/systests/cluster/test_runner_operations.py
new file mode 100644
index 000000000..956723a28
--- /dev/null
+++ b/systests/cluster/test_runner_operations.py
@@ -0,0 +1,51 @@
+# Copyright 2022 Confluent Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from ducktape.services.service import Service
+from ducktape.tests.test import Test
+from ducktape.mark.resource import cluster
+import time
+
+
+class SimpleEchoService(Service):
+    """Simple service that allocates one node for performing tests of RemoteAccount functionality"""
+    logs = {
+        "my_log": {
+            "path": "/tmp/log",
+            "collect_default": True
+        },
+    }
+
+    def __init__(self, context):
+        super(SimpleEchoService, self).__init__(context, num_nodes=1)
+        self.count = 0
+
+    def echo(self):
+        self.nodes[0].account.ssh("echo {} >> /tmp/log".format(self.count))
+        self.count += 1
+
+
+class SimpleRunnerTest(Test):
+    def setup(self):
+        self.service = SimpleEchoService(self.test_context)
+
+    @cluster(num_nodes=1)
+    def timeout_test(self):
+        """
+        a simple longer running test to test special run flags against.
+        """
+        self.service.start()
+
+        while self.service.count < 100000000:
+            self.service.echo()
+            time.sleep(.2)
diff --git a/tests/runner/check_runner.py b/tests/runner/check_runner.py
index 67398bc4d..706de7e16 100644
--- a/tests/runner/check_runner.py
+++ b/tests/runner/check_runner.py
@@ -16,6 +16,7 @@
 except ImportError:
     from mock import patch, MagicMock  # noqa: F401
 
+import pytest
 from ducktape.tests.runner_client import RunnerClient
 from ducktape.tests.test import TestContext
 from ducktape.tests.runner import TestRunner
@@ -29,7 +30,7 @@
 from .resources.test_thingy import ClusterTestThingy, TestThingy
 from .resources.test_failing_tests import FailingTest
 from ducktape.tests.reporter import JUnitReporter
-
+from ducktape.errors import TimeoutError
 
 from mock import Mock
 import os
@@ -227,3 +228,18 @@ def check_run_failure_with_bad_cluster_allocation(self):
         assert results.num_failed == 1
         assert results.num_passed == 1
         assert results.num_ignored == 0
+
+    def check_runner_timeout(self):
+        """Check process cleanup and error handling in a parallel runner client run."""
+        mock_cluster = LocalhostCluster(num_nodes=1000)
+        session_context = tests.ducktape_mock.session_context(max_parallel=1000, test_runner_timeout=1)
+
+        test_methods = [TestThingy.test_delayed, TestThingy.test_failure]
+        ctx_list = self._do_expand(test_file=TEST_THINGY_FILE, test_class=TestThingy, test_methods=test_methods,
+                                   cluster=mock_cluster, session_context=session_context)
+        runner = TestRunner(mock_cluster, session_context, Mock(), ctx_list, 1)
+
+        with pytest.raises(TimeoutError):
+            runner.run_all_tests()
+
+        assert not runner._client_procs
diff --git a/tests/runner/resources/test_thingy.py b/tests/runner/resources/test_thingy.py
index 36c2866e8..ffa53bf25 100644
--- a/tests/runner/resources/test_thingy.py
+++ b/tests/runner/resources/test_thingy.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from time import time
 from ducktape.cluster.cluster_spec import ClusterSpec
 from ducktape.tests.test import Test
 from ducktape.mark import ignore, parametrize
@@ -31,6 +32,9 @@ def min_cluster_spec(self):
     def test_pi(self):
         return {"data": 3.14159}
 
+    def test_delayed(self):
+        time.sleep(1)
+
     @ignore
     def test_ignore1(self):
         pass

From fd6707bde0596f3900aa0e4af1f61f4fb7f16799 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Thu, 16 Jun 2022 16:13:59 -0700
Subject: [PATCH 38/83] update fetch_externally_routable_ips to use any network
 device (#314)

* update fetch_externally_routable_ips

* pr comments

* update-vagrant

* spelling fix

* fix tests

* make network device methods available and also fix test

* pin xmltodict

* remove deprecation
---
 ducktape/cluster/linux_remoteaccount.py   | 60 +++++++++++++++++++----
 ducktape/cluster/vagrant.py               | 18 +------
 ducktape/cluster/windows_remoteaccount.py |  5 +-
 tests/cluster/check_vagrant.py            |  3 +-
 4 files changed, 54 insertions(+), 32 deletions(-)

diff --git a/ducktape/cluster/linux_remoteaccount.py b/ducktape/cluster/linux_remoteaccount.py
index 64783e387..f30bcd71f 100644
--- a/ducktape/cluster/linux_remoteaccount.py
+++ b/ducktape/cluster/linux_remoteaccount.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 from ducktape.cluster.cluster_spec import LINUX
-from ducktape.cluster.remoteaccount import RemoteAccount
+from ducktape.cluster.remoteaccount import RemoteAccount, RemoteAccountError
 
 
 class LinuxRemoteAccount(RemoteAccount):
@@ -30,11 +30,53 @@ def local(self):
         This is an imperfect heuristic, but should work for simple local testing."""
         return self.hostname == "localhost" and self.user is None and self.ssh_config is None
 
-    def fetch_externally_routable_ip(self, is_aws):
-        if is_aws:
-            cmd = "/sbin/ifconfig eth0 "
-        else:
-            cmd = "/sbin/ifconfig eth1 "
-        cmd += r"| grep 'inet ' | tail -n 1 | egrep -o '[0-9\.]+' | head -n 1 2>&1"
-        output = "".join(self.ssh_capture(cmd))
-        return output.strip()
+    def get_network_devices(self):
+        """
+        Utility to get all network devices on a linux account
+        """
+        return [
+            device
+            for device in self.sftp_client.listdir('/sys/class/net')
+        ]
+
+    def get_external_accessible_network_devices(self):
+        """
+        gets the subset of devices accessible through an external connection
+        """
+        return [
+            device
+            for device in self.get_network_devices()
+            if device != 'lo'  # do not include local device
+            and ("eth" in device or "ens" in device)  # filter out other devices
+        ]
+
+    # deprecated, please use the self.externally_routable_ip that is set in your cluster,
+    # not explicitly deprecating it as it's used by vagrant cluster
+    def fetch_externally_routable_ip(self, is_aws=None):
+        if is_aws is not None:
+            self.logger.warning("fetch_externally_routable_ip: is_aws is a deprecated flag, and does nothing")
+
+        devices = self.get_external_accessible_network_devices()
+
+        self.logger.debug("found devices: {}".format(devices))
+
+        if not devices:
+            raise RemoteAccountError("Couldn't find any network devices")
+
+        fmt_cmd = (
+            "/sbin/ifconfig {device} | "
+            "grep 'inet ' | "
+            "tail -n 1 | "
+            r"egrep -o '[0-9\.]+' | "
+            "head -n 1 2>&1"
+        )
+
+        ips = [
+            "".join(
+                self.ssh_capture(fmt_cmd.format(device=device))
+            ).strip()
+            for device in devices
+        ]
+        self.logger.debug("found ips: {}".format(ips))
+        self.logger.debug("returning the first ip found")
+        return next(iter(ips))
diff --git a/ducktape/cluster/vagrant.py b/ducktape/cluster/vagrant.py
index 0074cad5b..71d72dc27 100644
--- a/ducktape/cluster/vagrant.py
+++ b/ducktape/cluster/vagrant.py
@@ -34,7 +34,6 @@ class VagrantCluster(JsonCluster):
     """
 
     def __init__(self, *args, **kwargs):
-        self._is_aws = None
         is_read_from_file = False
         self.ssh_exception_checks = kwargs.get("ssh_exception_checks")
         cluster_file = kwargs.get("cluster_file")
@@ -83,7 +82,7 @@ def _get_nodes_from_vagrant(self):
             account = None
             try:
                 account = JsonCluster.make_remote_account(ssh_config, ssh_exception_checks=self.ssh_exception_checks)
-                externally_routable_ip = account.fetch_externally_routable_ip(self.is_aws)
+                externally_routable_ip = account.fetch_externally_routable_ip()
             finally:
                 if account:
                     account.close()
@@ -102,18 +101,3 @@ def _vagrant_ssh_config(self):
                                                   # Force to text mode in py2/3 compatible way
                                                   universal_newlines=True).communicate()
         return ssh_config_info, error
-
-    @property
-    def is_aws(self):
-        """Heuristic to detect whether the slave nodes are local or aws.
-
-        Return true if they are running on aws.
-        """
-        if self._is_aws is None:
-            proc = subprocess.Popen("vagrant status", shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-                                    close_fds=True,
-                                    # Force to text mode in py2/3 compatible way
-                                    universal_newlines=True)
-            output, _ = proc.communicate()
-            self._is_aws = output.find("aws") >= 0
-        return self._is_aws
diff --git a/ducktape/cluster/windows_remoteaccount.py b/ducktape/cluster/windows_remoteaccount.py
index b57b6d730..7979054c2 100644
--- a/ducktape/cluster/windows_remoteaccount.py
+++ b/ducktape/cluster/windows_remoteaccount.py
@@ -96,10 +96,7 @@ def winrm_client(self):
 
         return self._winrm_client
 
-    def fetch_externally_routable_ip(self, is_aws):
-        if not is_aws:
-            raise NotImplementedError("Windows is only supported in AWS.")
-
+    def fetch_externally_routable_ip(self, is_aws=None):
         # EC2 windows machines aren't given an externally routable IP. Use the hostname instead.
         return self.ssh_config.hostname
 
diff --git a/tests/cluster/check_vagrant.py b/tests/cluster/check_vagrant.py
index f5d68ebd0..a63b359c5 100644
--- a/tests/cluster/check_vagrant.py
+++ b/tests/cluster/check_vagrant.py
@@ -59,10 +59,9 @@ def teardown_method(self, _):
 
     def _set_monkeypatch_attr(self, monkeypatch):
         monkeypatch.setattr("ducktape.cluster.vagrant.VagrantCluster._vagrant_ssh_config", lambda vc: (TWO_HOSTS, None))
-        monkeypatch.setattr("ducktape.cluster.vagrant.VagrantCluster.is_aws", lambda vc: False)
         monkeypatch.setattr(
             "ducktape.cluster.linux_remoteaccount.LinuxRemoteAccount.fetch_externally_routable_ip",
-            lambda vc, node_account: "127.0.0.1")
+            lambda vc: "127.0.0.1")
 
     def check_pickleable(self, monkeypatch):
         self._set_monkeypatch_attr(monkeypatch)

From 58635e09fc7bfcb100b3b16621f73c481fd02d21 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Fri, 17 Jun 2022 14:00:14 -0700
Subject: [PATCH 39/83] fix time import (#327)

---
 tests/runner/resources/test_thingy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/runner/resources/test_thingy.py b/tests/runner/resources/test_thingy.py
index ffa53bf25..b128b5d20 100644
--- a/tests/runner/resources/test_thingy.py
+++ b/tests/runner/resources/test_thingy.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from time import time
+import time
 from ducktape.cluster.cluster_spec import ClusterSpec
 from ducktape.tests.test import Test
 from ducktape.mark import ignore, parametrize

From 9209c19a22da9a247bdc00b881bf32896754f9f0 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 17 Jun 2022 14:45:12 -0700
Subject: [PATCH 40/83] updated vagrant to ubuntu20 and fixed network discovery
 to account for other possible interface names (#328)

---
 Vagrantfile                             | 2 +-
 ducktape/cluster/linux_remoteaccount.py | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/Vagrantfile b/Vagrantfile
index 8f524551e..fc29167b9 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -23,7 +23,7 @@ VAGRANTFILE_API_VERSION = "2"
 enable_dns = false
 num_workers = 3
 ram_megabytes = 300
-base_box = "ubuntu/trusty64"
+base_box = "ubuntu/focal64"
 
 local_config_file = File.join(File.dirname(__FILE__), "Vagrantfile.local")
 if File.exists?(local_config_file) then
diff --git a/ducktape/cluster/linux_remoteaccount.py b/ducktape/cluster/linux_remoteaccount.py
index f30bcd71f..3bf7dd138 100644
--- a/ducktape/cluster/linux_remoteaccount.py
+++ b/ducktape/cluster/linux_remoteaccount.py
@@ -47,7 +47,8 @@ def get_external_accessible_network_devices(self):
             device
             for device in self.get_network_devices()
             if device != 'lo'  # do not include local device
-            and ("eth" in device or "ens" in device)  # filter out other devices
+            and (device.startswith("en") or device.startswith('eth'))  # filter out other devices; "en" means ethernet
+            # eth0 can also sometimes happen, see https://unix.stackexchange.com/q/134483
         ]
 
     # deprecated, please use the self.externally_routable_ip that is set in your cluster,

From d01dda740c89b6ac27ce6a296ff08b87100413f4 Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Fri, 17 Jun 2022 15:58:50 -0700
Subject: [PATCH 41/83] raise RemoteAccountError on failure to find devices
 (#329)

* raise RemoteAccountError on failure to find devices

* fix formatting
---
 ducktape/cluster/linux_remoteaccount.py |  2 +-
 tests/cluster/check_vagrant.py          | 13 +++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/ducktape/cluster/linux_remoteaccount.py b/ducktape/cluster/linux_remoteaccount.py
index 3bf7dd138..72873edd7 100644
--- a/ducktape/cluster/linux_remoteaccount.py
+++ b/ducktape/cluster/linux_remoteaccount.py
@@ -62,7 +62,7 @@ def fetch_externally_routable_ip(self, is_aws=None):
         self.logger.debug("found devices: {}".format(devices))
 
         if not devices:
-            raise RemoteAccountError("Couldn't find any network devices")
+            raise RemoteAccountError(self, "Couldn't find any network devices")
 
         fmt_cmd = (
             "/sbin/ifconfig {device} | "
diff --git a/tests/cluster/check_vagrant.py b/tests/cluster/check_vagrant.py
index a63b359c5..9a1afc08e 100644
--- a/tests/cluster/check_vagrant.py
+++ b/tests/cluster/check_vagrant.py
@@ -18,6 +18,8 @@
 import pickle
 import os
 import random
+import pytest
+from ducktape.cluster.remoteaccount import RemoteAccountError
 
 TWO_HOSTS = """Host worker1
   HostName 127.0.0.1
@@ -173,3 +175,14 @@ def check_cluster_file_read(self, monkeypatch):
         assert node2.account.user == "vagrant"
         assert node2.account.ssh_hostname == '127.0.0.3'
         assert node2.account.ssh_config.to_json() == node1_expected["ssh_config"]
+
+    def check_no_valid_network_devices(self, monkeypatch):
+        """
+        test to make sure that a remote account error is raised when no network devices are found
+        """
+        monkeypatch.setattr("ducktape.cluster.vagrant.VagrantCluster._vagrant_ssh_config", lambda vc: (TWO_HOSTS, None))
+        monkeypatch.setattr("ducktape.cluster.linux_remoteaccount.LinuxRemoteAccount.get_network_devices",
+                            lambda account: [])
+
+        with pytest.raises(RemoteAccountError):
+            VagrantCluster()

From a289ab4f75518d09de2a4143086aed20555f887a Mon Sep 17 00:00:00 2001
From: imcdo <imcdonald@confluent.io>
Date: Fri, 17 Jun 2022 17:00:38 -0700
Subject: [PATCH 42/83] Bump version to 0.8.15

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 1f0fbdbce..8df8185dd 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.14'
+__version__ = '0.8.15'

From 7c8b446c2f55a2b1c9d1e8809378b91ea2dc837c Mon Sep 17 00:00:00 2001
From: imcdo <imcdonald@confluent.io>
Date: Fri, 17 Jun 2022 16:37:25 -0700
Subject: [PATCH 43/83] Bump version to 0.7.21

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index fc588e507..64cecbf0d 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.7.20'
+__version__ = '0.7.21'

From 33cde2dade9373fc07f2004260e14877286f07d2 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 15 Jul 2022 16:24:29 -0700
Subject: [PATCH 44/83] build docs with tox (#337)

---
 docs/README.md        | 34 +++++++++++++++++++++++-----------
 docs/requirements.txt |  2 ++
 tox.ini               | 10 +++++++++-
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/docs/README.md b/docs/README.md
index 2f38dba4c..8dcc4456a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,23 +1,35 @@
 Ducktape documentation quick start guide
 ========================================
 
-This file provides a quick guide on how to compile the Ducktape documentation.
 
+Build the documentation
+-----------------------
 
-Setup the environment
----------------------
+To render the pages run:
+```shell
+tox -e docs
+```
+    
+The rendered pages will be in ``docs/_build/html``
 
-To compile the documentation you need Sphinx Python library. To install it and all its dependencies run::
 
-    pip install -r requirements.txt
+Specify documentation format
+----------------------------
 
+Documentation is built using [sphinx-build](https://www.sphinx-doc.org/en/master/man/sphinx-build.html) command.
+You can select which builder to use via the SPHINX_BUILDER environment variable:
+```shell
+SPHINX_BUILDER=man tox -e docs
+```
+All available values: https://www.sphinx-doc.org/en/master/man/sphinx-build.html#cmdoption-sphinx-build-M
 
-Build the documentation
------------------------
 
-To render the pages run::
+Pass options to sphinx-build
+----------------------------
+Any argument after `--` will be passed to the 
+[sphinx-build](https://www.sphinx-doc.org/en/master/man/sphinx-build.html) command directly:
+```shell
+tox -e docs -- -E
+```
 
-    make html
-    
-The rendered pages will be in ``docs/_build/html``
 
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 0242f3fab..bb1cf98d2 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -4,3 +4,5 @@ sphinx-rtd-theme==0.2.4
 boto3==1.9.0
 pycryptodome==3.7.0
 pywinrm==0.2.2
+jinja2==2.11.2
+MarkupSafe<2.0.0
diff --git a/tox.ini b/tox.ini
index 8c5e716b3..09d1b0ee3 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py27, py36, py37, cover, style
+envlist = py27, py36, py37, cover, style, docs
 
 [testenv]
 # Consolidate all deps here instead of separately in test/style/cover so we
@@ -42,6 +42,14 @@ commands =
     pytest {env:PYTESTARGS:} --cov ducktape --cov-report=xml --cov-report=html --cov-report=term --cov-report=annotate:textcov \
                              --cov-fail-under=70
 
+[testenv:docs]
+basepython = python3.7
+deps =
+    -r {toxinidir}/docs/requirements.txt
+changedir = {toxinidir}/docs
+commands = sphinx-build -M {env:SPHINX_BUILDER:html} . _build  {posargs}
+
+
 [flake8]
 exclude = .git,.tox,.eggs,__pycache__,docs,build,dist
 ignore = E111,E121,W292,E123,E226,W503

From 41ec0e9f78374abcf9875d84251acf10a2a53853 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 15 Jul 2022 17:13:18 -0700
Subject: [PATCH 45/83] fix readthedocs config for older versions (#341)

---
 .readthedocs.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index b76cb3095..15e7f5f4f 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -16,3 +16,5 @@ sphinx:
 python:
   install:
     - requirements: docs/requirements.txt
+    - method: setuptools
+      path: .

From 12104d7c91bc685b8219e5afec1d9b96a795c858 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Fri, 15 Jul 2022 17:15:29 -0700
Subject: [PATCH 46/83] build docs with tox - 0.8.x edition (#342)

---
 docs/README.md        | 34 +++++++++++++++++++++++-----------
 docs/requirements.txt |  2 ++
 tox.ini               | 10 +++++++++-
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/docs/README.md b/docs/README.md
index 2f38dba4c..8dcc4456a 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,23 +1,35 @@
 Ducktape documentation quick start guide
 ========================================
 
-This file provides a quick guide on how to compile the Ducktape documentation.
 
+Build the documentation
+-----------------------
 
-Setup the environment
----------------------
+To render the pages run:
+```shell
+tox -e docs
+```
+    
+The rendered pages will be in ``docs/_build/html``
 
-To compile the documentation you need Sphinx Python library. To install it and all its dependencies run::
 
-    pip install -r requirements.txt
+Specify documentation format
+----------------------------
 
+Documentation is built using [sphinx-build](https://www.sphinx-doc.org/en/master/man/sphinx-build.html) command.
+You can select which builder to use via the SPHINX_BUILDER environment variable:
+```shell
+SPHINX_BUILDER=man tox -e docs
+```
+All available values: https://www.sphinx-doc.org/en/master/man/sphinx-build.html#cmdoption-sphinx-build-M
 
-Build the documentation
------------------------
 
-To render the pages run::
+Pass options to sphinx-build
+----------------------------
+Any argument after `--` will be passed to the 
+[sphinx-build](https://www.sphinx-doc.org/en/master/man/sphinx-build.html) command directly:
+```shell
+tox -e docs -- -E
+```
 
-    make html
-    
-The rendered pages will be in ``docs/_build/html``
 
diff --git a/docs/requirements.txt b/docs/requirements.txt
index 389884184..1edb008d3 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -4,3 +4,5 @@ sphinx-rtd-theme==0.2.4
 boto3==1.15.9
 pycryptodome==3.9.8
 pywinrm==0.2.2
+jinja2==2.11.2
+MarkupSafe<2.0.0
diff --git a/tox.ini b/tox.ini
index 0e75cf442..55203a4cc 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
 [tox]
-envlist = py36, py37, py38, cover, style
+envlist = py36, py37, py38, cover, style, docs
 
 [testenv]
 # Consolidate all deps here instead of separately in test/style/cover so we
@@ -42,6 +42,14 @@ commands =
     pytest {env:PYTESTARGS:} --cov ducktape --cov-report=xml --cov-report=html --cov-report=term --cov-report=annotate:textcov \
                              --cov-fail-under=70
 
+[testenv:docs]
+basepython = python3.8
+deps =
+    -r {toxinidir}/docs/requirements.txt
+changedir = {toxinidir}/docs
+commands = sphinx-build -M {env:SPHINX_BUILDER:html} . _build  {posargs}
+
+
 [flake8]
 exclude = .git,.tox,.eggs,__pycache__,docs,build,dist
 ignore = E111,E121,W292,E123,E226,W503

From f90cb7f2b90c9bd19cdf140fa3ee06c3064c899d Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Fri, 15 Jul 2022 17:28:37 -0700
Subject: [PATCH 47/83] Bump version to 0.8.16

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 8df8185dd..0cf52f178 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.15'
+__version__ = '0.8.16'

From 966164e914f69a658dc2aed56f492fd65834f2f8 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Fri, 15 Jul 2022 18:09:04 -0700
Subject: [PATCH 48/83] changelog for 0.8.x again

---
 docs/changelog.rst | 16 ++++++++++++++++
 docs/index.rst     |  1 +
 2 files changed, 17 insertions(+)
 create mode 100644 docs/changelog.rst

diff --git a/docs/changelog.rst b/docs/changelog.rst
new file mode 100644
index 000000000..cb753b845
--- /dev/null
+++ b/docs/changelog.rst
@@ -0,0 +1,16 @@
+.. _topics-changelog:
+
+====
+Changelog
+====
+
+0.8.x
+=====
+- Support test suites
+- Easier way to rerun failed tests - generate test suite with all the failed tests and also print them in the log so that user can copy them and paste as ducktape command line arguments
+- Python 2 is no longer supported, minimum supported version is 3.6
+- [backport, also in 0.9.1] - use a generic network device based on the devices found on the remote machine rather than a hardcoded one - `#314 <https://github.com/confluentinc/ducktape/pull/314>`_ and `#328 <https://github.com/confluentinc/ducktape/pull/328>`_
+- [backport, also in 0.9.1] - clean up process properly after an exception during test runner execution - `#323 <https://github.com/confluentinc/ducktape/pull/323>`_
+- [backport, also in 0.9.1] - log ssh errors - `#319 <https://github.com/confluentinc/ducktape/pull/319>`_
+- [backport, also in 0.9.1] - update vagrant tests to use ubuntu20 - `#328 <https://github.com/confluentinc/ducktape/pull/328>`_
+- [backport, also in 0.9.1] - added command to print the total number of nodes the tests run will require - `#320 <https://github.com/confluentinc/ducktape/pull/320>`_
\ No newline at end of file
diff --git a/docs/index.rst b/docs/index.rst
index ada5ed625..834b00fe8 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -21,6 +21,7 @@ Ducktape contains tools for running system integration and performance tests. It
    debug_tests
    api
    misc
+   changelog
 
 Contribute
 ==========

From adf08076c95965ecc56def1cceeb0091fe08861f Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Tue, 2 Aug 2022 13:21:46 -0700
Subject: [PATCH 49/83] add deflake pr to changelog (#345)

---
 docs/changelog.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index cb753b845..be2ee897a 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -9,6 +9,7 @@ Changelog
 - Support test suites
 - Easier way to rerun failed tests - generate test suite with all the failed tests and also print them in the log so that user can copy them and paste as ducktape command line arguments
 - Python 2 is no longer supported, minimum supported version is 3.6
+- Added `--deflake N` flag - if provided, it will attempt to rerun each failed test  up to N times, and if it eventually passes, it will be marked as Flaky - `#299 <https://github.com/confluentinc/ducktape/pull/299>`_
 - [backport, also in 0.9.1] - use a generic network device based on the devices found on the remote machine rather than a hardcoded one - `#314 <https://github.com/confluentinc/ducktape/pull/314>`_ and `#328 <https://github.com/confluentinc/ducktape/pull/328>`_
 - [backport, also in 0.9.1] - clean up process properly after an exception during test runner execution - `#323 <https://github.com/confluentinc/ducktape/pull/323>`_
 - [backport, also in 0.9.1] - log ssh errors - `#319 <https://github.com/confluentinc/ducktape/pull/319>`_

From 34e46424d0ff37031a4b88eef75107c9dc31d1fd Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Tue, 2 Aug 2022 14:53:55 -0700
Subject: [PATCH 50/83] update readthedocs link 0.7.x branch (#354)

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2ad3345de..7e11a9dcb 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,7 @@ Ducktape contains tools for running system integration and performance tests. It
 Documentation
 -------------
 
-For detailed documentation on how to install, run, create new tests please refer to: http://ducktape-docs.readthedocs.io/
+For detailed documentation on how to install, run, create new tests please refer to: http://ducktape.readthedocs.io/
 
 Contribute
 ----------

From d9f4d7a2ea276daaf70e8d0bd6087d9697203817 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Tue, 2 Aug 2022 14:54:54 -0700
Subject: [PATCH 51/83] docs badge (#351)

---
 README.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8c1b3c94c..eb7c30c9f 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
-[![Documentation Status](https://readthedocs.org/projects/ducktape-docs/badge/?version=latest)](https://ducktape-docs.readthedocs.io/en/latest/?badge=latest)
+[![Documentation Status](https://readthedocs.org/projects/ducktape/badge/?version=0.8.x)](https://ducktape.readthedocs.io/en/0.8.x/?badge=0.8.x)
+
 
 
 Distributed System Integration & Performance Testing Library

From 49420410b320a6d48accebe4c0e6c2bf4f511d9a Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Tue, 9 Aug 2022 23:41:52 -0700
Subject: [PATCH 52/83] removed tox dependency (#356)

---
 requirements.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 31239060d..f494505a3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -10,6 +10,5 @@ paramiko~=2.7.2
 pyzmq==19.0.2
 pycryptodome==3.9.8
 more-itertools==5.0.0
-tox==3.20.0
 six==1.15.0
 PyYAML==5.3.1
\ No newline at end of file

From 78dd2761838aa13e6e4aa82502483c3be5f886eb Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Tue, 9 Aug 2022 23:46:33 -0700
Subject: [PATCH 53/83] changelog for 0.8.17

---
 docs/changelog.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index be2ee897a..e2073dd1b 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,6 +4,10 @@
 Changelog
 ====
 
+0.8.17
+======
+- Removed `tox` from requirements. It was not used, but was breaking our builds due to recent pushes to `virtualenv`.
+
 0.8.x
 =====
 - Support test suites

From 4e0b8c794f62b104f3bef3bca6c5a14e1a17ee7b Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <stan@confluent.io>
Date: Tue, 9 Aug 2022 23:52:35 -0700
Subject: [PATCH 54/83] Bump version to 0.8.17

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 0cf52f178..fa70622f0 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.16'
+__version__ = '0.8.17'

From d920ff0f06d1f226e90ea6e215ee465cd75c6812 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Tue, 16 Aug 2022 14:23:05 -0700
Subject: [PATCH 55/83] Fix kill_process system test (#358)

* removed tox dependency

* fixed kill test to find the process more reliably

* removed garbage
---
 systests/cluster/test_remote_account.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/systests/cluster/test_remote_account.py b/systests/cluster/test_remote_account.py
index 880557f2f..1432a0449 100644
--- a/systests/cluster/test_remote_account.py
+++ b/systests/cluster/test_remote_account.py
@@ -579,9 +579,10 @@ def test_monitor_log_exception(self):
     @cluster(num_nodes=1)
     def test_kill_process(self):
         """Tests that kill_process correctly works"""
+        grep_str = '"nc -l -p 5000"'
 
         def get_pids():
-            pid_cmd = "ps ax | grep -i nc | grep -v grep | awk '{print $1}'"
+            pid_cmd = f"ps ax | grep -i {grep_str} | grep -v grep | awk '{{print $1}}'"
 
             return list(node.account.ssh_capture(pid_cmd, callback=int))
 
@@ -594,7 +595,7 @@ def get_pids():
                    err_msg="Failed to start process within %d sec" % 10)
 
         # Kill service.
-        node.account.kill_process("nc")
+        node.account.kill_process(grep_str)
 
         wait_until(lambda: len(get_pids()) == 0, timeout_sec=10,
                    err_msg="Failed to kill process within %d sec" % 10)

From d07f2f0d384118cb2ed9b8ab23ea758e5b01e84b Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Wed, 19 Oct 2022 16:12:48 +0200
Subject: [PATCH 56/83] print ssh command output on debug level (#367)

---
 ducktape/cluster/remoteaccount.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/cluster/remoteaccount.py b/ducktape/cluster/remoteaccount.py
index 2242b9470..3009fb7ed 100644
--- a/ducktape/cluster/remoteaccount.py
+++ b/ducktape/cluster/remoteaccount.py
@@ -396,7 +396,7 @@ def ssh_output(self, cmd, allow_fail=False, combine_stderr=True, timeout_sec=Non
             stdin.close()
             stdout.close()
             stderr.close()
-
+        self._log(logging.DEBUG, "Returning ssh command output:\n%s" % stdoutdata)
         return stdoutdata
 
     def alive(self, pid):

From 760583c739c2d1884abcaffdafb467cb889007bc Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Mon, 23 Jan 2023 15:24:50 -0800
Subject: [PATCH 57/83] Update to newer version of tox (#376)

* Update to newer version of tox

* ignore virtualenvs in style

* add virtualenvs to ignore

* use sphinx 1.6
---
 .gitignore            |  1 +
 docs/requirements.txt |  2 +-
 tox.ini               | 12 ++++++------
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/.gitignore b/.gitignore
index a9d948bdc..abda83a65 100644
--- a/.gitignore
+++ b/.gitignore
@@ -97,6 +97,7 @@ venv/
 ENV/
 env.bak/
 venv.bak/
+.virtualenvs/
 
 # Spyder project settings
 .spyderproject
diff --git a/docs/requirements.txt b/docs/requirements.txt
index bb1cf98d2..a6257c2ad 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,4 +1,4 @@
-Sphinx==1.5.3
+Sphinx<1.7
 sphinx-argparse==0.1.17
 sphinx-rtd-theme==0.2.4
 boto3==1.9.0
diff --git a/tox.ini b/tox.ini
index 09d1b0ee3..aff499ea2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -16,7 +16,7 @@ setenv =
     PIP_PROCESS_DEPENDENCY_LINKS=1
     PIP_DEFAULT_TIMEOUT=60
     ARCHFLAGS=-Wno-error=unused-command-line-argument-hard-error-in-future
-envdir = {homedir}/.virtualenvs/ducktape_{envname}
+envdir = {package_root}/.virtualenvs/ducktape_{envname}
 commands =
     pytest {env:PYTESTARGS:} {posargs}
 
@@ -24,20 +24,20 @@ commands =
 envdir = {homedir}/.virtualenvs/ducktape-py2
 
 [testenv:py36]
-envdir = {homedir}/.virtualenvs/ducktape-py36
+envdir = {package_root}/.virtualenvs/ducktape-py36
 
 [testenv:py37]
-envdir = {homedir}/.virtualenvs/ducktape-py37
+envdir = {package_root}/.virtualenvs/ducktape-py37
 
 [testenv:style]
 basepython = python3.7
-envdir = {homedir}/.virtualenvs/ducktape
+envdir = {package_root}/.virtualenvs/ducktape
 commands =
     flake8 --config tox.ini
 
 [testenv:cover]
 basepython = python3.7
-envdir = {homedir}/.virtualenvs/ducktape
+envdir = {package_root}/.virtualenvs/ducktape
 commands =
     pytest {env:PYTESTARGS:} --cov ducktape --cov-report=xml --cov-report=html --cov-report=term --cov-report=annotate:textcov \
                              --cov-fail-under=70
@@ -51,6 +51,6 @@ commands = sphinx-build -M {env:SPHINX_BUILDER:html} . _build  {posargs}
 
 
 [flake8]
-exclude = .git,.tox,.eggs,__pycache__,docs,build,dist
+exclude = .git,.tox,.eggs,__pycache__,docs,build,dist,.virtualenvs
 ignore = E111,E121,W292,E123,E226,W503
 max-line-length = 120

From 068921befe32b9c89dfbd35a88058248b74df1ee Mon Sep 17 00:00:00 2001
From: Ian McDonald <imcdonald@confluent.io>
Date: Thu, 2 Feb 2023 11:30:35 -0800
Subject: [PATCH 58/83] Update to use package_root to fix 0.8.x (#377)

---
 tox.ini | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tox.ini b/tox.ini
index c72fa6280..a76e58a4d 100644
--- a/tox.ini
+++ b/tox.ini
@@ -27,7 +27,7 @@ envdir = {package_root}/.virtualenvs/ducktape-py36
 envdir = {package_root}/.virtualenvs/ducktape-py37
 
 [testenv:py38]
-envdir = {homedir}/.virtualenvs/ducktape-py38
+envdir = {package_root}/.virtualenvs/ducktape-py38
 
 [testenv:style]
 basepython = python3.8

From 5b2dfa7d69436629057a9752fd5f5ae4baa40407 Mon Sep 17 00:00:00 2001
From: Stanislav Vodetskyi <49661990+stan-confluent@users.noreply.github.com>
Date: Wed, 29 Mar 2023 17:59:15 -0700
Subject: [PATCH 59/83] lock pyspnego dependency to py2 version (#379)

* lock pyspnego dependency to py2 version

* also lock requests-ntlm
---
 requirements.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/requirements.txt b/requirements.txt
index f6a869120..d253b11dc 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,3 +20,5 @@ cryptography==3.3.2
 certifi==2020.04.05.1
 # xmltodict is required for pywinrm, but as they didn't pin their python2 version, we have to
 xmltodict==0.12.0
+pyspnego<0.2.0
+requests-ntlm==1.1.0

From 07281bd16220e51469331958920995c921a4f058 Mon Sep 17 00:00:00 2001
From: Confluent Jenkins Bot <jenkins@confluent.io>
Date: Thu, 30 Mar 2023 01:12:53 +0000
Subject: [PATCH 60/83] Bump version to 0.7.22 and changelog

---
 ducktape/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index 64cecbf0d..df26052fe 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.7.21'
+__version__ = '0.7.22'

From ea1cea6fcd18f4d508c137e4b34c97051d681693 Mon Sep 17 00:00:00 2001
From: Aman Khare <akhare@confluent.io>
Date: Fri, 18 Aug 2023 11:31:01 +0530
Subject: [PATCH 61/83] updating requests version (#390)

---
 requirements.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index f494505a3..27ef81d29 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,10 +5,10 @@ MarkupSafe<2.0.0
 pyparsing<3.0.0
 zipp<2.0.0
 pywinrm==0.2.2
-requests==2.24.0
+requests==2.31.0
 paramiko~=2.7.2
 pyzmq==19.0.2
 pycryptodome==3.9.8
 more-itertools==5.0.0
 six==1.15.0
-PyYAML==5.3.1
\ No newline at end of file
+PyYAML==5.3.1

From dd0c2e55918b9235ef4933e12bf8dd9838bd6878 Mon Sep 17 00:00:00 2001
From: amankhare14 <akhare@confluent.io>
Date: Fri, 18 Aug 2023 14:37:39 +0530
Subject: [PATCH 62/83] changelog for 0.8.18

---
 docs/changelog.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index e2073dd1b..3b32806ad 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -3,6 +3,9 @@
 ====
 Changelog
 ====
+0.8.18
+======
+- Updated `requests` version to `2.31.0`
 
 0.8.17
 ======

From 0576311b0eae1a2cdaa97e61419ac43e81cad9fc Mon Sep 17 00:00:00 2001
From: amankhare14 <akhare@confluent.io>
Date: Fri, 18 Aug 2023 14:59:25 +0530
Subject: [PATCH 63/83] correct changelog for 0.8.18

---
 docs/changelog.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 3b32806ad..3e43ebfb4 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -3,6 +3,7 @@
 ====
 Changelog
 ====
+
 0.8.18
 ======
 - Updated `requests` version to `2.31.0`

From 7422b37ccd2b88a3fb3724bbeed73a7662c97694 Mon Sep 17 00:00:00 2001
From: Confluent Jenkins Bot <jenkins@confluent.io>
Date: Fri, 18 Aug 2023 10:03:19 +0000
Subject: [PATCH 64/83] Bump version to 0.8.18 and changelog

---
 docs/changelog.rst   | 6 ++++++
 ducktape/__init__.py | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/docs/changelog.rst b/docs/changelog.rst
index 3e43ebfb4..7f0dd0b83 100644
--- a/docs/changelog.rst
+++ b/docs/changelog.rst
@@ -4,6 +4,12 @@
 Changelog
 ====
 
+0.8.18
+======
+Friday, August 18th, 2023
+-------------------------
+-
+
 0.8.18
 ======
 - Updated `requests` version to `2.31.0`
diff --git a/ducktape/__init__.py b/ducktape/__init__.py
index fa70622f0..b14fb08a8 100644
--- a/ducktape/__init__.py
+++ b/ducktape/__init__.py
@@ -1 +1 @@
-__version__ = '0.8.17'
+__version__ = '0.8.18'

From 5a5c127fe5cb62e7383f1a08d693b3b015180deb Mon Sep 17 00:00:00 2001
From: Noah Watkins <noahwatkins@gmail.com>
Date: Fri, 10 Sep 2021 13:53:52 -0700
Subject: [PATCH 65/83] report: render detail link with trailing slash

This is useful when serving from something like s3.

Signed-off-by: Noah Watkins <noahwatkins@gmail.com>
---
 ducktape/templates/report/report.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/templates/report/report.html b/ducktape/templates/report/report.html
index 7eac4a6a9..2a1092249 100644
--- a/ducktape/templates/report/report.html
+++ b/ducktape/templates/report/report.html
@@ -79,7 +79,7 @@ <h1>
           var className = this.props.test.test_result;
           var detailCol;
           if (className !== "ignore") {
-            detailCol = <td colSpan='5' align='center'><pre><a href={this.props.test.test_log}>Detail</a></pre></td>
+            detailCol = <td colSpan='5' align='center'><pre><a href={this.props.test.test_log + '/'}>Detail</a></pre></td>
           } else {
             detailCol = <td colSpan='5' align='center'></td>
           }

From 9d54d776fe2cd62f2b0315c7d928b3ad94c752fe Mon Sep 17 00:00:00 2001
From: Noah Watkins <noah@vectorized.io>
Date: Tue, 18 Jan 2022 16:42:38 -0800
Subject: [PATCH 66/83] reporter: print report and failures at the end

This is an easier ordering to consume in the common case that the output
is presented in a log format where consuming from the tail is common.

Signed-off-by: Noah Watkins <noah@vectorized.io>
---
 ducktape/tests/reporter.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/ducktape/tests/reporter.py b/ducktape/tests/reporter.py
index 8163d1756..b87308450 100644
--- a/ducktape/tests/reporter.py
+++ b/ducktape/tests/reporter.py
@@ -102,9 +102,9 @@ def report(self):
 
 
 class SimpleSummaryReporter(SummaryReporter):
-    def header_string(self):
-        """Header lines of the report"""
-        header_lines = [
+    def footer_string(self):
+        """Footer lines of the report"""
+        footer_lines = [
             "=" * self.width,
             "SESSION REPORT (ALL TESTS)",
             "ducktape version: %s" % ducktape_version(),
@@ -118,15 +118,28 @@ def header_string(self):
             "=" * self.width
         ]
 
-        return "\n".join(header_lines)
+        return "\n".join(footer_lines)
 
     def report_string(self):
         """Get the whole report string."""
-        report_lines = [
-            self.header_string()]
 
-        report_lines.extend(
-            [SingleResultReporter(result).result_string() + "\n" + "-" * self.width for result in self.results])
+        passed = []
+        ignored = []
+        failed = []
+        for result in self.results:
+            if result.test_status == FAIL:
+                failed.append(result)
+            elif result.test_status == IGNORE:
+                ignored.append(result)
+            else:
+                passed.append(result)
+
+        ordered_results = passed + ignored + failed
+
+        report_lines = \
+            [SingleResultReporter(result).result_string() + "\n" + "-" * self.width for result in ordered_results]
+
+        report_lines.append(self.footer_string())
 
         return "\n".join(report_lines)
 

From 83efad335e30de713b09454aaa24154c83b55301 Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Wed, 30 Mar 2022 06:04:12 +0000
Subject: [PATCH 67/83] add github workflow for python to run tests

---
 .github/workflows/python.yml | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 .github/workflows/python.yml

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
new file mode 100644
index 000000000..d1447689a
--- /dev/null
+++ b/.github/workflows/python.yml
@@ -0,0 +1,16 @@
+name: Python
+on: [push]
+jobs:
+  check:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Setup Python
+        uses: actions/setup-python@v3
+        with:
+          python-version: '3.8'
+      - name: Install Tox
+        run: pip install tox
+      - name: Run Tox
+        run: tox -e py38

From e715a5f0c3e359a16c881d830ca908e9c077e207 Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Thu, 31 Mar 2022 05:30:58 +0000
Subject: [PATCH 68/83] run python check on PRs

---
 .github/workflows/python.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index d1447689a..b97240154 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -1,5 +1,5 @@
 name: Python
-on: [push]
+on: [push, pull_request]
 jobs:
   check:
     runs-on: ubuntu-latest

From 07dd2219373602c9e4e02f21ac61d1141085561c Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Thu, 31 Mar 2022 05:33:51 +0000
Subject: [PATCH 69/83] run python check for style and code coverage

---
 .github/workflows/python.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index b97240154..c06d2f732 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -13,4 +13,4 @@ jobs:
       - name: Install Tox
         run: pip install tox
       - name: Run Tox
-        run: tox -e py38
+        run: tox -e style,py38,cover

From be3310d09f96bc63488c41457b31e031a048f3fb Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Fri, 25 Mar 2022 06:26:54 +0000
Subject: [PATCH 70/83] tests: check `@ok_to_fail`

---
 tests/mark/check_ok_to_fail.py | 59 ++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100644 tests/mark/check_ok_to_fail.py

diff --git a/tests/mark/check_ok_to_fail.py b/tests/mark/check_ok_to_fail.py
new file mode 100644
index 000000000..709c6f0fe
--- /dev/null
+++ b/tests/mark/check_ok_to_fail.py
@@ -0,0 +1,59 @@
+from ducktape.mark.mark_expander import MarkedFunctionExpander
+from ducktape.mark import ok_to_fail, oked_to_fail, parametrize, matrix
+
+import pytest
+
+class CheckOkToFail(object):
+    def check_simple(self):
+        @ok_to_fail
+        def function(x=1, y=2, z=3):
+            return x, y, z
+
+        assert oked_to_fail(function)
+        context_list = MarkedFunctionExpander(function=function).expand()
+        assert len(context_list) == 1
+        assert context_list[0].ok_to_fail
+
+    def check_simple_method(self):
+        class C(object):
+            @ok_to_fail
+            def function(self, x=1, y=2, z=3):
+                return x, y, z
+
+        assert oked_to_fail(C.function)
+        context_list = MarkedFunctionExpander(function=C.function, cls=C).expand()
+        assert len(context_list) == 1
+        assert context_list[0].ok_to_fail
+
+    def check_ok_to_fail_method(self):
+        """Check @ok_to_fail() with no arguments used with various parametrizations on a method."""
+        class C(object):
+            @ok_to_fail
+            @parametrize(x=100, y=200, z=300)
+            @parametrize(x=100, z=300)
+            @parametrize(y=200)
+            @matrix(x=[1, 2, 3])
+            @parametrize()
+            def function(self, x=1, y=2, z=3):
+                return x, y, z
+
+        assert oked_to_fail(C.function)
+        context_list = MarkedFunctionExpander(function=C.function, cls=C).expand()
+        assert len(context_list) == 7
+        for ctx in context_list:
+            assert ctx.ok_to_fail
+
+    def check_invalid_ok_to_fail(self):
+        """If there are no test cases to which ok_to_fail applies, it should raise an error"""
+        class C(object):
+            @parametrize(x=100, y=200, z=300)
+            @parametrize(x=100, z=300)
+            @parametrize(y=200)
+            @parametrize()
+            @ok_to_fail
+            def function(self, x=1, y=2, z=3):
+                return x, y, z
+
+        assert oked_to_fail(C.function)
+        with pytest.raises(AssertionError):
+            MarkedFunctionExpander(function=C.function, cls=C).expand()

From 0c619e2c8ade4361bd8a0760ce0e0e7c5c94d0db Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Mon, 21 Mar 2022 08:35:06 -0500
Subject: [PATCH 71/83] add `@ok_to_fail` decorator

---
 .gitignore                            |  1 +
 ducktape/mark/__init__.py             |  2 +-
 ducktape/mark/_mark.py                | 43 +++++++++++++++++++++++++++
 ducktape/templates/report/report.css  |  8 +++++
 ducktape/templates/report/report.html | 12 ++++++++
 ducktape/tests/reporter.py            | 34 ++++++++++++++++++---
 ducktape/tests/result.py              | 12 +++++++-
 ducktape/tests/runner_client.py       | 19 ++++++++----
 ducktape/tests/status.py              |  2 ++
 ducktape/tests/test.py                |  9 +++---
 tests/mark/check_ok_to_fail.py        |  1 +
 11 files changed, 127 insertions(+), 16 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0f1350897..fb0bacc60 100644
--- a/.gitignore
+++ b/.gitignore
@@ -115,3 +115,4 @@ venv.bak/
 .idea
 /.vagrant/
 .vscode
+tags
diff --git a/ducktape/mark/__init__.py b/ducktape/mark/__init__.py
index 676db7053..f38c58211 100644
--- a/ducktape/mark/__init__.py
+++ b/ducktape/mark/__init__.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ._mark import parametrize, matrix, defaults, ignore, parametrized, ignored, env, is_env  # NOQA
+from ._mark import parametrize, matrix, defaults, ignore, ok_to_fail, parametrized, ignored, oked_to_fail, env, is_env  # NOQA
diff --git a/ducktape/mark/_mark.py b/ducktape/mark/_mark.py
index a087ec454..acfd91f45 100644
--- a/ducktape/mark/_mark.py
+++ b/ducktape/mark/_mark.py
@@ -107,6 +107,24 @@ def __init__(self):
         self.injected_args = None
 
 
+class OkToFail(Mark):
+    """Run the test but categorize status as OPASS or OFAIL instead of PASS or FAIL."""
+
+    def __init__(self):
+        super(OkToFail, self).__init__()
+        self.injected_args = None
+
+    @property
+    def name(self):
+        return "OK_TO_FAIL"
+
+    def apply(self, seed_context, context_list):
+        assert len(context_list) > 0, "ignore annotation is not being applied to any test cases"
+        for ctx in context_list:
+            ctx.ok_to_fail = ctx.ok_to_fail or self.injected_args is None
+        return context_list
+
+
 class Matrix(Mark):
     """Parametrize with a matrix of arguments.
     Assume each values in self.injected_args is iterable
@@ -218,6 +236,7 @@ def __eq__(self, other):
 MATRIX = Matrix()
 DEFAULTS = Defaults()
 IGNORE = Ignore()
+OK_TO_FAIL = OkToFail()
 ENV = Env()
 
 
@@ -235,6 +254,11 @@ def ignored(f):
     return Mark.marked(f, IGNORE)
 
 
+def oked_to_fail(f):
+    """Is this function or object decorated with @ok_to_fail?"""
+    return Mark.marked(f, OK_TO_FAIL)
+
+
 def is_env(f):
     return Mark.marked(f, ENV)
 
@@ -411,6 +435,25 @@ def ignorer(f):
     return ignorer
 
 
+def ok_to_fail(*args, **kwargs):
+    """
+    Test method decorator which signals to the test runner to run test but set status as OPASS or OFAIL.
+    This will keep test results separate from the status PASS and FAIL.
+
+    Example::
+        @ok_to_fail
+        def the_test(...):
+            ...
+    """
+    if len(args) == 1 and len(kwargs) == 0:
+        # this corresponds to the usage of the decorator with no arguments
+        # @ok_to_fail
+        # def test_function:
+        #   ...
+        Mark.mark(args[0], OkToFail())
+        return args[0]
+
+
 def env(**kwargs):
     def environment(f):
         Mark.mark(f, Env(**kwargs))
diff --git a/ducktape/templates/report/report.css b/ducktape/templates/report/report.css
index c8a9060da..8d41cbde2 100644
--- a/ducktape/templates/report/report.css
+++ b/ducktape/templates/report/report.css
@@ -78,6 +78,14 @@ h1, h2, h3, h4, h5, h6 {
     background-color: #555;
 }
 
+.ofail {
+    background-color: #ffc;
+}
+
+.opass {
+    background-color: #9cf;
+}
+
 .testcase { 
     margin-left: 2em;
 }
diff --git a/ducktape/templates/report/report.html b/ducktape/templates/report/report.html
index 2a1092249..8009b9e2c 100644
--- a/ducktape/templates/report/report.html
+++ b/ducktape/templates/report/report.html
@@ -12,6 +12,8 @@
     <div id="failed_test_panel"></div>
     <div id="ignored_test_panel"></div>
     <div id="flaky_test_panel"></div>
+    <div id="opassed_test_panel"></div>
+    <div id="ofailed_test_panel"></div>
     <div id="passed_test_panel"></div>
     <script type="text/jsx">
       /* This small block makes it possible to use React dev tools in the Chrome browser */
@@ -42,6 +44,8 @@ <h1>
               <td colSpan='5' align='center'>{this.props.summary_prop.flaky}</td>
               <td colSpan='5' align='center'>{this.props.summary_prop.failures}</td>
               <td colSpan='5' align='center'>{this.props.summary_prop.ignored}</td>
+              <td colSpan='5' align='center'>{this.props.summary_prop.opassed}</td>
+              <td colSpan='5' align='center'>{this.props.summary_prop.ofailed}</td>
               <td colSpan='5' align='center'>{this.props.summary_prop.run_time}</td>
             </tr>
           );
@@ -59,6 +63,8 @@ <h1>
                   <th colSpan='5' align='center'>Flaky</th>
                   <th colSpan='5' align='center'>Failures</th>
                   <th colSpan='5' align='center'>Ignored</th>
+                  <th colSpan='5' align='center'>OPassed</th>
+                  <th colSpan='5' align='center'>OFailed</th>
                   <th colSpan='5' align='center'>Time</th>
                 </tr>
               </thead>
@@ -183,6 +189,8 @@ <h2>{this.props.title}</h2>
         "flaky": %(num_flaky)d,
         "failures": %(num_failures)d,
         "ignored": %(num_ignored)d,
+        "opassed": %(num_opassed)d,
+        "ofailed": %(num_ofailed)d,
         "run_time": '%(run_time)s'
       }];
       
@@ -197,6 +205,8 @@ <h2>{this.props.title}</h2>
       FLAKY_TESTS=[%(flaky_tests)s];
       FAILED_TESTS=[%(failed_tests)s];
       IGNORED_TESTS=[%(ignored_tests)s];
+      OPASSED_TESTS=[%(opassed_tests)s];
+      OFAILED_TESTS=[%(ofailed_tests)s];
 
       React.render(<Heading heading={HEADING}/>, document.getElementById('heading'));
       React.render(<ColorKeyPanel test_status_names={COLOR_KEYS}/>, document.getElementById('color_key_panel'));
@@ -204,6 +214,8 @@ <h2>{this.props.title}</h2>
       React.render(<TestPanel title="Failed Tests" tests={FAILED_TESTS}/>, document.getElementById('failed_test_panel'));
       React.render(<TestPanel title="Ignored Tests" tests={IGNORED_TESTS}/>, document.getElementById('ignored_test_panel'));
       React.render(<TestPanel title="Flaky Tests" tests={FLAKY_TESTS}/>, document.getElementById('flaky_test_panel'));
+      React.render(<TestPanel title="OPassed Tests" tests={OPASSED_TESTS}/>, document.getElementById('opassed_test_panel'));
+      React.render(<TestPanel title="OFailed Tests" tests={OFAILED_TESTS}/>, document.getElementById('ofailed_test_panel'));
       React.render(<TestPanel title="Passed Tests" tests={PASSED_TESTS}/>, document.getElementById('passed_test_panel'));
     </script>
   </body>
diff --git a/ducktape/tests/reporter.py b/ducktape/tests/reporter.py
index b87308450..8589bfa99 100644
--- a/ducktape/tests/reporter.py
+++ b/ducktape/tests/reporter.py
@@ -26,7 +26,7 @@
 
 from ducktape.utils.terminal_size import get_terminal_size
 from ducktape.utils.util import ducktape_version
-from ducktape.tests.status import PASS, FAIL, IGNORE, FLAKY
+from ducktape.tests.status import PASS, FAIL, IGNORE, FLAKY, OPASS, OFAIL
 from ducktape.json_serializable import DucktapeJSONEncoder
 
 
@@ -115,6 +115,8 @@ def footer_string(self):
             "flaky:            %d" % self.results.num_flaky,
             "failed:           %d" % self.results.num_failed,
             "ignored:          %d" % self.results.num_ignored,
+            "opassed:          %d" % self.results.num_opassed,
+            "ofailed:          %d" % self.results.num_ofailed,
             "=" * self.width
         ]
 
@@ -126,15 +128,21 @@ def report_string(self):
         passed = []
         ignored = []
         failed = []
+        ofail = []
+        opass = []
         for result in self.results:
             if result.test_status == FAIL:
                 failed.append(result)
             elif result.test_status == IGNORE:
                 ignored.append(result)
+            elif result.test_status == OPASS:
+                opass.append(result)
+            elif result.test_status == OFAIL:
+                ofail.append(result)
             else:
                 passed.append(result)
 
-        ordered_results = passed + ignored + failed
+        ordered_results = passed + ignored + failed + opass + ofail
 
         report_lines = \
             [SingleResultReporter(result).result_string() + "\n" + "-" * self.width for result in ordered_results]
@@ -193,8 +201,13 @@ def report(self):
                 testsuite['failures'] += 1
             elif result.test_status == IGNORE:
                 testsuite['skipped'] += 1
+            elif result.test_status == OPASS:
+                testsuite['skipped'] += 1
+            elif result.test_status == OFAIL:
+                testsuite['skipped'] += 1
 
-        total = self.results.num_failed + self.results.num_ignored + self.results.num_passed + self.results.num_flaky
+        total = self.results.num_failed + self.results.num_ignored + self.results.num_ofailed + \
+            self.results.num_opassed + self.results.num_passed + self.results.num_flaky
         # Now start building XML document
         root = ET.Element('testsuites', attrib=dict(
             name="ducktape", time=str(self.results.run_time_seconds),
@@ -282,6 +295,8 @@ def format_report(self):
         passed_result_string = []
         ignored_result_string = []
         flaky_result_string = []
+        opassed_result_string = []
+        ofailed_result_string = []
 
         for result in self.results:
             json_string = json.dumps(self.format_result(result))
@@ -298,6 +313,12 @@ def format_report(self):
             elif result.test_status == FLAKY:
                 flaky_result_string.append(json_string)
                 flaky_result_string.append(",")
+            elif result.test_status == OPASS:
+                opassed_result_string.append(json_string)
+                opassed_result_string.append(",")
+            elif result.test_status == OFAIL:
+                ofailed_result_string.append(json_string)
+                ofailed_result_string.append(",")
             else:
                 raise Exception("Unknown test status in report: {}".format(result.test_status.to_json()))
 
@@ -308,13 +329,18 @@ def format_report(self):
             'num_flaky': self.results.num_flaky,
             'num_failures': self.results.num_failed,
             'num_ignored': self.results.num_ignored,
+            'num_opassed': self.results.num_opassed,
+            'num_ofailed': self.results.num_ofailed,
             'run_time': format_time(self.results.run_time_seconds),
             'session': self.results.session_context.session_id,
             'passed_tests': "".join(passed_result_string),
             'flaky_tests': "".join(flaky_result_string),
             'failed_tests': "".join(failed_result_string),
             'ignored_tests': "".join(ignored_result_string),
-            'test_status_names': ",".join(["\'%s\'" % str(status) for status in [PASS, FAIL, IGNORE, FLAKY]])
+            'ofailed_tests': "".join(ofailed_result_string),
+            'opassed_tests': "".join(opassed_result_string),
+            'test_status_names': ",".join(["\'%s\'" % str(status) for status in
+                                           [PASS, FAIL, IGNORE, FLAKY, OPASS, OFAIL]])
         }
 
         html = template % args
diff --git a/ducktape/tests/result.py b/ducktape/tests/result.py
index 5f567f3ff..3c90ad25c 100644
--- a/ducktape/tests/result.py
+++ b/ducktape/tests/result.py
@@ -21,7 +21,7 @@
 from ducktape.tests.reporter import SingleResultFileReporter
 from ducktape.utils.local_filesystem_utils import mkdir_p
 from ducktape.utils.util import ducktape_version
-from ducktape.tests.status import FLAKY, PASS, FAIL, IGNORE
+from ducktape.tests.status import FLAKY, PASS, FAIL, IGNORE, OPASS, OFAIL
 
 
 class TestResult(object):
@@ -166,6 +166,14 @@ def num_ignored(self):
     def num_flaky(self):
         return len([r for r in self._results if r.test_status == FLAKY])
 
+    @property
+    def num_opassed(self):
+        return len([r for r in self._results if r.test_status == OPASS])
+
+    @property
+    def num_ofailed(self):
+        return len([r for r in self._results if r.test_status == OFAIL])
+
     @property
     def run_time_seconds(self):
         if self.start_time < 0:
@@ -222,6 +230,8 @@ def to_json(self):
             "num_passed": self.num_passed,
             "num_failed": self.num_failed,
             "num_ignored": self.num_ignored,
+            "num_opassed": self.num_opassed,
+            "num_ofailed": self.num_ofailed,
             "parallelism": parallelism,
             "results": [r for r in self._results]
         }
diff --git a/ducktape/tests/runner_client.py b/ducktape/tests/runner_client.py
index b39f721ab..8abb70423 100644
--- a/ducktape/tests/runner_client.py
+++ b/ducktape/tests/runner_client.py
@@ -29,7 +29,7 @@
 from ducktape.tests.status import FLAKY
 from ducktape.tests.test import test_logger, TestContext
 
-from ducktape.tests.result import TestResult, IGNORE, PASS, FAIL
+from ducktape.tests.result import TestResult, IGNORE, PASS, FAIL, OPASS, OFAIL
 from ducktape.utils.local_filesystem_utils import mkdir_p
 
 
@@ -183,11 +183,16 @@ def _do_run(self, num_runs):
 
             data = self.run_test()
 
-            test_status = PASS
+            if self.test_context.ok_to_fail:
+                test_status = OPASS
+            else:
+                test_status = PASS
 
         except BaseException as e:
-            # mark the test as failed before doing anything else
-            test_status = FAIL
+            if self.test_context.ok_to_fail:
+                test_status = OFAIL
+            else:
+                test_status = FAIL
             err_trace = self._exc_msg(e)
             summary.append(err_trace)
 
@@ -240,8 +245,10 @@ def _check_cluster_utilization(self, result, summary):
                 # only check node utilization on test pass
                 if result == PASS or result == FLAKY:
                     self.log(logging.INFO, "FAIL: " + message)
-
-                result = FAIL
+                    result = FAIL
+                elif result == OPASS:
+                    self.log(logging.INFO, "OFAIL: " + message)
+                    result = OFAIL
                 summary += message
             else:
                 self.log(logging.WARN, message)
diff --git a/ducktape/tests/status.py b/ducktape/tests/status.py
index 111d52087..0264af2e9 100644
--- a/ducktape/tests/status.py
+++ b/ducktape/tests/status.py
@@ -31,3 +31,5 @@ def to_json(self):
 FLAKY = TestStatus("flaky")
 FAIL = TestStatus("fail")
 IGNORE = TestStatus("ignore")
+OPASS = TestStatus("opass")
+OFAIL = TestStatus("ofail")
diff --git a/ducktape/tests/test.py b/ducktape/tests/test.py
index a6e8f4db1..1c0c8dd52 100644
--- a/ducktape/tests/test.py
+++ b/ducktape/tests/test.py
@@ -28,7 +28,7 @@
 from ducktape.services.service_registry import ServiceRegistry
 from ducktape.template import TemplateRenderer
 from ducktape.mark.resource import CLUSTER_SPEC_KEYWORD, CLUSTER_SIZE_KEYWORD
-from ducktape.tests.status import FAIL
+from ducktape.tests.status import FAIL, OFAIL
 
 
 class Test(TemplateRenderer):
@@ -151,7 +151,7 @@ def copy_service_logs(self, test_status):
                 # Gather locations of logs to collect
                 node_logs = []
                 for log_name in log_dirs.keys():
-                    if test_status == FAIL or self.should_collect_log(log_name, service):
+                    if test_status == FAIL or test_status == OFAIL or self.should_collect_log(log_name, service):
                         node_logs.append(log_dirs[log_name]["path"])
 
                 self.test_context.logger.debug("Preparing to copy logs from %s: %s" %
@@ -304,6 +304,7 @@ def __init__(self, **kwargs):
         self.function = kwargs.get("function")
         self.injected_args = kwargs.get("injected_args")
         self.ignore = kwargs.get("ignore", False)
+        self.ok_to_fail = kwargs.get("ok_to_fail", False)
 
         # cluster_use_metadata is a dict containing information about how this test will use cluster resources
         self.cluster_use_metadata = copy.copy(kwargs.get("cluster_use_metadata", {}))
@@ -320,9 +321,9 @@ def __init__(self, **kwargs):
     def __repr__(self):
         return \
             "<module=%s, cls=%s, function=%s, injected_args=%s, file=%s, ignore=%s, " \
-            "cluster_size=%s, cluster_spec=%s>" % \
+            "ok_to_fail=%s, cluster_size=%s, cluster_spec=%s>" % \
             (self.module, self.cls_name, self.function_name, str(self.injected_args), str(self.file),
-             str(self.ignore), str(self.expected_num_nodes), str(self.expected_cluster_spec))
+             str(self.ignore), str(self.ok_to_fail), str(self.expected_num_nodes), str(self.expected_cluster_spec))
 
     def copy(self, **kwargs):
         """Construct a new TestContext object from another TestContext object
diff --git a/tests/mark/check_ok_to_fail.py b/tests/mark/check_ok_to_fail.py
index 709c6f0fe..c66b5f6df 100644
--- a/tests/mark/check_ok_to_fail.py
+++ b/tests/mark/check_ok_to_fail.py
@@ -3,6 +3,7 @@
 
 import pytest
 
+
 class CheckOkToFail(object):
     def check_simple(self):
         @ok_to_fail

From 2e729067f6bb431540eb48bcdf974a9d5a95e987 Mon Sep 17 00:00:00 2001
From: John Spray <jcs@redpanda.com>
Date: Thu, 14 Apr 2022 13:42:59 +0100
Subject: [PATCH 72/83] Include summary in XML output for OFAIL tests

Previously this attribute was omitted for all but failures,
so on OFAIL we didn't get our one-line summary of a test's
failure reason (usually the exception).

Including this for OFAIL results makes it quicker to see what
went wrong.
---
 ducktape/tests/reporter.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ducktape/tests/reporter.py b/ducktape/tests/reporter.py
index 8589bfa99..791dc5955 100644
--- a/ducktape/tests/reporter.py
+++ b/ducktape/tests/reporter.py
@@ -230,7 +230,7 @@ def report(self):
                     name=name, classname=test.cls_name, time=str(test.run_time_seconds),
                     status=str(test.test_status), assertions=""
                 ))
-                if test.test_status == FAIL:
+                if test.test_status == FAIL or test.test_status == OFAIL:
                     xml_failure = ET.SubElement(xml_testcase, 'failure', attrib=dict(
                         message=test.summary.splitlines()[0]
                     ))

From 9de9e29b2c286e30c28530f880ca2aff36a6a035 Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Mon, 25 Jul 2022 19:15:49 +0000
Subject: [PATCH 73/83] updates for Python 3.10

From patch supplied by Michal Maslanka <michal@vectorized.io>
---
 ducktape/cluster/cluster.py | 3 +--
 requirements-test.txt       | 4 ++--
 requirements.txt            | 6 ++++--
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/ducktape/cluster/cluster.py b/ducktape/cluster/cluster.py
index d869968f3..961808bd0 100644
--- a/ducktape/cluster/cluster.py
+++ b/ducktape/cluster/cluster.py
@@ -35,7 +35,6 @@ class Cluster(object):
     This interface doesn't define any mapping of roles/services to nodes. It only interacts with some underlying
     system that can describe available resources and mediates reservations of those resources.
     """
-
     def __init__(self):
         self.max_used_nodes = 0
 
@@ -67,7 +66,7 @@ def do_alloc(self, cluster_spec):
 
     def free(self, nodes):
         """Free the given node or list of nodes"""
-        if isinstance(nodes, collections.Iterable):
+        if isinstance(nodes, collections.abc.Iterable):
             for s in nodes:
                 self.free_single(s)
         else:
diff --git a/requirements-test.txt b/requirements-test.txt
index 3c37e6fbe..0c71f1c5b 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,4 +1,4 @@
-pytest~=6.2.0
+pytest==6.1.0
 # 4.0 drops py27 support
 mock==4.0.2
 psutil==5.7.2
@@ -7,4 +7,4 @@ statistics==1.0.3.5
 requests-testadapter==0.3.0
 flake8~=4.0.0
 pytest-cov~=3.0
-pytest-xdist~=2.5
\ No newline at end of file
+pytest-xdist~=2.5
diff --git a/requirements.txt b/requirements.txt
index 27ef81d29..0958bf133 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,10 +5,12 @@ MarkupSafe<2.0.0
 pyparsing<3.0.0
 zipp<2.0.0
 pywinrm==0.2.2
-requests==2.31.0
-paramiko~=2.7.2
+requests==2.24.0
+paramiko~=2.11.0
 pyzmq==19.0.2
 pycryptodome==3.9.8
+# > 5.0 drops py27 support
 more-itertools==5.0.0
+tox==3.20.0
 six==1.15.0
 PyYAML==5.3.1

From dd323f3f653d6f7715de43c57113b9b80c51df2e Mon Sep 17 00:00:00 2001
From: Denis Rystsov <denis@vectorized.io>
Date: Sat, 18 Jun 2022 19:40:48 -0700
Subject: [PATCH 74/83] wait_until: ignore backoff_sec on success

---
 ducktape/utils/util.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ducktape/utils/util.py b/ducktape/utils/util.py
index 6bf274472..2b2c606c3 100644
--- a/ducktape/utils/util.py
+++ b/ducktape/utils/util.py
@@ -51,8 +51,7 @@ def wait_until(condition, timeout_sec, backoff_sec=.1, err_msg="", retry_on_exc=
             last_exception = e
             if not retry_on_exc:
                 raise e
-        finally:
-            time.sleep(backoff_sec)
+        time.sleep(backoff_sec)
 
     # it is safe to call Exception from None - will be just treated as a normal exception
     raise TimeoutError(err_msg() if callable(err_msg) else err_msg) from last_exception

From 15ae8811f85e3a9611e8656215513a991a095ece Mon Sep 17 00:00:00 2001
From: Andrew Wong <awong@redpanda.com>
Date: Mon, 1 Aug 2022 17:40:34 -0700
Subject: [PATCH 75/83] relativize latest symlink

When running with dockerized ducktape, the 'latest' symlink generated
typically does not survive the journey across the bind mount. This
commit relativizes it since the symlink is always expected to be in the
results directory anyway.
---
 ducktape/command_line/main.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ducktape/command_line/main.py b/ducktape/command_line/main.py
index 05540d7dc..aa70749c1 100644
--- a/ducktape/command_line/main.py
+++ b/ducktape/command_line/main.py
@@ -204,7 +204,9 @@ def main():
     for r in reporters:
         r.report()
 
-    update_latest_symlink(args_dict["results_root"], results_dir)
+    # Update the symlink to the session ID (i.e. the relativized path) rather
+    # than the full path to support bound volumes.
+    update_latest_symlink(args_dict["results_root"], session_id)
     close_logger(session_logger)
     if not test_results.get_aggregate_success():
         # Non-zero exit if at least one test failed

From 574f25da421a6ceb47bbd04e6df863df83830414 Mon Sep 17 00:00:00 2001
From: ZeDRoman <ilgovskiy@mail.ru>
Date: Wed, 19 Oct 2022 11:07:38 +0200
Subject: [PATCH 76/83] remote_account: raise paramiko REKEY_BYTES

Changing it due to https://github.com/redpanda-data/redpanda/issues/6792
---
 ducktape/cluster/remoteaccount.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ducktape/cluster/remoteaccount.py b/ducktape/cluster/remoteaccount.py
index 8f3f606a8..02d83929a 100644
--- a/ducktape/cluster/remoteaccount.py
+++ b/ducktape/cluster/remoteaccount.py
@@ -12,6 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import paramiko
+# Constant that is responsible for updating ssh session keys after
+# more than REKEY_BYTES data passed through the connection
+# Changing it due to https://github.com/redpanda-data/redpanda/issues/6792
+paramiko.packet.Packetizer.REKEY_BYTES = pow(2, 32) # noqa
+
 from contextlib import contextmanager
 import logging
 import os

From bb50f749abd65926a1d3b0e27f604200513eceba Mon Sep 17 00:00:00 2001
From: John Spray <jcs@redpanda.com>
Date: Thu, 2 Feb 2023 13:36:03 +0000
Subject: [PATCH 77/83] setup.py: update boto3 to latest

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 0958bf133..62a83c133 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 jinja2==2.11.2
-boto3==1.15.9
+boto3==1.26.62
 # jinja2 pulls in MarkupSafe with a > constraint, but we need to constrain it for compatibility
 MarkupSafe<2.0.0
 pyparsing<3.0.0

From 424181c96a5faaf7b68efa5d328070273bd30381 Mon Sep 17 00:00:00 2001
From: Bharath Vissapragada <bharathv@redpanda.com>
Date: Fri, 3 Feb 2023 11:23:57 -0800
Subject: [PATCH 78/83] Include debugging information during timeouts

Prints the executing test context that failed to respond
to the runner client causing a timeout.
---
 ducktape/tests/runner.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/ducktape/tests/runner.py b/ducktape/tests/runner.py
index a6d2a4a2c..742f96ee1 100644
--- a/ducktape/tests/runner.py
+++ b/ducktape/tests/runner.py
@@ -205,7 +205,7 @@ def run_all_tests(self):
                         event = self.receiver.recv(timeout=self.session_context.test_runner_timeout)
                         self._handle(event)
                     except Exception as e:
-                        err_str = "Exception receiving message: %s: %s" % (str(type(e)), str(e))
+                        err_str = "Exception receiving message: %s: %s, active_tests: \n %s \n" % (str(type(e)), str(e), self.active_tests_debug())
                         err_str += "\n" + traceback.format_exc(limit=16)
                         self._log(logging.ERROR, err_str)
 
@@ -226,6 +226,10 @@ def run_all_tests(self):
 
         return self.results
 
+    def active_tests_debug(self):
+        """Returns debug information about currently active tests"""
+        return list(self.active_tests.keys())
+
     def _run_single_test(self, test_context):
         """Start a test runner client in a subprocess"""
         current_test_counter = self.test_counter

From 6e143032ddaa5c2fd736bb9ba84d544175a333c1 Mon Sep 17 00:00:00 2001
From: Noah Watkins <noahwatkins@gmail.com>
Date: Tue, 7 Mar 2023 16:57:46 -0800
Subject: [PATCH 79/83] upgrade pyyaml to 6.0

Signed-off-by: Noah Watkins <noahwatkins@gmail.com>
---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 62a83c133..9b34c2226 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,4 @@ pycryptodome==3.9.8
 more-itertools==5.0.0
 tox==3.20.0
 six==1.15.0
-PyYAML==5.3.1
+PyYAML==6.0

From 4d5ecb065ca58682ef1fdb77c1a0e006b2c34275 Mon Sep 17 00:00:00 2001
From: Solonas Gousteris <solonas@redpanda.com>
Date: Mon, 13 Mar 2023 16:47:09 +0200
Subject: [PATCH 80/83] mark/matrix: forgive empty parametrization

ref redpanda-data/redpanda#8704
---
 ducktape/mark/_mark.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/ducktape/mark/_mark.py b/ducktape/mark/_mark.py
index acfd91f45..634446e53 100644
--- a/ducktape/mark/_mark.py
+++ b/ducktape/mark/_mark.py
@@ -143,10 +143,15 @@ def name(self):
         return "MATRIX"
 
     def apply(self, seed_context, context_list):
+        empty_parametrization = True
         for injected_args in cartesian_product_dict(self.injected_args):
+            empty_parametrization = False
             injected_fun = _inject(**injected_args)(seed_context.function)
             context_list.insert(0, seed_context.copy(function=injected_fun, injected_args=injected_args))
-
+        # forgive empty parametrization and mark test as IGNORE
+        if empty_parametrization and not context_list:
+            seed_context.ignore = True
+            context_list.insert(0, seed_context)
         return context_list
 
     def __eq__(self, other):

From a8fea3e24f3b9c2946c43c86a87e2b9ee6c21791 Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Wed, 22 Mar 2023 03:22:04 +0000
Subject: [PATCH 81/83] lock tox to 3.28.0

because the latest release of tox 4 is unable to run the tests
---
 .github/workflows/python.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index c06d2f732..bfd4056b2 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -11,6 +11,6 @@ jobs:
         with:
           python-version: '3.8'
       - name: Install Tox
-        run: pip install tox
+        run: pip install tox==3.28.0
       - name: Run Tox
         run: tox -e style,py38,cover

From da7d19a8cdbc91ff00d5c09a569f16abbcf7553c Mon Sep 17 00:00:00 2001
From: Andrew Hsu <xuzuan@gmail.com>
Date: Wed, 22 Mar 2023 03:32:30 +0000
Subject: [PATCH 82/83] fix style in runner.py

./ducktape/tests/runner.py:206:121: E501 line too long (147 > 120
characters)
---
 ducktape/tests/runner.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/ducktape/tests/runner.py b/ducktape/tests/runner.py
index 742f96ee1..466581533 100644
--- a/ducktape/tests/runner.py
+++ b/ducktape/tests/runner.py
@@ -205,7 +205,8 @@ def run_all_tests(self):
                         event = self.receiver.recv(timeout=self.session_context.test_runner_timeout)
                         self._handle(event)
                     except Exception as e:
-                        err_str = "Exception receiving message: %s: %s, active_tests: \n %s \n" % (str(type(e)), str(e), self.active_tests_debug())
+                        err_str = "Exception receiving message: %s: %s" % (str(type(e)), str(e))
+                        err_str += ", active_tests: \n %s \n" % (self.active_tests_debug())
                         err_str += "\n" + traceback.format_exc(limit=16)
                         self._log(logging.ERROR, err_str)
 

From 62e0285f6b3a2f22fd4a43b5fdbc13be8d4290d9 Mon Sep 17 00:00:00 2001
From: Solonas Gousteris <solonas@redpanda.com>
Date: Fri, 1 Sep 2023 14:35:47 +0300
Subject: [PATCH 83/83] fix test issues: bump tox and pytest pins, add noqa to late imports

---
 .github/workflows/python.yml      |  2 +-
 ducktape/cluster/remoteaccount.py | 30 +++++++++++++++---------------
 requirements-test.txt             |  2 +-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml
index bfd4056b2..812edb651 100644
--- a/.github/workflows/python.yml
+++ b/.github/workflows/python.yml
@@ -11,6 +11,6 @@ jobs:
         with:
           python-version: '3.8'
       - name: Install Tox
-        run: pip install tox==3.28.0
+        run: pip install tox==4.11.0
       - name: Run Tox
         run: tox -e style,py38,cover
diff --git a/ducktape/cluster/remoteaccount.py b/ducktape/cluster/remoteaccount.py
index 02d83929a..0b630cd37 100644
--- a/ducktape/cluster/remoteaccount.py
+++ b/ducktape/cluster/remoteaccount.py
@@ -18,21 +18,21 @@
 # Changing it due to https://github.com/redpanda-data/redpanda/issues/6792
 paramiko.packet.Packetizer.REKEY_BYTES = pow(2, 32) # noqa
 
-from contextlib import contextmanager
-import logging
-import os
-from paramiko import SSHClient, SSHConfig, MissingHostKeyPolicy
-from paramiko.ssh_exception import SSHException, NoValidConnectionsError
-import shutil
-import signal
-import socket
-import stat
-import tempfile
-import warnings
-
-from ducktape.utils.http_utils import HttpMixin
-from ducktape.utils.util import wait_until
-from ducktape.errors import DucktapeError
+from contextlib import contextmanager # noqa
+import logging # noqa
+import os # noqa
+from paramiko import SSHClient, SSHConfig, MissingHostKeyPolicy # noqa
+from paramiko.ssh_exception import SSHException, NoValidConnectionsError # noqa
+import shutil # noqa
+import signal # noqa
+import socket # noqa
+import stat # noqa
+import tempfile # noqa
+import warnings # noqa
+
+from ducktape.utils.http_utils import HttpMixin # noqa
+from ducktape.utils.util import wait_until # noqa
+from ducktape.errors import DucktapeError # noqa
 
 
 def check_ssh(method):
diff --git a/requirements-test.txt b/requirements-test.txt
index 0c71f1c5b..af59e6420 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -1,4 +1,4 @@
-pytest==6.1.0
+pytest==6.2.0
 # 4.0 drops py27 support
 mock==4.0.2
 psutil==5.7.2