From e3ac7c3bf0405aaf958bcf76b51e74deb551f841 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 14 Sep 2023 12:09:32 +0200 Subject: [PATCH 1/5] net-test: packetdrill: run_all: limit tests running in parallel When using machines with limited resources (e.g. public CI) and a kernel with a debug config, some errors have been observed when too many tests were executed in parallel, e.g. packets being received too late, etc. This commit allows to limit the number of tests being launched in parallel while still keeping a timeout per test and all the other features. If the new '-P' option is not used, the behaviour is not modified. With MPTCP tests, this new limit is set to twice the number of cores (nproc). It seems like it is very helpful to improve the stability of the tests while still not taking too long to execute them. Signed-off-by: Matthieu Baerts --- gtests/net/packetdrill/run_all.py | 98 +++++++++++++++++++------------ 1 file changed, 60 insertions(+), 38 deletions(-) diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py index de629910..3056c717 100755 --- a/gtests/net/packetdrill/run_all.py +++ b/gtests/net/packetdrill/run_all.py @@ -34,8 +34,8 @@ def FindTests(self, path='.'): tests.append(dirpath + '/' + filename) return sorted(tests) - def StartTest(self, path, variant, extra_args=None): - """Run a test using packetdrill in a subprocess.""" + def CmdTest(self, path, variant, extra_args=None): + """Return a command to run a test using packetdrill in a subprocess.""" bin_path = self.tools_path + '/' + 'packetdrill' nswrap_path = self.tools_path + '/' + 'in_netns.sh' @@ -48,17 +48,11 @@ def StartTest(self, path, variant, extra_args=None): cmd.extend(extra_args.split()) cmd.append(basename) - outfile = tempfile.TemporaryFile(mode='w+') - errfile = tempfile.TemporaryFile(mode='w+') - - process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir) - if self.args['serialized']: - process.wait() - return (process, path, variant, outfile, errfile) + return (cmd, execdir, path, variant) - def StartTestIPv4(self, path): - """Run a packetdrill test over ipv4.""" - return self.StartTest( + def CmdTestIPv4(self, path): + """Return a command to run a packetdrill test over ipv4.""" + return self.CmdTest( path, 'ipv4', ('--ip_version=ipv4 ' '--local_ip=192.168.0.1 ' @@ -71,9 +65,9 @@ def StartTestIPv4(self, path): '-D CMSG_TYPE_RECVERR=IP_RECVERR') ) - def StartTestIPv6(self, path): - """Run a packetdrill test over ipv6.""" - return self.StartTest( + def CmdTestIPv6(self, path): + """Return a command to run a packetdrill test over ipv6.""" + return self.CmdTest( path, 'ipv6', ('--ip_version=ipv6 --mtu=1520 ' '--local_ip=fd3d:0a0b:17d6::1 ' @@ -84,9 +78,9 @@ def StartTestIPv6(self, path): '-D CMSG_TYPE_RECVERR=IPV6_RECVERR') ) - def StartTestIPv4Mappedv6(self, path): - """Run a packetdrill test over ipv4-mapped-v6.""" - return self.StartTest( + def CmdTestIPv4Mappedv6(self, path): + """Return a command to run a packetdrill test over ipv4-mapped-v6.""" + return self.CmdTest( path, 'ipv4-mapped-v6', ('--ip_version=ipv4-mapped-ipv6 ' '--local_ip=192.168.0.1 ' @@ -99,17 +93,17 @@ def StartTestIPv4Mappedv6(self, path): '-D CMSG_TYPE_RECVERR=IPV6_RECVERR') ) - def StartTests(self, tests): + def CmdsTests(self, tests): """Run every test in tests in all three variants (v4, v6, v4-mapped-v6).""" - procs = [] + cmds = [] for test in tests: if not test.endswith('v6.pkt'): - procs.append(self.StartTestIPv4(test)) - procs.append(self.StartTestIPv4Mappedv6(test)) + cmds.append(self.CmdTestIPv4(test)) + cmds.append(self.CmdTestIPv4Mappedv6(test)) if not test.endswith('v4.pkt'): - procs.append(self.StartTestIPv6(test)) + cmds.append(self.CmdTestIPv6(test)) - return procs + return cmds def Log(self, outfile, errfile): """Print a background process's stdout and stderr streams.""" @@ -120,12 +114,22 @@ def Log(self, outfile, errfile): errfile.seek(0) sys.stderr.write(errfile.read()) - def PollTest(self, test): - """Test whether a test has finished and if so record its return value.""" - process, path, variant, outfile, errfile = test + def StartTest(self, cmd, execdir, path, variant): + """Run a packetdrill test""" + outfile = tempfile.TemporaryFile(mode='w+') + errfile = tempfile.TemporaryFile(mode='w+') + time_start = time.time() + process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir) + if self.args['serialized']: + process.wait() + + return (process, path, variant, outfile, errfile, time_start) + + def PollTest(self, process, path, variant, outfile, errfile, time_start, now): + """Test whether a test has finished and if so record its return value.""" if process.poll() is None: - return False + return False, now - time_start >= self.max_runtime if not process.returncode: self.num_pass += 1 @@ -140,18 +144,34 @@ def PollTest(self, test): if self.args['log_on_error']: self.Log(outfile, errfile) - return True + return True, False + + def StartPollTestSet(self, cmds): + """Start and wait until all tests in procs have finished or until timeout.""" + max_in_parallel = self.args['max_in_parallel'] + if max_in_parallel == 0 or max_in_parallel > len(cmds): + max_in_parallel = len(cmds) - def PollTestSet(self, procs, time_start): - """Wait until a,l tests in procs have finished or until timeout.""" - while time.time() - time_start < self.max_runtime and procs: - time.sleep(1) + procs = [] + while len(procs) < max_in_parallel: + procs.append(self.StartTest(*cmds.pop(0))) + + timedouts = [] + while procs: + time.sleep(.1) + now = time.time() for entry in procs: - if self.PollTest(entry): + stopped, timedout = self.PollTest(*entry, now) + + if stopped or timedout: procs.remove(entry) + if cmds: + procs.append(self.StartTest(*cmds.pop(0))) + if timedout: + timedouts.append(entry) - self.num_timedout = len(procs) - for proc, path, variant, outfile, errfile in procs: + self.num_timedout = len(timedouts) + for proc, path, variant, outfile, errfile, _ in timedouts: try: proc.kill() except: @@ -167,8 +187,8 @@ def RunTests(self, path): tests = self.FindTests(path) time_start = time.time() - procs = self.StartTests(tests) - self.PollTestSet(procs, time_start) + cmds = self.CmdsTests(tests) + self.StartPollTestSet(cmds) print( 'Ran % 4d tests: % 4d passing, % 4d failing, % 4d timed out (%.2f sec): %s' # pylint: disable=line-too-long @@ -239,6 +259,8 @@ def ParseArgs(): args.add_argument('-L', '--log_on_success', action='store_true', help='requires verbose') args.add_argument('-p', '--parallelize_dirs', action='store_true') + args.add_argument('-P', '--max_in_parallel', metavar='N', type=int, default=0, + help="max number of tests running in parallel") args.add_argument('-s', '--subdirs', action='store_true') args.add_argument('-S', '--serialized', action='store_true') args.add_argument('-v', '--verbose', action='store_true') From 704af70b7c7fcc5a11ab3ae4e43cd15042b681df Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 14 Sep 2023 15:51:39 +0200 Subject: [PATCH 2/5] net-test: packetdrill: run_all: support timeout with --serialized Before, when using the --serialized argument, the script was simply waiting for the packetdrill command to finish but without any timeout. Now that the limitation of the parallel tasks has been put in place, it is possible to easily support this timeout feature. Signed-off-by: Matthieu Baerts --- gtests/net/packetdrill/run_all.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py index 3056c717..e3e66a45 100755 --- a/gtests/net/packetdrill/run_all.py +++ b/gtests/net/packetdrill/run_all.py @@ -121,8 +121,6 @@ def StartTest(self, cmd, execdir, path, variant): time_start = time.time() process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir) - if self.args['serialized']: - process.wait() return (process, path, variant, outfile, errfile, time_start) @@ -148,7 +146,7 @@ def PollTest(self, process, path, variant, outfile, errfile, time_start, now): def StartPollTestSet(self, cmds): """Start and wait until all tests in procs have finished or until timeout.""" - max_in_parallel = self.args['max_in_parallel'] + max_in_parallel = 1 if self.args['serialized'] else self.args['max_in_parallel'] if max_in_parallel == 0 or max_in_parallel > len(cmds): max_in_parallel = len(cmds) From 3be5b12c78b96d920227cb7f77a6f9645a80ba0f Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 14 Sep 2023 10:50:31 +0200 Subject: [PATCH 3/5] net-test: packetdrill: run_all: extra -v are passed to pktdrill cmd Sometimes, it is useful to have more info from packetdrill which can be done by passing '-v' parameter to it. Instead of having to modify run_all.py or run packetdrill manually, it is now possible to add extra -v, e.g. $ run_all.py -lL -vvvv Signed-off-by: Matthieu Baerts --- gtests/net/packetdrill/run_all.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py index e3e66a45..15e317ce 100755 --- a/gtests/net/packetdrill/run_all.py +++ b/gtests/net/packetdrill/run_all.py @@ -46,6 +46,8 @@ def CmdTest(self, path, variant, extra_args=None): cmd.extend(self.default_args.split()) if extra_args is not None: cmd.extend(extra_args.split()) + if self.args['verbose'] > 1: + cmd.append('-' + 'v' * (self.args['verbose'] - 1)) cmd.append(basename) return (cmd, execdir, path, variant) @@ -261,7 +263,8 @@ def ParseArgs(): help="max number of tests running in parallel") args.add_argument('-s', '--subdirs', action='store_true') args.add_argument('-S', '--serialized', action='store_true') - args.add_argument('-v', '--verbose', action='store_true') + args.add_argument('-v', '--verbose', action='count', default=0, + help="can be repeated to run packetdrill with -v") return vars(args.parse_args()) From 7f9e1fd33ba00d1267bb17f17cbf85a963a25f2c Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Fri, 28 Jan 2022 14:31:16 +0100 Subject: [PATCH 4/5] net-test: packetdrill: run_all: add --capture option This new option is useful to capture packet traces per test for debug purposes. With the new parameter, we can specify to capture packets with TCPDump and write the .pcap files in the given directory without modifying in_netns.sh or re-launching the tests manually outside the netns. e.g. to write .pcap files in the home directory: $ run_all.py -c ~ Files will be generated using the name of the test and its variant. Note that this adds some delay: one second before starting not to miss the first packets and one second at the end to make sure the last packets will be captured by TCPDump. Signed-off-by: Matthieu Baerts --- gtests/net/packetdrill/in_netns.sh | 12 ++++++++++++ gtests/net/packetdrill/run_all.py | 14 +++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/gtests/net/packetdrill/in_netns.sh b/gtests/net/packetdrill/in_netns.sh index 0851ed39..ef1f72c7 100755 --- a/gtests/net/packetdrill/in_netns.sh +++ b/gtests/net/packetdrill/in_netns.sh @@ -6,12 +6,24 @@ set -e readonly NETNS="ns-$(mktemp -u XXXXXX)" +TCPDUMP_PID= + setup() { ip netns add "${NETNS}" ip -netns "${NETNS}" link set lo up + if [ -n "${TCPDUMP_OUTPUT}" ]; then + ip netns exec "${NETNS}" tcpdump -i any -s 150 -w "${TCPDUMP_OUTPUT}" & + TCPDUMP_PID=$! + sleep 1 # give some time to TCPDump to start + fi } cleanup() { + if [ -n "${TCPDUMP_PID}" ]; then + sleep 1 # give some time to TCPDump to process the packets + kill "${TCPDUMP_PID}" + wait "${TCPDUMP_PID}" 2>/dev/null || true + fi ip netns del "${NETNS}" } diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py index 15e317ce..e5ca0433 100755 --- a/gtests/net/packetdrill/run_all.py +++ b/gtests/net/packetdrill/run_all.py @@ -50,7 +50,7 @@ def CmdTest(self, path, variant, extra_args=None): cmd.append('-' + 'v' * (self.args['verbose'] - 1)) cmd.append(basename) - return (cmd, execdir, path, variant) + return (cmd, execdir, path, variant, basename) def CmdTestIPv4(self, path): """Return a command to run a packetdrill test over ipv4.""" @@ -116,13 +116,19 @@ def Log(self, outfile, errfile): errfile.seek(0) sys.stderr.write(errfile.read()) - def StartTest(self, cmd, execdir, path, variant): + def StartTest(self, cmd, execdir, path, variant, basename): """Run a packetdrill test""" outfile = tempfile.TemporaryFile(mode='w+') errfile = tempfile.TemporaryFile(mode='w+') + env = os.environ + if self.args['capture'] is not None: + fname = os.path.splitext(basename)[0] + "_" + variant + ".pcap" + env = dict(env, TCPDUMP_OUTPUT=os.path.join(self.args['capture'], fname)) + time_start = time.time() - process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir) + process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir, + env=env) return (process, path, variant, outfile, errfile, time_start) @@ -254,6 +260,8 @@ def ParseArgs(): """Parse commandline arguments.""" args = argparse.ArgumentParser() args.add_argument('path', default='.', nargs='?') + args.add_argument('-c', '--capture', metavar='DIR', + help='capture packets in the specified directory') args.add_argument('-l', '--log_on_error', action='store_true', help='requires verbose') args.add_argument('-L', '--log_on_success', action='store_true', From 1f42aebf7a447f6250ad91718313e375d5b36f44 Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 28 Sep 2023 14:30:04 +0200 Subject: [PATCH 5/5] net-test: packetdrill: run_all: add --dry_run mode It is useful to have an easy way to display the commands that are going to be executed: for debugging purposes, to relaunch one specific test with different arguments or from a different environment, etc. Now a new '--dry_run' parameter is available to only display commands and not execute anything. Signed-off-by: Matthieu Baerts --- gtests/net/packetdrill/run_all.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py index e5ca0433..d4415acb 100755 --- a/gtests/net/packetdrill/run_all.py +++ b/gtests/net/packetdrill/run_all.py @@ -194,6 +194,13 @@ def RunTests(self, path): time_start = time.time() cmds = self.CmdsTests(tests) + + if self.args['dry_run']: + print('Dry-run mode:') + for cmd in cmds: + print(' '.join(cmd[0])) + return + self.StartPollTestSet(cmds) print( @@ -269,6 +276,7 @@ def ParseArgs(): args.add_argument('-p', '--parallelize_dirs', action='store_true') args.add_argument('-P', '--max_in_parallel', metavar='N', type=int, default=0, help="max number of tests running in parallel") + args.add_argument('--dry_run', action='store_true') args.add_argument('-s', '--subdirs', action='store_true') args.add_argument('-S', '--serialized', action='store_true') args.add_argument('-v', '--verbose', action='count', default=0,