From e3ac7c3bf0405aaf958bcf76b51e74deb551f841 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 14 Sep 2023 12:09:32 +0200
Subject: [PATCH 1/5] net-test: packetdrill: run_all: limit tests running in
 parallel

When using machines with limited resources (e.g. public CI) and a kernel
with a debug config, some errors have been observed when too many tests
were executed in parallel, e.g. packets being received too late, etc.

This commit allows to limit the number of tests being launched in
parallel while still keeping a timeout per test and all the other
features. If the new '-P' option is not used, the behaviour is not
modified.

With MPTCP tests, this new limit is set to twice the number of cores
(nproc). It seems like it is very helpful to improve the stability of
the tests while still not taking too long to execute them.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
---
 gtests/net/packetdrill/run_all.py | 98 +++++++++++++++++++------------
 1 file changed, 60 insertions(+), 38 deletions(-)

diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py
index de629910..3056c717 100755
--- a/gtests/net/packetdrill/run_all.py
+++ b/gtests/net/packetdrill/run_all.py
@@ -34,8 +34,8 @@ def FindTests(self, path='.'):
         tests.append(dirpath + '/' + filename)
     return sorted(tests)
 
-  def StartTest(self, path, variant, extra_args=None):
-    """Run a test using packetdrill in a subprocess."""
+  def CmdTest(self, path, variant, extra_args=None):
+    """Return a command to run a test using packetdrill in a subprocess."""
     bin_path = self.tools_path + '/' + 'packetdrill'
     nswrap_path = self.tools_path + '/' + 'in_netns.sh'
 
@@ -48,17 +48,11 @@ def StartTest(self, path, variant, extra_args=None):
       cmd.extend(extra_args.split())
     cmd.append(basename)
 
-    outfile = tempfile.TemporaryFile(mode='w+')
-    errfile = tempfile.TemporaryFile(mode='w+')
-
-    process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir)
-    if self.args['serialized']:
-      process.wait()
-    return (process, path, variant, outfile, errfile)
+    return (cmd, execdir, path, variant)
 
-  def StartTestIPv4(self, path):
-    """Run a packetdrill test over ipv4."""
-    return self.StartTest(
+  def CmdTestIPv4(self, path):
+    """Return a command to run a packetdrill test over ipv4."""
+    return self.CmdTest(
         path, 'ipv4',
         ('--ip_version=ipv4 '
          '--local_ip=192.168.0.1 '
@@ -71,9 +65,9 @@ def StartTestIPv4(self, path):
          '-D CMSG_TYPE_RECVERR=IP_RECVERR')
     )
 
-  def StartTestIPv6(self, path):
-    """Run a packetdrill test over ipv6."""
-    return self.StartTest(
+  def CmdTestIPv6(self, path):
+    """Return a command to run a packetdrill test over ipv6."""
+    return self.CmdTest(
         path, 'ipv6',
         ('--ip_version=ipv6 --mtu=1520 '
          '--local_ip=fd3d:0a0b:17d6::1 '
@@ -84,9 +78,9 @@ def StartTestIPv6(self, path):
          '-D CMSG_TYPE_RECVERR=IPV6_RECVERR')
     )
 
-  def StartTestIPv4Mappedv6(self, path):
-    """Run a packetdrill test over ipv4-mapped-v6."""
-    return self.StartTest(
+  def CmdTestIPv4Mappedv6(self, path):
+    """Return a command to run a packetdrill test over ipv4-mapped-v6."""
+    return self.CmdTest(
         path, 'ipv4-mapped-v6',
         ('--ip_version=ipv4-mapped-ipv6 '
          '--local_ip=192.168.0.1 '
@@ -99,17 +93,17 @@ def StartTestIPv4Mappedv6(self, path):
          '-D CMSG_TYPE_RECVERR=IPV6_RECVERR')
     )
 
-  def StartTests(self, tests):
+  def CmdsTests(self, tests):
     """Run every test in tests in all three variants (v4, v6, v4-mapped-v6)."""
-    procs = []
+    cmds = []
     for test in tests:
       if not test.endswith('v6.pkt'):
-        procs.append(self.StartTestIPv4(test))
-        procs.append(self.StartTestIPv4Mappedv6(test))
+        cmds.append(self.CmdTestIPv4(test))
+        cmds.append(self.CmdTestIPv4Mappedv6(test))
       if not test.endswith('v4.pkt'):
-        procs.append(self.StartTestIPv6(test))
+        cmds.append(self.CmdTestIPv6(test))
 
-    return procs
+    return cmds
 
   def Log(self, outfile, errfile):
     """Print a background process's stdout and stderr streams."""
@@ -120,12 +114,22 @@ def Log(self, outfile, errfile):
     errfile.seek(0)
     sys.stderr.write(errfile.read())
 
-  def PollTest(self, test):
-    """Test whether a test has finished and if so record its return value."""
-    process, path, variant, outfile, errfile = test
+  def StartTest(self, cmd, execdir, path, variant):
+    """Run a packetdrill test"""
+    outfile = tempfile.TemporaryFile(mode='w+')
+    errfile = tempfile.TemporaryFile(mode='w+')
 
+    time_start = time.time()
+    process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir)
+    if self.args['serialized']:
+      process.wait()
+
+    return (process, path, variant, outfile, errfile, time_start)
+
+  def PollTest(self, process, path, variant, outfile, errfile, time_start, now):
+    """Test whether a test has finished and if so record its return value."""
     if process.poll() is None:
-      return False
+      return False, now - time_start >= self.max_runtime
 
     if not process.returncode:
       self.num_pass += 1
@@ -140,18 +144,34 @@ def PollTest(self, test):
         if self.args['log_on_error']:
           self.Log(outfile, errfile)
 
-    return True
+    return True, False
+
+  def StartPollTestSet(self, cmds):
+    """Start and wait until all tests in procs have finished or until timeout."""
+    max_in_parallel = self.args['max_in_parallel']
+    if max_in_parallel == 0 or max_in_parallel > len(cmds):
+      max_in_parallel = len(cmds)
 
-  def PollTestSet(self, procs, time_start):
-    """Wait until a,l tests in procs have finished or until timeout."""
-    while time.time() - time_start < self.max_runtime and procs:
-      time.sleep(1)
+    procs = []
+    while len(procs) < max_in_parallel:
+      procs.append(self.StartTest(*cmds.pop(0)))
+
+    timedouts = []
+    while procs:
+      time.sleep(.1)
+      now = time.time()
       for entry in procs:
-        if self.PollTest(entry):
+        stopped, timedout = self.PollTest(*entry, now)
+
+        if stopped or timedout:
           procs.remove(entry)
+          if cmds:
+            procs.append(self.StartTest(*cmds.pop(0)))
+          if timedout:
+            timedouts.append(entry)
 
-    self.num_timedout = len(procs)
-    for proc, path, variant, outfile, errfile in procs:
+    self.num_timedout = len(timedouts)
+    for proc, path, variant, outfile, errfile, _ in timedouts:
       try:
         proc.kill()
       except:
@@ -167,8 +187,8 @@ def RunTests(self, path):
     tests = self.FindTests(path)
 
     time_start = time.time()
-    procs = self.StartTests(tests)
-    self.PollTestSet(procs, time_start)
+    cmds = self.CmdsTests(tests)
+    self.StartPollTestSet(cmds)
 
     print(
         'Ran % 4d tests: % 4d passing, % 4d failing, % 4d timed out (%.2f sec): %s'     # pylint: disable=line-too-long
@@ -239,6 +259,8 @@ def ParseArgs():
   args.add_argument('-L', '--log_on_success', action='store_true',
                     help='requires verbose')
   args.add_argument('-p', '--parallelize_dirs', action='store_true')
+  args.add_argument('-P', '--max_in_parallel', metavar='N', type=int, default=0,
+                    help="max number of tests running in parallel")
   args.add_argument('-s', '--subdirs', action='store_true')
   args.add_argument('-S', '--serialized', action='store_true')
   args.add_argument('-v', '--verbose', action='store_true')

From 704af70b7c7fcc5a11ab3ae4e43cd15042b681df Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 14 Sep 2023 15:51:39 +0200
Subject: [PATCH 2/5] net-test: packetdrill: run_all: support timeout with
 --serialized

Before, when using the --serialized argument, the script was simply
waiting for the packetdrill command to finish but without any timeout.

Now that the limitation of the parallel tasks has been put in place, it
is possible to easily support this timeout feature.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
---
 gtests/net/packetdrill/run_all.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py
index 3056c717..e3e66a45 100755
--- a/gtests/net/packetdrill/run_all.py
+++ b/gtests/net/packetdrill/run_all.py
@@ -121,8 +121,6 @@ def StartTest(self, cmd, execdir, path, variant):
 
     time_start = time.time()
     process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir)
-    if self.args['serialized']:
-      process.wait()
 
     return (process, path, variant, outfile, errfile, time_start)
 
@@ -148,7 +146,7 @@ def PollTest(self, process, path, variant, outfile, errfile, time_start, now):
 
   def StartPollTestSet(self, cmds):
     """Start and wait until all tests in procs have finished or until timeout."""
-    max_in_parallel = self.args['max_in_parallel']
+    max_in_parallel = 1 if self.args['serialized'] else self.args['max_in_parallel']
     if max_in_parallel == 0 or max_in_parallel > len(cmds):
       max_in_parallel = len(cmds)
 

From 3be5b12c78b96d920227cb7f77a6f9645a80ba0f Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 14 Sep 2023 10:50:31 +0200
Subject: [PATCH 3/5] net-test: packetdrill: run_all: extra -v are passed to
 pktdrill cmd

Sometimes, it is useful to have more info from packetdrill which can be
done by passing '-v' parameter to it.

Instead of having to modify run_all.py or run packetdrill manually, it
is now possible to add extra -v, e.g.

  $ run_all.py -lL -vvvv

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
---
 gtests/net/packetdrill/run_all.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py
index e3e66a45..15e317ce 100755
--- a/gtests/net/packetdrill/run_all.py
+++ b/gtests/net/packetdrill/run_all.py
@@ -46,6 +46,8 @@ def CmdTest(self, path, variant, extra_args=None):
     cmd.extend(self.default_args.split())
     if extra_args is not None:
       cmd.extend(extra_args.split())
+    if self.args['verbose'] > 1:
+      cmd.append('-' + 'v' * (self.args['verbose'] - 1))
     cmd.append(basename)
 
     return (cmd, execdir, path, variant)
@@ -261,7 +263,8 @@ def ParseArgs():
                     help="max number of tests running in parallel")
   args.add_argument('-s', '--subdirs', action='store_true')
   args.add_argument('-S', '--serialized', action='store_true')
-  args.add_argument('-v', '--verbose', action='store_true')
+  args.add_argument('-v', '--verbose', action='count', default=0,
+                    help="can be repeated to run packetdrill with -v")
   return vars(args.parse_args())
 
 

From 7f9e1fd33ba00d1267bb17f17cbf85a963a25f2c Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Fri, 28 Jan 2022 14:31:16 +0100
Subject: [PATCH 4/5] net-test: packetdrill: run_all: add --capture option

This new option is useful to capture packet traces per test for debug
purposes.

With the new parameter, we can specify to capture packets with TCPDump
and write the .pcap files in the given directory without modifying
in_netns.sh or re-launching the tests manually outside the netns.

e.g. to write .pcap files in the home directory:

  $ run_all.py -c ~

Files will be generated using the name of the test and its variant.

Note that this adds some delay: one second before starting not to miss
the first packets and one second at the end to make sure the last
packets will be captured by TCPDump.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
---
 gtests/net/packetdrill/in_netns.sh | 12 ++++++++++++
 gtests/net/packetdrill/run_all.py  | 14 +++++++++++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/gtests/net/packetdrill/in_netns.sh b/gtests/net/packetdrill/in_netns.sh
index 0851ed39..ef1f72c7 100755
--- a/gtests/net/packetdrill/in_netns.sh
+++ b/gtests/net/packetdrill/in_netns.sh
@@ -6,12 +6,24 @@ set -e
 
 readonly NETNS="ns-$(mktemp -u XXXXXX)"
 
+TCPDUMP_PID=
+
 setup() {
 	ip netns add "${NETNS}"
 	ip -netns "${NETNS}" link set lo up
+	if [ -n "${TCPDUMP_OUTPUT}" ]; then
+		ip netns exec "${NETNS}" tcpdump -i any -s 150 -w "${TCPDUMP_OUTPUT}" &
+		TCPDUMP_PID=$!
+		sleep 1 # give some time to TCPDump to start
+	fi
 }
 
 cleanup() {
+	if [ -n "${TCPDUMP_PID}" ]; then
+		sleep 1 # give some time to TCPDump to process the packets
+		kill "${TCPDUMP_PID}"
+		wait "${TCPDUMP_PID}" 2>/dev/null || true
+	fi
 	ip netns del "${NETNS}"
 }
 
diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py
index 15e317ce..e5ca0433 100755
--- a/gtests/net/packetdrill/run_all.py
+++ b/gtests/net/packetdrill/run_all.py
@@ -50,7 +50,7 @@ def CmdTest(self, path, variant, extra_args=None):
       cmd.append('-' + 'v' * (self.args['verbose'] - 1))
     cmd.append(basename)
 
-    return (cmd, execdir, path, variant)
+    return (cmd, execdir, path, variant, basename)
 
   def CmdTestIPv4(self, path):
     """Return a command to run a packetdrill test over ipv4."""
@@ -116,13 +116,19 @@ def Log(self, outfile, errfile):
     errfile.seek(0)
     sys.stderr.write(errfile.read())
 
-  def StartTest(self, cmd, execdir, path, variant):
+  def StartTest(self, cmd, execdir, path, variant, basename):
     """Run a packetdrill test"""
     outfile = tempfile.TemporaryFile(mode='w+')
     errfile = tempfile.TemporaryFile(mode='w+')
 
+    env = os.environ
+    if self.args['capture'] is not None:
+      fname = os.path.splitext(basename)[0] +  "_" + variant + ".pcap"
+      env = dict(env, TCPDUMP_OUTPUT=os.path.join(self.args['capture'], fname))
+
     time_start = time.time()
-    process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir)
+    process = subprocess.Popen(cmd, stdout=outfile, stderr=errfile, cwd=execdir,
+                               env=env)
 
     return (process, path, variant, outfile, errfile, time_start)
 
@@ -254,6 +260,8 @@ def ParseArgs():
   """Parse commandline arguments."""
   args = argparse.ArgumentParser()
   args.add_argument('path', default='.', nargs='?')
+  args.add_argument('-c', '--capture', metavar='DIR',
+                    help='capture packets in the specified directory')
   args.add_argument('-l', '--log_on_error', action='store_true',
                     help='requires verbose')
   args.add_argument('-L', '--log_on_success', action='store_true',

From 1f42aebf7a447f6250ad91718313e375d5b36f44 Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Thu, 28 Sep 2023 14:30:04 +0200
Subject: [PATCH 5/5] net-test: packetdrill: run_all: add --dry_run mode

It is useful to have an easy way to display the commands that are going
to be executed: for debugging purposes, to relaunch one specific test
with different arguments or from a different environment, etc.

Now a new '--dry_run' parameter is available to only display commands
and not execute anything.

Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
---
 gtests/net/packetdrill/run_all.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/gtests/net/packetdrill/run_all.py b/gtests/net/packetdrill/run_all.py
index e5ca0433..d4415acb 100755
--- a/gtests/net/packetdrill/run_all.py
+++ b/gtests/net/packetdrill/run_all.py
@@ -194,6 +194,13 @@ def RunTests(self, path):
 
     time_start = time.time()
     cmds = self.CmdsTests(tests)
+
+    if self.args['dry_run']:
+      print('Dry-run mode:')
+      for cmd in cmds:
+        print(' '.join(cmd[0]))
+      return
+
     self.StartPollTestSet(cmds)
 
     print(
@@ -269,6 +276,7 @@ def ParseArgs():
   args.add_argument('-p', '--parallelize_dirs', action='store_true')
   args.add_argument('-P', '--max_in_parallel', metavar='N', type=int, default=0,
                     help="max number of tests running in parallel")
+  args.add_argument('--dry_run', action='store_true')
   args.add_argument('-s', '--subdirs', action='store_true')
   args.add_argument('-S', '--serialized', action='store_true')
   args.add_argument('-v', '--verbose', action='count', default=0,