Merge branch 'main' into cgroupsv2

sosy-lab · Mar 17, 2023 · 2efefe1 · 2efefe1
2 parents efc4f19 + 5d4fe8d
commit 2efefe1
Show file tree

Hide file tree

Showing 17 changed files with 89 additions and 49 deletions.
diff --git a/.appveyor.yml b/.appveyor.yml
@@ -17,8 +17,7 @@ install:
   - set PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%
   # The package installation via our setup.py file uses easy_install, which fails to correctly install lxml.
   # As some Python environments don't have lxml preinstalled, install it here to avoid errors during the execution in those cases.
-  - python -m pip install lxml
-  - python -m pip install --user .
+  - python -m pip install --user ".[dev]"
 
 test_script:
-  - python setup.py nosetests --test benchexec.tablegenerator
+  - python -m nose --tests benchexec.tablegenerator
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -44,14 +44,14 @@ stages:
     - test/for_each_of_my_cgroups.sh chgrp $PRIMARY_USER
     - test/for_each_of_my_cgroups.sh chmod g+w $PRIMARY_USER
     - apt install -y libsystemd-dev python-dev
-    # Install BenchExec
-    - sudo -u $PRIMARY_USER pip install --user .
+    # Install BenchExec with `dev` dependencies
+    - sudo -u $PRIMARY_USER pip install --user ".[dev]"
     # Start lxcfs
     - lxcfs /var/lib/lxcfs &
   script:
     - sudo -u $PRIMARY_USER
         COVERAGE_PROCESS_START=.coveragerc
-        coverage run setup.py test
+        coverage run -m nose
   after_script:
     - sudo -u $PRIMARY_USER coverage combine
     - sudo -u $PRIMARY_USER coverage report
@@ -92,7 +92,7 @@ check-format:
   stage: test
   image: python:3.9
   before_script:
-    - pip install black
+    - pip install 'black<2024'
   script:
     - black . --check --diff
   <<: *pip-cache

diff --git a/.travis.yml b/.travis.yml
@@ -16,7 +16,7 @@ python:
   - "3.8"
   - "3.9"
 install:
-  - pip install .
+  - pip install ".[dev]"
   # Install code formatter black, but only on versions where it is available
   - pip install black || true
 before_script:
@@ -26,7 +26,7 @@ before_script:
   # and this is difficult to fix, but occurs only in high-load environments.
   - sed -i benchexec/test_integration/__init__.py -e '/test_simple_parallel/ i \    @unittest.skip("Fails nondeterministically on Travis, probably issue 656")'
 script:
-  - python setup.py test
+  - python -m nose
   # Revert local modification before checking source format
   - git checkout .
   - if which black; then black . --check --diff; fi

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -9,6 +9,30 @@ SPDX-License-Identifier: Apache-2.0
 
 # BenchExec Changelog
 
+## BenchExec 3.16
+
+This release works only on Python 3.7 and newer!
+
+- Improve performance of the "Merging results" step of `table-generator`.  
+  In the common case of all run sets having the same set of tasks,
+  this step is now much faster.
+- Avoid deadlock in `table-generator` for cases with many child processes
+  and heavy system load.
+- On systems with many cores and if there are many input files or columns,
+  `table-generator` will now use more than 32 child processes.
+- Support the scoring schema used in SV-COMP'23 for witness validators,
+  and show the witness category as part of the task identifier in tables.
+  Note that just like for the category `correct-unconfirmed`
+  external postprocessing of the results is necessary for this.
+- If the pattern inside a `<propertyfile>` tag in the benchmark definition
+  does not match a file,
+  BenchExec now terminates with an error instead of just logging a warning.
+- Improved logging in `table-generator`.  
+  The log messages about missing run results and missing properties,
+  which could occur many times in certain use cases, are now omitted.
+  Instead, some numbers about the size of the resulting tables
+  and the number of missing results will be logged.
+
 ## BenchExec 3.15
 
 - Updated installation instructions for Debian.  

diff --git a/README.md b/README.md
@@ -17,6 +17,7 @@ SPDX-License-Identifier: Apache-2.0
 
 
 **News**:
+- BenchExec is part of [Google Summer of Code](https://summerofcode.withgoogle.com/) again! If you are interested in being paid by Google for contributing to BenchExec, check our [project ideas and instructions](https://www.sosy-lab.org/gsoc/).
 - Linux kernel 5.11 finally [makes it possible](https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#kernel-requirements) to use all BenchExec features on other distributions than Ubuntu!
 - We now provide an [Ubuntu PPA](https://launchpad.net/~sosy-lab/+archive/ubuntu/benchmarking) that makes installing and upgrading BenchExec easier ([docs](https://github.com/sosy-lab/benchexec/blob/main/doc/INSTALL.md#debianubuntu)).
 - An extended version of our paper on BenchExec and its background was published as open access in the journal STTT,
@@ -136,6 +137,7 @@ The developers of the following tools use BenchExec:
 - [Dartagnan](https://github.com/hernanponcedeleon/Dat3M)
 - [SMACK](https://github.com/smackers/smack)
 - [SMTInterpol](https://github.com/ultimate-pa/smtinterpol)
+- [TriCera](https://github.com/uuverifiers/tricera)
 - [Ultimate](https://github.com/ultimate-pa/ultimate)
 
 If you would like to be listed here, [contact us](https://github.com/sosy-lab/benchexec/issues/new).
diff --git a/benchexec/__init__.py b/benchexec/__init__.py
@@ -34,7 +34,7 @@
 Variables ending with "tag" contain references to XML tag objects created by the XML parser.
 """
 
-__version__ = "3.16-dev"
+__version__ = "3.17-dev"
 
 
 class BenchExecException(Exception):  # noqa: N818 backwards compatibility

diff --git a/benchexec/container.py b/benchexec/container.py
@@ -106,14 +106,6 @@
 
 LXCFS_PROC_DIR = b"/var/lib/lxcfs/proc"
 
-# Python before 3.7 does not have BeforeFork and AfterFork_(Child|Parent)
-if not hasattr(ctypes.pythonapi, "PyOS_BeforeFork"):
-    ctypes.pythonapi.PyOS_BeforeFork = lambda: None
-if not hasattr(ctypes.pythonapi, "PyOS_AfterFork_Parent"):
-    ctypes.pythonapi.PyOS_AfterFork_Parent = lambda: None
-if not hasattr(ctypes.pythonapi, "PyOS_AfterFork_Child"):
-    ctypes.pythonapi.PyOS_AfterFork_Child = ctypes.pythonapi.PyOS_AfterFork
-
 _CLONE_NESTED_CALLBACK = ctypes.CFUNCTYPE(ctypes.c_int)
 """Type for callback of execute_in_namespace, nested in our primary callback."""
 
@@ -165,7 +157,7 @@ def execute_in_namespace(func, use_network_ns=True):
 
     # We need to use the syscall clone(), which is similar to fork(), but not available
     # in the Python API. We can call it directly using ctypes, but then the state of the
-    # Python interpreter is inconsistent, so we need to fix that. Python >= 3.7 has
+    # Python interpreter is inconsistent, so we need to fix that. Python has
     # three C functions that should be called before and after fork/clone:
     # https://docs.python.org/3/c-api/sys.html#c.PyOS_BeforeFork
     # This is the same that os.fork() does (cf. os_fork_impl
@@ -175,14 +167,7 @@ def execute_in_namespace(func, use_network_ns=True):
     # Luckily, the ctypes module allows us to hold the GIL while executing the
     # function by using ctypes.PyDLL as library access instead of ctypes.CLL.
 
-    # Two difficulties remain:
-    # 1. On Python < 3.7, only PyOS_AfterFork() (to be called in the child) exists.
-    # Other cleanup done by os_fork_impl is not accessible to us, so we ignore it.
-    # For example, we do not take the import lock because it is only
-    # available via an internal API, and because the child should never import anything
-    # anyway (inside the container, modules might not be visible).
-
-    # 2. On all Python versions, the interpreter state in the child is inconsistent
+    # One difficulty remains: The interpreter state in the child is inconsistent
     # until PyOS_AfterFork_Child() is called. However, if we pass the Python function
     # _python_clone_child_callback() as callback to clone and do the cleanup in
     # its first line, it is too late because the Python interpreter is already used.

diff --git a/benchexec/outputhandler.py b/benchexec/outputhandler.py
@@ -879,7 +879,7 @@ def _write_rough_result_xml_to_file(self, xml, filename):
             ElementTree.ElementTree(xml).write(
                 file, encoding="utf-8", xml_declaration=True
             )
-        os.rename(temp_filename, filename)
+        os.replace(temp_filename, filename)
         if error is not None:
             xml.set("error", error)
         else:
@@ -918,7 +918,7 @@ def _write_pretty_result_xml_to_file(self, xml, filename):
             self.all_created_files.discard(filename)
             self.all_created_files.add(actual_filename)
         else:
-            os.rename(actual_filename, filename)
+            os.replace(actual_filename, filename)
             self.all_created_files.add(filename)
 
         return filename

diff --git a/benchexec/runexecutor.py b/benchexec/runexecutor.py
@@ -529,7 +529,7 @@ def _setup_output_file(self, output_filename, args, write_header=True):
                 os.makedirs(parent_dir, exist_ok=True)
             output_file = open(output_filename, "w")  # override existing file
         except OSError as e:
-            sys.exit(e)
+            sys.exit("Could not write to output file: " + str(e))
 
         if write_header:
             output_file.write(

diff --git a/benchexec/tablegenerator/__init__.py b/benchexec/tablegenerator/__init__.py
@@ -698,7 +698,9 @@ def apply_task_list(runset_results, tasks):
     """
     Set the results of all RunSetResult elements so that they contain the same tasks
     in the same order. For missing tasks a dummy element is inserted.
+    Returns the number of missing run results.
     """
+    missing = 0
     for runset in runset_results:
         # create mapping from id to RunResult object
         dic = {run_result.task_id: run_result for run_result in runset.results}
@@ -707,7 +709,8 @@ def apply_task_list(runset_results, tasks):
         for task in tasks:
             run_result = dic.get(task)
             if run_result is None:
-                logging.info("    No result for task %s in '%s'.", task, runset)
+                logging.debug("    No result for task %s in '%s'.", task, runset)
+                missing += 1
                 # create an empty dummy element
                 run_result = RunResult(
                     task,
@@ -719,6 +722,7 @@ def apply_task_list(runset_results, tasks):
                     [None] * len(runset.columns),
                 )
             runset.results.append(run_result)
+    return missing
 
 
 class RunResult(object):
@@ -1036,6 +1040,8 @@ def all_equal_result(listOfResults):
             "---> DIFFERENCES FOUND IN ALL ROWS, NO NEED TO CREATE DIFFERENCE TABLE"
         )
         return []
+    else:
+        logging.info("The difference table will have %s rows.", len(rowsDiff))
 
     return rowsDiff
 
@@ -1683,9 +1689,22 @@ def main(args=None):
         # merge list of tasks, so that all run sets contain the same tasks
         task_list = util.merge_lists(r.get_tasks() for r in runSetResults)
     # make sure that all run sets contain exactly the same tasks in the same order
-    apply_task_list(runSetResults, task_list)
+    missing_run_results = apply_task_list(runSetResults, task_list)
 
     rows = get_rows(runSetResults)
+    logging.info(
+        "The resulting table will have %s rows and %s columns (in %s run sets).",
+        len(rows),
+        sum(len(runset.columns) for runset in runSetResults),
+        len(runSetResults),
+    )
+    if missing_run_results:
+        logging.info(
+            "%s run results were not found in the input files "
+            "and will be empty in the table (%s run results are present).",
+            missing_run_results,
+            len(rows) * len(runSetResults) - missing_run_results,
+        )
     if not rows:
         handle_error("No results found, no tables produced.")
     rowsDiff = filter_rows_with_differences(rows) if options.write_diff_table else []

diff --git a/benchexec/tablegenerator/htmltable.py b/benchexec/tablegenerator/htmltable.py
@@ -217,7 +217,7 @@ def _get_task_counts(rows):
     max_score = None
     for row in rows:
         if not row.id.property:
-            logging.info("Missing property for task %s.", row.id)
+            logging.debug("Missing property for task %s.", row.id)
             continue
         expected_result = row.id.expected_result
         if not expected_result:

diff --git a/benchexec/test_runexecutor.py b/benchexec/test_runexecutor.py
@@ -46,16 +46,16 @@ def setUpClass(cls):
 
         cls.cgroups = Cgroups.initialize()
 
+        cls.echo = shutil.which("echo") or "/bin/echo"
+        cls.sleep = shutil.which("sleep") or "/bin/sleep"
+        cls.cat = shutil.which("cat") or "/bin/cat"
+
     def setUp(self, *args, **kwargs):
         with self.skip_if_logs(
             "Cannot reliably kill sub-processes without freezer cgroup"
         ):
             self.runexecutor = RunExecutor(*args, use_namespaces=False, **kwargs)
 
-        self.echo = shutil.which("echo") or "/bin/echo"
-        self.sleep = shutil.which("sleep") or "/bin/sleep"
-        self.cat = shutil.which("cat") or "/bin/cat"
-
     @contextlib.contextmanager
     def skip_if_logs(self, error_msg):
         """A context manager that automatically marks the test as skipped if SystemExit

diff --git a/contrib/aws/awsexecutor.py b/contrib/aws/awsexecutor.py
@@ -233,7 +233,7 @@ def execute_benchmark(benchmark, output_handler):
             logging.debug("Handling url: %s", aws_s3_link)
             aws_s3_link_encoded = urllib.parse.quote(aws_s3_link, safe=":/")
             logging.debug("Downloading file from url: %s", aws_s3_link_encoded)
-            result_file = requests.get(aws_s3_link_encoded)
+            result_file = requests.get(aws_s3_link_encoded)  # noqa: S113
             with zipfile.ZipFile(io.BytesIO(result_file.content)) as zipf:
                 zipf.extractall(benchmark.log_folder)
     except KeyboardInterrupt:

diff --git a/debian/changelog b/debian/changelog
@@ -1,3 +1,9 @@
+benchexec (3.16-1) jammy; urgency=medium
+
+  * New upstream version.
+
+ -- Philipp Wendler <debian@philippwendler.de>  Mon, 06 Feb 2023 13:23:03 +0100
+
 benchexec (3.15-1) jammy; urgency=medium
 
   * New upstream version.

diff --git a/doc/DEVELOPMENT.md b/doc/DEVELOPMENT.md
@@ -30,9 +30,9 @@ and to install BenchExec in development mode within this environment:
 
     virtualenv -p /usr/bin/python3 path/to/venv
     source path/to/venv/bin/activate
-    pip install -e path/to/benchexec/working/directory
+    pip install -e "path/to/benchexec/working/directory[dev]"
 
-This will automatically install all dependencies
+This will automatically install all required and `dev` dependencies
 and place appropriate start scripts on the PATH.
 
 Some tests of BenchExec require the `lxml` Python module.
@@ -80,9 +80,11 @@ please raise an issue.
         git tag -s <VERSION>
 
  * In a clean checkout and in a virtual environment with Python 3 (as described above),
-   create the release archives:
+   exececute the following commands to build the source distribution (`sdist`) and
+   `wheel` file:
 
-        python3 setup.py sdist bdist_wheel
+        python3 -m pip install build
+        python3 -m build
 
  * Sign the files and upload them to PyPi inside the build directory:
 

diff --git a/release.sh b/release.sh
@@ -83,9 +83,11 @@ pushd "$TEMP3/benchexec"
 pip install "pip >= 10.0" "setuptools >= 42.0.0, < 58" "wheel >= 0.32.0"
 # avoid the wheel on PyPi for nose, it does not work on Python 3.10
 pip install nose --no-binary :all:
-pip install -e "."
-python setup.py nosetests
-python setup.py sdist bdist_wheel
+# install build if it is not installed by default (it usually is)
+pip install build
+pip install -e ".[dev]"
+python -m nose
+python -m build
 popd
 deactivate
 cp "$TEMP3/benchexec/dist/"* "$DIST_DIR"

diff --git a/setup.cfg b/setup.cfg
@@ -55,13 +55,14 @@ license_files =
 packages = benchexec, benchexec.tablegenerator, benchexec.tools
 install_requires =
   PyYAML >= 3.12
-setup_requires =
-  nose >= 1.0
-  lxml
-  PyYAML >= 3.12
 test_suite = nose.collector
 zip_safe = True
 
+[options.extras_require]
+dev =
+  nose >= 1.0
+  lxml
+
 [options.entry_points]
 console_scripts =
   runexec = benchexec.runexecutor:main