From e19717f00578068345ac683210f2154977ed1e66 Mon Sep 17 00:00:00 2001 From: Nicholas McDonnell <50747025+mcdonnnj@users.noreply.github.com> Date: Fri, 24 Feb 2023 14:39:00 -0500 Subject: [PATCH] Separate stdout and stderr in the `execute()` helper function If stderr is piped into stdout, analyzing a repository will fail whenever any file fails during cloc's processing. This is most problematic for projects that contain minified JavaScript code. cloc is designed around typical multi-line source files, so large minified JavaScript files run the risk of causing analysis to fail with a "Line count, exceeded timeout" error. Because execute() previously merged stderr into stdout, that error then caused the entire cloc analysis to fail. With this change, execute() returns stdout and stderr separately, and I added a warning that is logged if stderr is not empty following the cloc call. --- scraper/util.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/scraper/util.py b/scraper/util.py index e74875e..67c788c 100644 --- a/scraper/util.py +++ b/scraper/util.py @@ -3,7 +3,7 @@ import logging import logging.config import os -from subprocess import PIPE, STDOUT, Popen # nosec +from subprocess import PIPE, Popen # nosec import tempfile logger = logging.getLogger(__name__) @@ -22,10 +22,9 @@ def execute(command, cwd=None): raise ValueError("path does not exist: %s" % cwd) with Popen( - command, cwd=cwd, stdout=PIPE, stderr=STDOUT, shell=False + command, cwd=cwd, stdout=PIPE, stderr=PIPE, shell=False ) as process: # nosec - # We redirect stderr to stdout so we can safely ignore stderr in the returned tuple - out, _ = process.communicate() + out, err = process.communicate() if process.returncode: logging.error( @@ -34,7 +33,7 @@ def execute(command, cwd=None): process.returncode, ) - return out.decode("utf-8") + return out.decode("utf-8"), err.decode("utf-8") def configure_logging(verbose=False): @@ -136,7 +135,12 @@ def git_repo_to_sloc(url): execute(cmd) cmd = ["cloc", "--json", tmp_clone] - out = execute(cmd) + out, err 
= execute(cmd) + + if err: + logger.warning( + "Error encountered while analyzing: url=%s stderr=%s", url, err + ) try: cloc_json = json.loads(out)