Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for allowing some PyTorch tests to fail + print warning if one or more tests fail #2742

Merged
merged 7 commits into from
Jul 7, 2022
26 changes: 22 additions & 4 deletions easybuild/easyblocks/generic/pythonpackage.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,8 +639,12 @@ def build_step(self):
# We consider the build and install output together as downloads likely happen here if this is run
self.install_cmd_output += out

def test_step(self):
"""Test the built Python package."""
def test_step(self, return_output_ec=False):
"""
Test the built Python package.

:param return_output_ec: if True, return output and exit code of test command as a tuple (out, ec)
"""

if isinstance(self.cfg['runtest'], string_type):
self.testcmd = self.cfg['runtest']
Expand All @@ -649,6 +653,8 @@ def test_step(self):
extrapath = ""
testinstalldir = None

out, ec = (None, None)

if self.testinstall:
# install in test directory and export PYTHONPATH

Expand All @@ -670,12 +676,24 @@ def test_step(self):

if self.testcmd:
testcmd = self.testcmd % {'python': self.python_cmd}
cmd = ' '.join([extrapath, self.cfg['pretestopts'], testcmd, self.cfg['testopts']])
run_cmd(cmd, log_all=True, simple=True)
cmd = ' '.join([
extrapath,
self.cfg['pretestopts'],
testcmd,
self.cfg['testopts'],
])

if return_output_ec:
(out, ec) = run_cmd(cmd, log_all=False, log_ok=False, simple=False)
else:
run_cmd(cmd, log_all=True, simple=True)

if testinstalldir:
remove_dir(testinstalldir)

if return_output_ec:
return (out, ec)

def install_step(self):
"""Install Python package to a custom path using setup.py"""

Expand Down
51 changes: 44 additions & 7 deletions easybuild/easyblocks/p/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,16 @@
import os
import re
import tempfile
import easybuild.tools.environment as env
from distutils.version import LooseVersion
from easybuild.easyblocks.generic.pythonpackage import PythonPackage
from easybuild.framework.easyconfig import CUSTOM
from easybuild.tools.build_log import EasyBuildError
from easybuild.tools.build_log import EasyBuildError, print_warning
from easybuild.tools.config import build_option
import easybuild.tools.environment as env
from easybuild.tools.filetools import symlink, apply_regex_substitutions
from easybuild.tools.modules import get_software_root, get_software_version
from easybuild.tools.systemtools import POWER, get_cpu_architecture
from easybuild.tools.filetools import symlink, apply_regex_substitutions
from easybuild.tools.utilities import nub


class EB_PyTorch(PythonPackage):
Expand All @@ -49,9 +50,10 @@ class EB_PyTorch(PythonPackage):
def extra_options():
extra_vars = PythonPackage.extra_options()
extra_vars.update({
'excluded_tests': [{}, 'Mapping of architecture strings to list of tests to be excluded', CUSTOM],
'custom_opts': [[], 'List of options for the build/install command. Can be used to change the defaults ' +
'set by the PyTorch EasyBlock, for example ["USE_MKLDNN=0"].', CUSTOM],
'custom_opts': [[], "List of options for the build/install command. Can be used to change the defaults " +
"set by the PyTorch EasyBlock, for example ['USE_MKLDNN=0'].", CUSTOM],
'excluded_tests': [{}, "Mapping of architecture strings to list of tests to be excluded", CUSTOM],
'max_failed_tests': [0, "Maximum number of failing tests", CUSTOM],
})
extra_vars['download_dep_fail'][0] = True
extra_vars['sanity_pip_check'][0] = True
Expand Down Expand Up @@ -253,7 +255,42 @@ def test_step(self):
'python': self.python_cmd,
'excluded_tests': ' '.join(excluded_tests)
})
super(EB_PyTorch, self).test_step()

(tests_out, tests_ec) = super(EB_PyTorch, self).test_step(return_output_ec=True)

ran_tests_hits = re.findall(r"^Ran (?P<test_cnt>[0-9]+) tests in", tests_out, re.M)
test_cnt = 0
for hit in ran_tests_hits:
test_cnt += int(hit)

failed_tests = nub(re.findall(r"^(?P<failed_test_name>.*) failed!\s*$", tests_out, re.M))
failed_test_cnt = len(failed_tests)

if failed_test_cnt:
boegel marked this conversation as resolved.
Show resolved Hide resolved
max_failed_tests = self.cfg['max_failed_tests']

test_or_tests = 'tests' if failed_test_cnt > 1 else 'test'
msg = "%d %s (out of %d) failed:\n" % (failed_test_cnt, test_or_tests, test_cnt)
msg += '\n'.join('* %s' % t for t in sorted(failed_tests))

if max_failed_tests == 0:
raise EasyBuildError(msg)
else:
msg += '\n\n' + ' '.join([
"The PyTorch test suite is known to include some flaky tests,",
"which may fail depending on the specifics of the system or the context in which they are run.",
"For this PyTorch installation, EasyBuild allows up to %d tests to fail." % max_failed_tests,
"We recommend to double check that the failing tests listed above ",
"are known to be flaky, or do not affect your intended usage of PyTorch.",
"In case of doubt, reach out to the EasyBuild community (via GitHub, Slack, or mailing list).",
])
print_warning(msg)

if failed_test_cnt > max_failed_tests:
raise EasyBuildError("Too many failed tests (%d), maximum allowed is %d",
failed_test_cnt, max_failed_tests)
elif tests_ec:
raise EasyBuildError("Test command had non-zero exit code (%s), but no failed tests found?!", tests_ec)

def test_cases_step(self):
# Make PyTorch tests not use the user home
Expand Down