Skip to content

Commit

Permalink
Merge pull request #4701 from lexming/sync-20241110
Browse files Browse the repository at this point in the history
sync with develop (2024-11-10)
  • Loading branch information
Micket authored Nov 12, 2024
2 parents 48bdb94 + 90544c8 commit 6fcfe97
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 90 deletions.
205 changes: 118 additions & 87 deletions easybuild/scripts/findPythonDeps.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,21 @@
#!/usr/bin/env python

"""
Find Python dependencies for a given Python package after loading dependencies specified in an EasyConfig.
This is intended for writing or updating PythonBundle EasyConfigs:
1. Create a EasyConfig with at least 'Python' as a dependency.
When updating to a new toolchain it is a good idea to reduce the dependencies to a minimum
as e.g. the new "Python" module might have different packages included.
2. Run this script
3. For each dependency found by this script search existing EasyConfigs for ones providing that Python package.
E.g many are contained in Python-bundle-PyPI. Some can be updated from an earlier toolchain.
4. Add those EasyConfigs as dependencies to your new EasyConfig.
5. Rerun this script so it takes the newly provided packages into account.
You can do steps 3-5 iteratively adding EasyConfig-dependencies one-by-one.
6. Finally you copy the packages found by this script as "exts_list" into the new EasyConfig.
You usually want the list printed as "in install order", the format is already suitable to be copied as-is.
"""

import argparse
import json
import os
Expand All @@ -8,12 +24,13 @@
import subprocess
import sys
import tempfile
import textwrap
from contextlib import contextmanager
from pprint import pprint
try:
import pkg_resources
except ImportError as e:
print('pkg_resources could not be imported: %s\nYou might need to install setuptools!' % e)
print(f'pkg_resources could not be imported: {e}\nYou might need to install setuptools!')
sys.exit(1)

try:
Expand All @@ -22,6 +39,7 @@
_canonicalize_regex = re.compile(r"[-_.]+")

def canonicalize_name(name):
"""Fallback if the import doesn't work with same behavior."""
return _canonicalize_regex.sub("-", name).lower()


Expand All @@ -36,16 +54,16 @@ def temporary_directory(*args, **kwargs):


def extract_pkg_name(package_spec):
return re.split('<|>|=|~', args.package, 1)[0]
"""Get the package name from a specification such as 'package>=3.42'"""
return re.split('<|>|=|~', package_spec, 1)[0]


def can_run(cmd, argument):
def can_run(cmd, *arguments):
"""Check if the given cmd and argument can be run successfully"""
with open(os.devnull, 'w') as FNULL:
try:
return subprocess.call([cmd, argument], stdout=FNULL, stderr=subprocess.STDOUT) == 0
except (subprocess.CalledProcessError, OSError):
return False
try:
return subprocess.call([cmd, *arguments], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) == 0
except (subprocess.CalledProcessError, OSError):
return False


def run_shell_cmd(arguments, action_desc, capture_stderr=True, **kwargs):
Expand All @@ -59,13 +77,13 @@ def run_shell_cmd(arguments, action_desc, capture_stderr=True, **kwargs):
if p.returncode != 0:
if err:
err = "\nSTDERR:\n" + err
raise RuntimeError('Failed to %s: %s%s' % (action_desc, out, err))
raise RuntimeError(f'Failed to {action_desc}: {out}{err}')
return out


def run_in_venv(cmd, venv_path, action_desc):
"""Run the given command in the virtualenv at the given path"""
cmd = 'source %s/bin/activate && %s' % (venv_path, cmd)
cmd = f'source {venv_path}/bin/activate && {cmd}'
return run_shell_cmd(cmd, action_desc, shell=True, executable='/bin/bash')


Expand All @@ -78,17 +96,19 @@ def get_dep_tree(package_spec, verbose):
venv_dir = os.path.join(tmp_dir, 'venv')
if verbose:
print('Creating virtualenv at ' + venv_dir)
run_shell_cmd(['virtualenv', '--system-site-packages', venv_dir], action_desc='create virtualenv')
run_shell_cmd(
[sys.executable, '-m', 'venv', '--system-site-packages', venv_dir], action_desc='create virtualenv'
)
if verbose:
print('Updating pip in virtualenv')
run_in_venv('pip install --upgrade pip', venv_dir, action_desc='update pip')
if verbose:
print('Installing %s into virtualenv' % package_spec)
out = run_in_venv('pip install "%s"' % package_spec, venv_dir, action_desc='install ' + package_spec)
print('%s installed: %s' % (package_spec, out))
print(f'Installing {package_spec} into virtualenv')
out = run_in_venv(f'pip install "{package_spec}"', venv_dir, action_desc='install ' + package_spec)
print(f'{package_spec} installed: {out}')
# install pipdeptree, figure out dependency tree for installed package
run_in_venv('pip install pipdeptree', venv_dir, action_desc='install pipdeptree')
dep_tree = run_in_venv('pipdeptree -j -p "%s"' % package_name,
dep_tree = run_in_venv(f'pipdeptree -j -p "{package_name}"',
venv_dir, action_desc='collect dependencies')
return json.loads(dep_tree)

Expand All @@ -108,26 +128,27 @@ def find_deps(pkgs, dep_tree):
for orig_pkg in cur_pkgs:
count += 1
if count > MAX_PACKAGES:
raise RuntimeError("Aborting after checking %s packages. Possibly cycle detected!" % MAX_PACKAGES)
raise RuntimeError(f"Aborting after checking {MAX_PACKAGES} packages. Possibly cycle detected!")
pkg = canonicalize_name(orig_pkg)
matching_entries = [entry for entry in dep_tree
if pkg in (entry['package']['package_name'], entry['package']['key'])]
if not matching_entries:
matching_entries = [entry for entry in dep_tree
if orig_pkg in (entry['package']['package_name'], entry['package']['key'])]
if not matching_entries:
raise RuntimeError("Found no installed package for '%s' in %s" % (pkg, dep_tree))
raise RuntimeError(f"Found no installed package for '{pkg}' in {dep_tree}")
if len(matching_entries) > 1:
raise RuntimeError("Found multiple installed packages for '%s' in %s" % (pkg, dep_tree))
raise RuntimeError(f"Found multiple installed packages for '{pkg}' in {dep_tree}")
entry = matching_entries[0]
res.append((entry['package']['package_name'], entry['package']['installed_version']))
res.append(entry['package'])
# Add dependencies to list of packages to check next
# Could call this function recursively but that might exceed the max recursion depth
next_pkgs.update(dep['package_name'] for dep in entry['dependencies'])
return res


def print_deps(package, verbose):
"""Print dependencies of the given package that are not installed yet in a format usable as 'exts_list'"""
if verbose:
print('Getting dep tree of ' + package)
dep_tree = get_dep_tree(package, verbose)
Expand All @@ -144,82 +165,92 @@ def print_deps(package, verbose):
res = []
handled = set()
for dep in reversed(deps):
if dep not in handled:
handled.add(dep)
if dep[0] in installed_modules:
# Tuple as we need it for exts_list
dep_entry = (dep['package_name'], dep['installed_version'])
if dep_entry not in handled:
handled.add(dep_entry)
# Need to check for key and package_name as naming is not consistent. E.g.:
# "PyQt5-sip": 'key': 'pyqt5-sip', 'package_name': 'PyQt5-sip'
# "jupyter-core": 'key': 'jupyter-core', 'package_name': 'jupyter_core'
if dep['key'] in installed_modules or dep['package_name'] in installed_modules:
if verbose:
print("Skipping installed module '%s'" % dep[0])
print(f"Skipping installed module '{dep['package_name']}'")
else:
res.append(dep)
res.append(dep_entry)

print("List of dependencies in (likely) install order:")
pprint(res, indent=4)
print("Sorted list of dependencies:")
pprint(sorted(res), indent=4)


examples = [
'Example usage with EasyBuild (after installing dependency modules):',
'\t' + sys.argv[0] + ' --ec TensorFlow-2.3.4.eb tensorflow==2.3.4',
'Which is the same as:',
'\t' + ' && '.join(['eb TensorFlow-2.3.4.eb --dump-env',
'source TensorFlow-2.3.4.env',
sys.argv[0] + ' tensorflow==2.3.4',
]),
]
parser = argparse.ArgumentParser(
description='Find dependencies of Python packages by installing it in a temporary virtualenv. ',
epilog='\n'.join(examples),
formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('package', metavar='python-pkg-spec',
help='Python package spec, e.g. tensorflow==2.3.4')
parser.add_argument('--ec', metavar='easyconfig', help='EasyConfig to use as the build environment. '
'You need to have dependency modules installed already!')
parser.add_argument('--verbose', help='Verbose output', action='store_true')
args = parser.parse_args()

if args.ec:
if not can_run('eb', '--version'):
print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!')
sys.exit(1)
if args.verbose:
print('Checking with EasyBuild for missing dependencies')
missing_dep_out = run_shell_cmd(['eb', args.ec, '--missing'],
capture_stderr=False,
action_desc='Get missing dependencies')
excluded_dep = '(%s)' % os.path.basename(args.ec)
missing_deps = [dep for dep in missing_dep_out.split('\n')
if dep.startswith('*') and excluded_dep not in dep
]
if missing_deps:
print('You need to install all modules on which %s depends first!' % args.ec)
print('\n\t'.join(['Missing:'] + missing_deps))
sys.exit(1)

# If the --ec argument is a (relative) existing path make it absolute so we can find it after the chdir
ec_arg = os.path.abspath(args.ec) if os.path.exists(args.ec) else args.ec
with temporary_directory() as tmp_dir:
old_dir = os.getcwd()
os.chdir(tmp_dir)
if args.verbose:
print('Running EasyBuild to get build environment')
run_shell_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment')
os.chdir(old_dir)
def main():
"""Entrypoint of the script"""
examples = textwrap.dedent(f"""
Example usage with EasyBuild (after installing dependency modules):
{sys.argv[0]} --ec TensorFlow-2.3.4.eb tensorflow==2.3.4
Which is the same as:
eb TensorFlow-2.3.4.eb --dump-env && source TensorFlow-2.3.4.env && {sys.argv[0]} tensorflow==2.3.4
Using the '--ec' parameter is recommended as the latter requires manually updating the .env file
after each change to the EasyConfig.
""")
parser = argparse.ArgumentParser(
description='Find dependencies of Python packages by installing it in a temporary virtualenv. ',
epilog='\n'.join(examples),
formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('package', metavar='python-pkg-spec',
help='Python package spec, e.g. tensorflow==2.3.4')
parser.add_argument('--ec', metavar='easyconfig', help='EasyConfig to use as the build environment. '
'You need to have dependency modules installed already!')
parser.add_argument('--verbose', help='Verbose output', action='store_true')
args = parser.parse_args()

cmd = "source %s/*.env && python %s '%s'" % (tmp_dir, sys.argv[0], args.package)
if args.ec:
if not can_run('eb', '--version'):
print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!')
sys.exit(1)
if args.verbose:
cmd += ' --verbose'
print('Restarting script in new build environment')

out = run_shell_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash')
print(out)
else:
if not can_run('virtualenv', '--version'):
print('Virtualenv not found or executable. ' +
'Make sure it is installed (e.g. in the currently loaded Python module)!')
sys.exit(1)
if 'PIP_PREFIX' in os.environ:
print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.")
del os.environ['PIP_PREFIX']
print_deps(args.package, args.verbose)
print('Checking with EasyBuild for missing dependencies')
missing_dep_out = run_shell_cmd(['eb', args.ec, '--missing'],
capture_stderr=False,
action_desc='Get missing dependencies')
excluded_dep = f'({os.path.basename(args.ec)})'
missing_deps = [dep for dep in missing_dep_out.split('\n')
if dep.startswith('*') and excluded_dep not in dep
]
if missing_deps:
print(f'You need to install all modules on which {args.ec} depends first!')
print('\n\t'.join(['Missing:'] + missing_deps))
sys.exit(1)

# If the --ec argument is a (relative) existing path make it absolute so we can find it after the chdir
ec_arg = os.path.abspath(args.ec) if os.path.exists(args.ec) else args.ec
with temporary_directory() as tmp_dir:
old_dir = os.getcwd()
os.chdir(tmp_dir)
if args.verbose:
print('Running EasyBuild to get build environment')
run_shell_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment')
os.chdir(old_dir)

cmd = f"source {tmp_dir}/*.env && python {sys.argv[0]} '{args.package}'"
if args.verbose:
cmd += ' --verbose'
print('Restarting script in new build environment')

out = run_shell_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash')
print(out)
else:
if not can_run(sys.executable, '-m', 'venv', '-h'):
print("'venv' module not found. This should be available in Python 3.3+.")
sys.exit(1)
if 'PIP_PREFIX' in os.environ:
print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.")
del os.environ['PIP_PREFIX']
os.environ['PYTHONNOUSERSITE'] = '1'
print_deps(args.package, args.verbose)


if __name__ == "__main__":
main()
11 changes: 8 additions & 3 deletions easybuild/tools/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -2090,6 +2090,7 @@ def opts_dict_to_eb_opts(args_dict):
:return: a list of strings representing command-line options for the 'eb' command
"""

allow_multiple_calls = ['amend', 'try-amend']
_log.debug("Converting dictionary %s to argument list" % args_dict)
args = []
for arg in sorted(args_dict):
Expand All @@ -2099,14 +2100,18 @@ def opts_dict_to_eb_opts(args_dict):
prefix = '--'
option = prefix + str(arg)
value = args_dict[arg]
if isinstance(value, (list, tuple)):
value = ','.join(str(x) for x in value)

if value in [True, None]:
if str(arg) in allow_multiple_calls:
if not isinstance(value, (list, tuple)):
value = [value]
args.extend(option + '=' + str(x) for x in value)
elif value in [True, None]:
args.append(option)
elif value is False:
args.append('--disable-' + option[2:])
elif value is not None:
if isinstance(value, (list, tuple)):
value = ','.join(str(x) for x in value)
args.append(option + '=' + str(value))

_log.debug("Converted dictionary %s to argument list %s" % (args_dict, args))
Expand Down
9 changes: 9 additions & 0 deletions test/framework/options.py
Original file line number Diff line number Diff line change
Expand Up @@ -7334,6 +7334,15 @@ def test_opts_dict_to_eb_opts(self):
]
self.assertEqual(opts_dict_to_eb_opts(opts_dict), expected)

# multi-call options
opts_dict = {'try-amend': ['a=1', 'b=2', 'c=3']}
expected = ['--try-amend=a=1', '--try-amend=b=2', '--try-amend=c=3']
self.assertEqual(opts_dict_to_eb_opts(opts_dict), expected)

opts_dict = {'amend': ['a=1', 'b=2', 'c=3']}
expected = ['--amend=a=1', '--amend=b=2', '--amend=c=3']
self.assertEqual(opts_dict_to_eb_opts(opts_dict), expected)


def suite():
""" returns all the testcases in this module """
Expand Down

0 comments on commit 6fcfe97

Please sign in to comment.