From d81cb3525b3c14aa73dbc09dd276a3fcadba28b3 Mon Sep 17 00:00:00 2001 From: Alexander Grund Date: Tue, 15 Oct 2024 10:55:26 +0200 Subject: [PATCH] Update findPythonDeps - Require Python 3.3 - Add/update documentation - Use f-strings for formatted output - Fix detecting packages where the name might appear as the "key" not the "package_name" - Set PYTHONNOUSERSITE - Use venv module instead of virtualenv to require less packages --- easybuild/scripts/findPythonDeps.py | 206 ++++++++++++++++------------ 1 file changed, 118 insertions(+), 88 deletions(-) diff --git a/easybuild/scripts/findPythonDeps.py b/easybuild/scripts/findPythonDeps.py index 23d2763148..09d0f8226f 100755 --- a/easybuild/scripts/findPythonDeps.py +++ b/easybuild/scripts/findPythonDeps.py @@ -1,4 +1,20 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 + +""" +Find Python dependencies for a given Python package after loading dependencies specified in an EasyConfig. +This is intended for writing or updating PythonBundle EasyConfigs: + 1. Create a EasyConfig with at least 'Python' as a dependency. + When updating to a new toolchain it is a good idea to reduce the dependencies to a minimum + as e.g. the new "Python" module might have different packages included. + 2. Run this script + 3. For each dependency found by this script search existing EasyConfigs for ones providing that Python package. + E.g many are contained in Python-bundle-PyPI. Some can be updated from an earlier toolchain. + 4. Add those EasyConfigs as dependencies to your new EasyConfig. + 5. Rerun this script so it takes the newly provided packages into account. + You can do steps 3-5 iteratively adding EasyConfig-dependencies one-by-one. + 6. Finally you copy the packages found by this script as "exts_list" into the new EasyConfig. + You usually want the list printed as "in install order", the format is already suitable to be copied as-is. +""" import argparse import json @@ -8,12 +24,13 @@ import subprocess import sys import tempfile +import textwrap from contextlib import contextmanager from pprint import pprint try: import pkg_resources except ImportError as e: - print('pkg_resources could not be imported: %s\nYou might need to install setuptools!' % e) + print(f'pkg_resources could not be imported: {e}\nYou might need to install setuptools!') sys.exit(1) try: @@ -22,6 +39,7 @@ _canonicalize_regex = re.compile(r"[-_.]+") def canonicalize_name(name): + """Fallback if the import doesn't work with same behavior.""" return _canonicalize_regex.sub("-", name).lower() @@ -36,16 +54,16 @@ def temporary_directory(*args, **kwargs): def extract_pkg_name(package_spec): - return re.split('<|>|=|~', args.package, 1)[0] + """Get the package name from a specification such as 'package>=3.42'""" + return re.split('<|>|=|~', package_spec, 1)[0] -def can_run(cmd, argument): +def can_run(cmd, *arguments): """Check if the given cmd and argument can be run successfully""" - with open(os.devnull, 'w') as FNULL: - try: - return subprocess.call([cmd, argument], stdout=FNULL, stderr=subprocess.STDOUT) == 0 - except (subprocess.CalledProcessError, OSError): - return False + try: + return subprocess.call([cmd, *arguments], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) == 0 + except (subprocess.CalledProcessError, OSError): + return False def run_cmd(arguments, action_desc, capture_stderr=True, **kwargs): @@ -59,13 +77,13 @@ def run_cmd(arguments, action_desc, capture_stderr=True, **kwargs): if p.returncode != 0: if err: err = "\nSTDERR:\n" + err - raise RuntimeError('Failed to %s: %s%s' % (action_desc, out, err)) + raise RuntimeError(f'Failed to {action_desc}: {out}{err}') return out def run_in_venv(cmd, venv_path, action_desc): """Run the given command in the virtualenv at the given path""" - cmd = 'source %s/bin/activate && %s' % (venv_path, cmd) + cmd = f'source {venv_path}/bin/activate && {cmd}' return run_cmd(cmd, action_desc, shell=True, executable='/bin/bash') @@ -83,12 +101,12 @@ def get_dep_tree(package_spec, verbose): print('Updating pip in virtualenv') run_in_venv('pip install --upgrade pip', venv_dir, action_desc='update pip') if verbose: - print('Installing %s into virtualenv' % package_spec) - out = run_in_venv('pip install "%s"' % package_spec, venv_dir, action_desc='install ' + package_spec) - print('%s installed: %s' % (package_spec, out)) + print(f'Installing {package_spec} into virtualenv') + out = run_in_venv(f'pip install "{package_spec}"', venv_dir, action_desc='install ' + package_spec) + print(f'{package_spec} installed: {out}') # install pipdeptree, figure out dependency tree for installed package run_in_venv('pip install pipdeptree', venv_dir, action_desc='install pipdeptree') - dep_tree = run_in_venv('pipdeptree -j -p "%s"' % package_name, + dep_tree = run_in_venv(f'pipdeptree -j -p "{package_name}"', venv_dir, action_desc='collect dependencies') return json.loads(dep_tree) @@ -108,7 +126,7 @@ def find_deps(pkgs, dep_tree): for orig_pkg in cur_pkgs: count += 1 if count > MAX_PACKAGES: - raise RuntimeError("Aborting after checking %s packages. Possibly cycle detected!" % MAX_PACKAGES) + raise RuntimeError(f"Aborting after checking {MAX_PACKAGES} packages. Possibly cycle detected!") pkg = canonicalize_name(orig_pkg) matching_entries = [entry for entry in dep_tree if pkg in (entry['package']['package_name'], entry['package']['key'])] @@ -116,11 +134,11 @@ def find_deps(pkgs, dep_tree): matching_entries = [entry for entry in dep_tree if orig_pkg in (entry['package']['package_name'], entry['package']['key'])] if not matching_entries: - raise RuntimeError("Found no installed package for '%s' in %s" % (pkg, dep_tree)) + raise RuntimeError(f"Found no installed package for '{pkg}' in {dep_tree}") if len(matching_entries) > 1: - raise RuntimeError("Found multiple installed packages for '%s' in %s" % (pkg, dep_tree)) + raise RuntimeError(f"Found multiple installed packages for '{pkg}' in {dep_tree}") entry = matching_entries[0] - res.append((entry['package']['package_name'], entry['package']['installed_version'])) + res.append(entry['package']) # Add dependencies to list of packages to check next # Could call this function recursively but that might exceed the max recursion depth next_pkgs.update(dep['package_name'] for dep in entry['dependencies']) @@ -128,6 +146,7 @@ def find_deps(pkgs, dep_tree): def print_deps(package, verbose): + """Print dependencies of the given package that are not installed yet in a format usable as 'exts_list'""" if verbose: print('Getting dep tree of ' + package) dep_tree = get_dep_tree(package, verbose) @@ -144,13 +163,18 @@ def print_deps(package, verbose): res = [] handled = set() for dep in reversed(deps): - if dep not in handled: - handled.add(dep) - if dep[0] in installed_modules: + # Tuple as we need it for exts_list + dep_entry = (dep['package_name'], dep['installed_version']) + if dep_entry not in handled: + handled.add(dep_entry) + # Need to check for key and package_name as naming is not consistent. E.g.: + # "PyQt5-sip": 'key': 'pyqt5-sip', 'package_name': 'PyQt5-sip' + # "jupyter-core": 'key': 'jupyter-core', 'package_name': 'jupyter_core' + if dep['key'] in installed_modules or dep['package_name'] in installed_modules: if verbose: - print("Skipping installed module '%s'" % dep[0]) + print(f"Skipping installed module '{dep['package_name']}'") else: - res.append(dep) + res.append(dep_entry) print("List of dependencies in (likely) install order:") pprint(res, indent=4) @@ -158,69 +182,75 @@ def print_deps(package, verbose): pprint(sorted(res), indent=4) -examples = [ - 'Example usage with EasyBuild (after installing dependency modules):', - '\t' + sys.argv[0] + ' --ec TensorFlow-2.3.4.eb tensorflow==2.3.4', - 'Which is the same as:', - '\t' + ' && '.join(['eb TensorFlow-2.3.4.eb --dump-env', - 'source TensorFlow-2.3.4.env', - sys.argv[0] + ' tensorflow==2.3.4', - ]), -] -parser = argparse.ArgumentParser( - description='Find dependencies of Python packages by installing it in a temporary virtualenv. ', - epilog='\n'.join(examples), - formatter_class=argparse.RawDescriptionHelpFormatter -) -parser.add_argument('package', metavar='python-pkg-spec', - help='Python package spec, e.g. tensorflow==2.3.4') -parser.add_argument('--ec', metavar='easyconfig', help='EasyConfig to use as the build environment. ' - 'You need to have dependency modules installed already!') -parser.add_argument('--verbose', help='Verbose output', action='store_true') -args = parser.parse_args() - -if args.ec: - if not can_run('eb', '--version'): - print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!') - sys.exit(1) - if args.verbose: - print('Checking with EasyBuild for missing dependencies') - missing_dep_out = run_cmd(['eb', args.ec, '--missing'], - capture_stderr=False, - action_desc='Get missing dependencies' - ) - excluded_dep = '(%s)' % os.path.basename(args.ec) - missing_deps = [dep for dep in missing_dep_out.split('\n') - if dep.startswith('*') and excluded_dep not in dep - ] - if missing_deps: - print('You need to install all modules on which %s depends first!' % args.ec) - print('\n\t'.join(['Missing:'] + missing_deps)) - sys.exit(1) - - # If the --ec argument is a (relative) existing path make it absolute so we can find it after the chdir - ec_arg = os.path.abspath(args.ec) if os.path.exists(args.ec) else args.ec - with temporary_directory() as tmp_dir: - old_dir = os.getcwd() - os.chdir(tmp_dir) - if args.verbose: - print('Running EasyBuild to get build environment') - run_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment') - os.chdir(old_dir) +def main(): + """Entrypoint of the script""" + examples = textwrap.dedent(f""" + Example usage with EasyBuild (after installing dependency modules): + {sys.argv[0]} --ec TensorFlow-2.3.4.eb tensorflow==2.3.4 + Which is the same as: + eb TensorFlow-2.3.4.eb --dump-env && source TensorFlow-2.3.4.env && {sys.argv[0]} tensorflow==2.3.4 + Using the '--ec' parameter is recommended as the latter requires manually updating the .env file + after each change to the EasyConfig. + """) + parser = argparse.ArgumentParser( + description='Find dependencies of Python packages by installing it in a temporary virtualenv. ', + epilog='\n'.join(examples), + formatter_class=argparse.RawDescriptionHelpFormatter + ) + parser.add_argument('package', metavar='python-pkg-spec', + help='Python package spec, e.g. tensorflow==2.3.4') + parser.add_argument('--ec', metavar='easyconfig', help='EasyConfig to use as the build environment. ' + 'You need to have dependency modules installed already!') + parser.add_argument('--verbose', help='Verbose output', action='store_true') + args = parser.parse_args() - cmd = "source %s/*.env && python %s '%s'" % (tmp_dir, sys.argv[0], args.package) + if args.ec: + if not can_run('eb', '--version'): + print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!') + sys.exit(1) if args.verbose: - cmd += ' --verbose' - print('Restarting script in new build environment') - - out = run_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash') - print(out) -else: - if not can_run('virtualenv', '--version'): - print('Virtualenv not found or executable. ' + - 'Make sure it is installed (e.g. in the currently loaded Python module)!') - sys.exit(1) - if 'PIP_PREFIX' in os.environ: - print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.") - del os.environ['PIP_PREFIX'] - print_deps(args.package, args.verbose) + print('Checking with EasyBuild for missing dependencies') + missing_dep_out = run_cmd(['eb', args.ec, '--missing'], + capture_stderr=False, + action_desc='Get missing dependencies' + ) + excluded_dep = f'({os.path.basename(args.ec)})' + missing_deps = [dep for dep in missing_dep_out.split('\n') + if dep.startswith('*') and excluded_dep not in dep + ] + if missing_deps: + print(f'You need to install all modules on which {args.ec} depends first!') + print('\n\t'.join(['Missing:'] + missing_deps)) + sys.exit(1) + + # If the --ec argument is a (relative) existing path make it absolute so we can find it after the chdir + ec_arg = os.path.abspath(args.ec) if os.path.exists(args.ec) else args.ec + with temporary_directory() as tmp_dir: + old_dir = os.getcwd() + os.chdir(tmp_dir) + if args.verbose: + print('Running EasyBuild to get build environment') + run_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment') + os.chdir(old_dir) + + cmd = f"source {tmp_dir}/*.env && python {sys.argv[0]} '{args.package}'" + if args.verbose: + cmd += ' --verbose' + print('Restarting script in new build environment') + + out = run_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash') + print(out) + else: + if not can_run('python', '-m', 'venv', '-h'): + print('Virtualenv not found or executable. ' + + 'Make sure it is installed (e.g. in the currently loaded Python module)!') + sys.exit(1) + if 'PIP_PREFIX' in os.environ: + print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.") + del os.environ['PIP_PREFIX'] + os.environ['PYTHONNOUSERSITE'] = '1' + print_deps(args.package, args.verbose) + + +if __name__ == "__main__": + main()