Skip to content

Commit

Permalink
Update findPythonDeps
Browse files Browse the repository at this point in the history
- Require Python 3.3
- Add/update documentation
- Use f-strings for formatted output
- Fix detecting packages where the name might appear as the "key" not the "package_name"
- Set PYTHONNOUSERSITE
- Use venv module instead of virtualenv to require less packages
  • Loading branch information
Flamefire committed Oct 15, 2024
1 parent 3b01ba9 commit d81cb35
Showing 1 changed file with 118 additions and 88 deletions.
206 changes: 118 additions & 88 deletions easybuild/scripts/findPythonDeps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
#!/usr/bin/env python
#!/usr/bin/env python3

"""
Find Python dependencies for a given Python package after loading dependencies specified in an EasyConfig.
This is intended for writing or updating PythonBundle EasyConfigs:
1. Create a EasyConfig with at least 'Python' as a dependency.
When updating to a new toolchain it is a good idea to reduce the dependencies to a minimum
as e.g. the new "Python" module might have different packages included.
2. Run this script
3. For each dependency found by this script search existing EasyConfigs for ones providing that Python package.
E.g many are contained in Python-bundle-PyPI. Some can be updated from an earlier toolchain.
4. Add those EasyConfigs as dependencies to your new EasyConfig.
5. Rerun this script so it takes the newly provided packages into account.
You can do steps 3-5 iteratively adding EasyConfig-dependencies one-by-one.
6. Finally you copy the packages found by this script as "exts_list" into the new EasyConfig.
You usually want the list printed as "in install order", the format is already suitable to be copied as-is.
"""

import argparse
import json
Expand All @@ -8,12 +24,13 @@
import subprocess
import sys
import tempfile
import textwrap
from contextlib import contextmanager
from pprint import pprint
try:
import pkg_resources
except ImportError as e:
print('pkg_resources could not be imported: %s\nYou might need to install setuptools!' % e)
print(f'pkg_resources could not be imported: {e}\nYou might need to install setuptools!')
sys.exit(1)

try:
Expand All @@ -22,6 +39,7 @@
_canonicalize_regex = re.compile(r"[-_.]+")

def canonicalize_name(name):
"""Fallback if the import doesn't work with same behavior."""
return _canonicalize_regex.sub("-", name).lower()


Expand All @@ -36,16 +54,16 @@ def temporary_directory(*args, **kwargs):


def extract_pkg_name(package_spec):
return re.split('<|>|=|~', args.package, 1)[0]
"""Get the package name from a specification such as 'package>=3.42'"""
return re.split('<|>|=|~', package_spec, 1)[0]


def can_run(cmd, argument):
def can_run(cmd, *arguments):
"""Check if the given cmd and argument can be run successfully"""
with open(os.devnull, 'w') as FNULL:
try:
return subprocess.call([cmd, argument], stdout=FNULL, stderr=subprocess.STDOUT) == 0
except (subprocess.CalledProcessError, OSError):
return False
try:
return subprocess.call([cmd, *arguments], stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) == 0
except (subprocess.CalledProcessError, OSError):
return False


def run_cmd(arguments, action_desc, capture_stderr=True, **kwargs):
Expand All @@ -59,13 +77,13 @@ def run_cmd(arguments, action_desc, capture_stderr=True, **kwargs):
if p.returncode != 0:
if err:
err = "\nSTDERR:\n" + err
raise RuntimeError('Failed to %s: %s%s' % (action_desc, out, err))
raise RuntimeError(f'Failed to {action_desc}: {out}{err}')
return out


def run_in_venv(cmd, venv_path, action_desc):
"""Run the given command in the virtualenv at the given path"""
cmd = 'source %s/bin/activate && %s' % (venv_path, cmd)
cmd = f'source {venv_path}/bin/activate && {cmd}'
return run_cmd(cmd, action_desc, shell=True, executable='/bin/bash')


Expand All @@ -83,12 +101,12 @@ def get_dep_tree(package_spec, verbose):
print('Updating pip in virtualenv')
run_in_venv('pip install --upgrade pip', venv_dir, action_desc='update pip')
if verbose:
print('Installing %s into virtualenv' % package_spec)
out = run_in_venv('pip install "%s"' % package_spec, venv_dir, action_desc='install ' + package_spec)
print('%s installed: %s' % (package_spec, out))
print(f'Installing {package_spec} into virtualenv')
out = run_in_venv(f'pip install "{package_spec}"', venv_dir, action_desc='install ' + package_spec)
print(f'{package_spec} installed: {out}')
# install pipdeptree, figure out dependency tree for installed package
run_in_venv('pip install pipdeptree', venv_dir, action_desc='install pipdeptree')
dep_tree = run_in_venv('pipdeptree -j -p "%s"' % package_name,
dep_tree = run_in_venv(f'pipdeptree -j -p "{package_name}"',
venv_dir, action_desc='collect dependencies')
return json.loads(dep_tree)

Expand All @@ -108,26 +126,27 @@ def find_deps(pkgs, dep_tree):
for orig_pkg in cur_pkgs:
count += 1
if count > MAX_PACKAGES:
raise RuntimeError("Aborting after checking %s packages. Possibly cycle detected!" % MAX_PACKAGES)
raise RuntimeError(f"Aborting after checking {MAX_PACKAGES} packages. Possibly cycle detected!")
pkg = canonicalize_name(orig_pkg)
matching_entries = [entry for entry in dep_tree
if pkg in (entry['package']['package_name'], entry['package']['key'])]
if not matching_entries:
matching_entries = [entry for entry in dep_tree
if orig_pkg in (entry['package']['package_name'], entry['package']['key'])]
if not matching_entries:
raise RuntimeError("Found no installed package for '%s' in %s" % (pkg, dep_tree))
raise RuntimeError(f"Found no installed package for '{pkg}' in {dep_tree}")
if len(matching_entries) > 1:
raise RuntimeError("Found multiple installed packages for '%s' in %s" % (pkg, dep_tree))
raise RuntimeError(f"Found multiple installed packages for '{pkg}' in {dep_tree}")
entry = matching_entries[0]
res.append((entry['package']['package_name'], entry['package']['installed_version']))
res.append(entry['package'])
# Add dependencies to list of packages to check next
# Could call this function recursively but that might exceed the max recursion depth
next_pkgs.update(dep['package_name'] for dep in entry['dependencies'])
return res


def print_deps(package, verbose):
"""Print dependencies of the given package that are not installed yet in a format usable as 'exts_list'"""
if verbose:
print('Getting dep tree of ' + package)
dep_tree = get_dep_tree(package, verbose)
Expand All @@ -144,83 +163,94 @@ def print_deps(package, verbose):
res = []
handled = set()
for dep in reversed(deps):
if dep not in handled:
handled.add(dep)
if dep[0] in installed_modules:
# Tuple as we need it for exts_list
dep_entry = (dep['package_name'], dep['installed_version'])
if dep_entry not in handled:
handled.add(dep_entry)
# Need to check for key and package_name as naming is not consistent. E.g.:
# "PyQt5-sip": 'key': 'pyqt5-sip', 'package_name': 'PyQt5-sip'
# "jupyter-core": 'key': 'jupyter-core', 'package_name': 'jupyter_core'
if dep['key'] in installed_modules or dep['package_name'] in installed_modules:
if verbose:
print("Skipping installed module '%s'" % dep[0])
print(f"Skipping installed module '{dep['package_name']}'")
else:
res.append(dep)
res.append(dep_entry)

print("List of dependencies in (likely) install order:")
pprint(res, indent=4)
print("Sorted list of dependencies:")
pprint(sorted(res), indent=4)


examples = [
'Example usage with EasyBuild (after installing dependency modules):',
'\t' + sys.argv[0] + ' --ec TensorFlow-2.3.4.eb tensorflow==2.3.4',
'Which is the same as:',
'\t' + ' && '.join(['eb TensorFlow-2.3.4.eb --dump-env',
'source TensorFlow-2.3.4.env',
sys.argv[0] + ' tensorflow==2.3.4',
]),
]
parser = argparse.ArgumentParser(
description='Find dependencies of Python packages by installing it in a temporary virtualenv. ',
epilog='\n'.join(examples),
formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('package', metavar='python-pkg-spec',
help='Python package spec, e.g. tensorflow==2.3.4')
parser.add_argument('--ec', metavar='easyconfig', help='EasyConfig to use as the build environment. '
'You need to have dependency modules installed already!')
parser.add_argument('--verbose', help='Verbose output', action='store_true')
args = parser.parse_args()

if args.ec:
if not can_run('eb', '--version'):
print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!')
sys.exit(1)
if args.verbose:
print('Checking with EasyBuild for missing dependencies')
missing_dep_out = run_cmd(['eb', args.ec, '--missing'],
capture_stderr=False,
action_desc='Get missing dependencies'
)
excluded_dep = '(%s)' % os.path.basename(args.ec)
missing_deps = [dep for dep in missing_dep_out.split('\n')
if dep.startswith('*') and excluded_dep not in dep
]
if missing_deps:
print('You need to install all modules on which %s depends first!' % args.ec)
print('\n\t'.join(['Missing:'] + missing_deps))
sys.exit(1)

# If the --ec argument is a (relative) existing path make it absolute so we can find it after the chdir
ec_arg = os.path.abspath(args.ec) if os.path.exists(args.ec) else args.ec
with temporary_directory() as tmp_dir:
old_dir = os.getcwd()
os.chdir(tmp_dir)
if args.verbose:
print('Running EasyBuild to get build environment')
run_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment')
os.chdir(old_dir)
def main():
"""Entrypoint of the script"""
examples = textwrap.dedent(f"""
Example usage with EasyBuild (after installing dependency modules):
{sys.argv[0]} --ec TensorFlow-2.3.4.eb tensorflow==2.3.4
Which is the same as:
eb TensorFlow-2.3.4.eb --dump-env && source TensorFlow-2.3.4.env && {sys.argv[0]} tensorflow==2.3.4
Using the '--ec' parameter is recommended as the latter requires manually updating the .env file
after each change to the EasyConfig.
""")
parser = argparse.ArgumentParser(
description='Find dependencies of Python packages by installing it in a temporary virtualenv. ',
epilog='\n'.join(examples),
formatter_class=argparse.RawDescriptionHelpFormatter
)
parser.add_argument('package', metavar='python-pkg-spec',
help='Python package spec, e.g. tensorflow==2.3.4')
parser.add_argument('--ec', metavar='easyconfig', help='EasyConfig to use as the build environment. '
'You need to have dependency modules installed already!')
parser.add_argument('--verbose', help='Verbose output', action='store_true')
args = parser.parse_args()

cmd = "source %s/*.env && python %s '%s'" % (tmp_dir, sys.argv[0], args.package)
if args.ec:
if not can_run('eb', '--version'):
print('EasyBuild not found or executable. Make sure it is in your $PATH when using --ec!')
sys.exit(1)
if args.verbose:
cmd += ' --verbose'
print('Restarting script in new build environment')

out = run_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash')
print(out)
else:
if not can_run('virtualenv', '--version'):
print('Virtualenv not found or executable. ' +
'Make sure it is installed (e.g. in the currently loaded Python module)!')
sys.exit(1)
if 'PIP_PREFIX' in os.environ:
print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.")
del os.environ['PIP_PREFIX']
print_deps(args.package, args.verbose)
print('Checking with EasyBuild for missing dependencies')
missing_dep_out = run_cmd(['eb', args.ec, '--missing'],
capture_stderr=False,
action_desc='Get missing dependencies'
)
excluded_dep = f'({os.path.basename(args.ec)})'
missing_deps = [dep for dep in missing_dep_out.split('\n')
if dep.startswith('*') and excluded_dep not in dep
]
if missing_deps:
print(f'You need to install all modules on which {args.ec} depends first!')
print('\n\t'.join(['Missing:'] + missing_deps))
sys.exit(1)

# If the --ec argument is a (relative) existing path make it absolute so we can find it after the chdir
ec_arg = os.path.abspath(args.ec) if os.path.exists(args.ec) else args.ec
with temporary_directory() as tmp_dir:
old_dir = os.getcwd()
os.chdir(tmp_dir)
if args.verbose:
print('Running EasyBuild to get build environment')
run_cmd(['eb', ec_arg, '--dump-env', '--force'], action_desc='Dump build environment')
os.chdir(old_dir)

cmd = f"source {tmp_dir}/*.env && python {sys.argv[0]} '{args.package}'"
if args.verbose:
cmd += ' --verbose'
print('Restarting script in new build environment')

out = run_cmd(cmd, action_desc='Run in new environment', shell=True, executable='/bin/bash')
print(out)
else:
if not can_run('python', '-m', 'venv', '-h'):
print('Virtualenv not found or executable. ' +
'Make sure it is installed (e.g. in the currently loaded Python module)!')
sys.exit(1)
if 'PIP_PREFIX' in os.environ:
print("$PIP_PREFIX is set. Unsetting it as it doesn't work well with virtualenv.")
del os.environ['PIP_PREFIX']
os.environ['PYTHONNOUSERSITE'] = '1'
print_deps(args.package, args.verbose)


if __name__ == "__main__":
main()

0 comments on commit d81cb35

Please sign in to comment.