Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{bio}[foss/2020b] AlphaFold v2.0.0 w/ Python 3.8.6 #13744

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
4ce31c1
adding easyconfigs: AlphaFold-2.0.0-foss-2020b.eb
boegel Aug 14, 2021
4d65a50
stick to TensorFlow 2.4.1 dependency for AlphaFold 2.0.0 with foss/2020b
boegel Aug 14, 2021
75b034f
add patch for AlphaFold to fix setup.py, add OpenMM and PDBFixer depe…
boegel Aug 16, 2021
9fb09ee
add missing dependencies for AlphaFold + patch to pick up on them and…
boegel Aug 16, 2021
7ecf851
avoid warnings about non-default values for required options to run_a…
boegel Aug 16, 2021
b7b1ccd
fix default value for template_mmcif_dir based on $ALPHAFOLD_DATA_DIR
boegel Aug 16, 2021
c032e63
add patch for HHBlits and JackHmmer scripts to avoid hardcoded number…
boegel Aug 16, 2021
08d2902
include stereo_chemical_props.txt in AlphaFold installation directory…
boegel Aug 16, 2021
e82389d
include patched OpenMM as component in AlphaFold installation
boegel Aug 16, 2021
6cf72d7
don't pick up on $SLURM_CPUS_ON_NODE, only add environment variable t…
boegel Aug 16, 2021
0342cb8
fix default paths to downloaded AlphaFold data
boegel Aug 16, 2021
0d5795d
Merge branch 'develop' into 20210814063853_new_pr_AlphaFold200
boegel Aug 16, 2021
1784637
make run_alphafold.py print a warning when $ALPHAFOLD_DATA_DIR is not…
boegel Aug 17, 2021
e380a8c
Merge branch 'develop' of https://github.com/easybuilders/easybuild-e…
boegel Aug 27, 2021
0dd5ba0
avoid that existing OpenMM Python bindings get wiped + run tests for …
boegel Aug 30, 2021
a53f6c8
fix AlphaFold sanity check (stereo_chemical_props.txt is a file, not …
boegel Aug 30, 2021
bf7ab99
collapse PDBFixer easyconfig as extension into AlphaFold easyconfig, …
boegel Aug 30, 2021
f9c5efa
consistently use %(version)s template in list of patches for AlphaFold
boegel Aug 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# EasyBuild easyconfig for AlphaFold 2.0.0, built with the foss/2020b toolchain
easyblock = 'PythonBundle'

name = 'AlphaFold'
version = '2.0.0'

homepage = 'https://deepmind.com/research/case-studies/alphafold'
description = (
    "AlphaFold can predict protein structures with atomic accuracy "
    "even where no similar structure is known"
)

toolchain = {
    'name': 'foss',
    'version': '2020b',
}

# build-time only: Bazel is required for installing the dm-tree extension
builddependencies = [('Bazel', '3.7.2')]

# runtime dependencies, provided as separate modules
dependencies = [
    ('Python', '3.8.6'),
    ('SciPy-bundle', '2020.11'),
    ('PyYAML', '5.3.1'),
    ('TensorFlow', '2.4.1'),
    ('Biopython', '1.78'),
    ('OpenMM', '7.5.1'),  # for simtk
    ('HH-suite', '3.3.0'),
    ('HMMER', '3.3.2'),
    ('Kalign', '3.3.1'),
    ('PDBFixer', '1.7'),
    ('jax', '0.2.19'),  # also provides absl-py
]
boegel marked this conversation as resolved.
Show resolved Hide resolved

use_pip = True

# Python packages installed as extensions of this bundle, in the order listed;
# AlphaFold itself comes last.
# Each entry is (name, version, options); 'checksums' holds the SHA256 of every source and patch file.
exts_list = [
    ('toolz', '0.11.1', {
        'checksums': ['c7a47921f07822fe534fb1c01c9931ab335a4390c782bd28c6bcc7c2f71f3fbf'],
    }),
    ('chex', '0.0.8', {
        'checksums': ['d6ce1329470116b6f172a72b1131bfd4d11fb7eb465e6077c3b36224b5b09fe4'],
    }),
    ('tabulate', '0.8.9', {
        'checksums': ['eb1d13f25760052e8931f2ef80aaf6045a6cceb47514db8beab24cded16f13a7'],
    }),
    ('dm-haiku', '0.0.4', {
        # source tarball on PyPI is missing requirements.txt,
        # see https://github.com/deepmind/dm-haiku/issues/44
        'source_urls': ['https://github.com/deepmind/dm-haiku/archive/refs/tags/'],
        # download the GitHub tag tarball, but store it under the standard source filename
        'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}],
        'checksums': ['e9896d161938b53f869fde207f3f6fca496d09b2a47d21dd9b6b65e897ad6aab'],
        'modulename': 'haiku',
    }),
    ('dm-tree', '0.1.6', {
        'checksums': ['6776404b23b4522c01012ffb314632aba092c9541577004ab153321e87da439a'],
        'modulename': 'tree',
    }),
    ('websocket-client', '1.2.1', {
        'checksums': ['8dfb715d8a992f5712fff8c843adae94e22b22a99b2c5e6b0ec4a1a981cc4e0d'],
        'modulename': 'websocket',
    }),
    ('docker', '5.0.0', {
        'checksums': ['3e8bc47534e0ca9331d72c32f2881bb13b93ded0bcdeab3c833fb7cf61c0a9a5'],
    }),
    ('immutabledict', '2.1.0', {
        'checksums': ['673fb8f30f46d23dd394050b979f5b7f4c5398982b99ebc854fb873e646b967a'],
    }),
    ('contextlib2', '21.6.0', {
        'checksums': ['ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869'],
    }),
    ('ml_collections', '0.1.0', {
        'checksums': ['59a17fcd1c140153009788517f304caaddd7a94f06690f9f0ed09987beebcf3c'],
        # see https://github.com/google/ml_collections/issues/7
        # (empty requirements files are created right before the install command runs)
        'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ",
    }),
    (name, version, {
        'source_urls': ['https://github.com/deepmind/alphafold/archive/refs/tags/'],
        'sources': ['v%(version)s.tar.gz'],
        'patches': [
            'AlphaFold-%(version)s_fix-packages.patch',
            'AlphaFold-%(version)s_data-dep-paths.patch',
        ],
        # order matches: source tarball first, then the patches in the order listed above
        'checksums': [
            '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2',  # v2.0.0.tar.gz
            '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db',  # AlphaFold-2.0.0_fix-packages.patch
            '2ccf9a9c8dbae9d47ef1958ede78710cb7ebd4c5231bdb268f44664b45f4e950',  # AlphaFold-2.0.0_data-dep-paths.patch
        ],
    }),
]

# unpacked AlphaFold sources in the build directory, and the installed entry-point script
local_srcdir = '%(builddir)s/AlphaFold/alphafold-%(version)s'
local_script = '%(installdir)s/bin/run_alphafold.py'

# shell commands run after installation: install run_alphafold.py as an executable
# (also reachable as 'alphafold') and copy the upstream 'scripts' directory
postinstallcmds = [
    "mkdir -p %(installdir)s/bin",
    # run_alphafold.py script is missing a shebang, so write one first and append the script to it
    "echo '#!/usr/bin/env python' > " + local_script,
    "cat " + local_srcdir + "/run_alphafold.py >> " + local_script,
    "chmod a+x " + local_script,
    "cd %(installdir)s/bin && ln -s run_alphafold.py alphafold",
    "cp -a " + local_srcdir + "/scripts %(installdir)s/",
]

sanity_check_paths = {
    'files': [
        'bin/alphafold',
        'bin/run_alphafold.py',
    ],
    'dirs': [
        'lib/python%(pyshortver)s/site-packages',
        'scripts',
    ],
}

# the help output must contain the script's one-line description
sanity_check_commands = [
    "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'",
]

sanity_pip_check = True

moduleclass = 'bio'
113 changes: 113 additions & 0 deletions easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
pick up on $ALPHAFOLD_DATA_DIR to specify location to downloaded data
pick up on HH-suite, HHMER, Kalign dependencies provided via EasyBuild
author: Kenneth Hoste (HPC-UGent)
diff --git a/run_alphafold.py b/run_alphafold.py
index 61f7ef2..81c97a0 100644
--- a/run_alphafold.py
+++ b/run_alphafold.py
@@ -36,6 +36,45 @@ from alphafold.model import model
from alphafold.relax import relax
# Internal import (7716).

+
+data_dir = os.getenv('ALPHAFOLD_DATA_DIR')
+if data_dir:
+ uniref90_database_path = os.path.join(data_dir, 'uniref90')
+ mgnify_database_path = os.path.join(data_dir, 'mgnify')
+ bfd_database_path = os.path.join(data_dir, 'bfd')
+ uniclust30_database_path = os.path.join(data_dir, 'uniclust30')
+ pdb70_database_path = os.path.join(data_dir, 'pdb70')
+ template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files')
+ obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat')
branfosj marked this conversation as resolved.
Show resolved Hide resolved
+else:
+ uniref90_database_path = None
+ mgnify_database_path = None
+ bfd_database_path = None
+ uniclust30_database_path = None
+ pdb70_database_path = None
+ template_mmcif_dir = None
+ obsolete_pdbs_path = None
+
+hmmer_root = os.getenv('EBROOTHMMER')
+if hmmer_root:
+ jackhmmer_binary_path = os.path.join(hmmer_root, 'bin', 'jackhmmer')
+else:
+ jackhmmer_binary_path = '/usr/bin/jackhmmer'
+
+hhsuite_root = os.getenv('EBROOTHHMINSUITE')
+if hhsuite_root:
+ hhblits_binary_path = os.path.join(hhsuite_root, 'bin', 'hhblits')
+ hhsearch_binary_path = os.path.join(hhsuite_root, 'bin', 'hhsearch')
+else:
+ hhblits_binary_path = '/usr/bin/hhblits'
+ hhsearch_binary_path = '/usr/bin/hhsearch'
+
+kalign_root = os.getenv('EBROOTKALIGN')
+if kalign_root:
+ kalign_binary_path = os.path.join(kalign_root, 'bin', 'kalign')
+else:
+ kalign_binary_path = '/usr/bin/kalign'
+
flags.DEFINE_list('fasta_paths', None, 'Paths to FASTA files, each containing '
'one sequence. Paths should be separated by commas. '
'All FASTA paths must have a unique basename as the '
@@ -44,30 +83,30 @@ flags.DEFINE_list('fasta_paths', None, 'Paths to FASTA files, each containing '
flags.DEFINE_string('output_dir', None, 'Path to a directory that will '
'store the results.')
flags.DEFINE_list('model_names', None, 'Names of models to use.')
-flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.')
-flags.DEFINE_string('jackhmmer_binary_path', '/usr/bin/jackhmmer',
+flags.DEFINE_string('data_dir', data_dir, 'Path to directory of supporting data.')
+flags.DEFINE_string('jackhmmer_binary_path', jackhmmer_binary_path,
'Path to the JackHMMER executable.')
-flags.DEFINE_string('hhblits_binary_path', '/usr/bin/hhblits',
+flags.DEFINE_string('hhblits_binary_path', hhblits_binary_path,
'Path to the HHblits executable.')
-flags.DEFINE_string('hhsearch_binary_path', '/usr/bin/hhsearch',
+flags.DEFINE_string('hhsearch_binary_path', hhsearch_binary_path,
'Path to the HHsearch executable.')
-flags.DEFINE_string('kalign_binary_path', '/usr/bin/kalign',
+flags.DEFINE_string('kalign_binary_path', kalign_binary_path,
'Path to the Kalign executable.')
-flags.DEFINE_string('uniref90_database_path', None, 'Path to the Uniref90 '
+flags.DEFINE_string('uniref90_database_path', uniref90_database_path, 'Path to the Uniref90 '
'database for use by JackHMMER.')
-flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify '
+flags.DEFINE_string('mgnify_database_path', mgnify_database_path, 'Path to the MGnify '
'database for use by JackHMMER.')
-flags.DEFINE_string('bfd_database_path', None, 'Path to the BFD '
+flags.DEFINE_string('bfd_database_path', bfd_database_path, 'Path to the BFD '
'database for use by HHblits.')
-flags.DEFINE_string('uniclust30_database_path', None, 'Path to the Uniclust30 '
+flags.DEFINE_string('uniclust30_database_path', uniclust30_database_path, 'Path to the Uniclust30 '
'database for use by HHblits.')
-flags.DEFINE_string('pdb70_database_path', None, 'Path to the PDB70 '
+flags.DEFINE_string('pdb70_database_path', pdb70_database_path, 'Path to the PDB70 '
'database for use by HHsearch.')
-flags.DEFINE_string('template_mmcif_dir', None, 'Path to a directory with '
+flags.DEFINE_string('template_mmcif_dir', template_mmcif_dir, 'Path to a directory with '
'template mmCIF structures, each named <pdb_id>.cif')
flags.DEFINE_string('max_template_date', None, 'Maximum template release date '
'to consider. Important if folding historical test sets.')
-flags.DEFINE_string('obsolete_pdbs_path', None, 'Path to file containing a '
+flags.DEFINE_string('obsolete_pdbs_path', obsolete_pdbs_path, 'Path to file containing a '
'mapping from obsolete PDB IDs to the PDB IDs of their '
'replacements.')
flags.DEFINE_enum('preset', 'full_dbs', ['full_dbs', 'casp14'],
@@ -268,16 +307,8 @@ if __name__ == '__main__':
'fasta_paths',
'output_dir',
'model_names',
- 'data_dir',
'preset',
- 'uniref90_database_path',
- 'mgnify_database_path',
- 'uniclust30_database_path',
- 'bfd_database_path',
- 'pdb70_database_path',
- 'template_mmcif_dir',
'max_template_date',
- 'obsolete_pdbs_path',
])

app.run(main)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
also include alphafold.data.tools and alphafold.model.tf, which are not picked up by find_packages()
author: Kenneth Hoste (HPC-UGent)
--- alphafold-2.0.0/setup.py.orig 2021-08-14 08:42:43.030919330 +0200
+++ alphafold-2.0.0/setup.py 2021-08-14 08:43:59.183245602 +0200
@@ -26,7 +26,7 @@
author_email='alphafold@deepmind.com',
license='Apache License, Version 2.0',
url='https://github.com/deepmind/alphafold',
- packages=find_packages(),
+ packages=find_packages() + ['alphafold/data/tools', 'alphafold/model/tf'],
install_requires=[
'absl-py',
'biopython',
34 changes: 34 additions & 0 deletions easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# EasyBuild easyconfig for PDBFixer 1.7, built with the foss/2020b toolchain
easyblock = 'PythonPackage'

name = 'PDBFixer'
version = '1.7'

homepage = 'https://github.com/openmm/pdbfixer'
# two-line description; the line break after "preparation" is part of the string
description = (
    "PDBFixer is an easy to use application for fixing problems in Protein Data Bank files in preparation\n"
    "for simulating them."
)

toolchain = {
    'name': 'foss',
    'version': '2020b',
}

# sources are the tagged release tarballs on GitHub
source_urls = ['https://github.com/openmm/pdbfixer/archive/refs/tags/']
sources = ['v%(version)s.tar.gz']
checksums = ['a0bef3c52a7bbe69a6aea5333f51f3e7d158339be5829aed19b0344bd66d4eea']

# runtime dependencies of PDBFixer, provided as separate modules
dependencies = [
    ('Python', '3.8.6'),
    ('SciPy-bundle', '2020.11'),
    ('OpenMM', '7.5.1'),
]

# installation options for the PythonPackage easyblock
use_pip = True
download_dep_fail = True

# what must exist after installation: the pdbfixer command and the Python site-packages directory
sanity_check_paths = {
    'files': [
        'bin/pdbfixer',
    ],
    'dirs': [
        'lib/python%(pyshortver)s/site-packages',
    ],
}

# the installed command must at least be able to print its help text
sanity_check_commands = [
    "pdbfixer --help",
]

sanity_pip_check = True

moduleclass = 'bio'