From 4ce31c1ffc83a44e7b06e4719a0bc8b6074aa64f Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sat, 14 Aug 2021 06:38:57 +0200 Subject: [PATCH 01/16] adding easyconfigs: AlphaFold-2.0.0-foss-2020b.eb --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 76 +++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb new file mode 100644 index 00000000000..aa4d2744649 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -0,0 +1,76 @@ +easyblock = 'PythonBundle' + +name = 'AlphaFold' +version = '2.0.0' + +homepage = 'https://deepmind.com/research/case-studies/alphafold' +description = "AlphaFold can predict protein structures with atomic accuracy even where no similar structure is known" + +toolchain = {'name': 'foss', 'version': '2020b'} + +builddependencies = [ + # required for installing dm-tree + ('Bazel', '3.7.2'), +] + +dependencies = [ + ('Python', '3.8.6'), + ('SciPy-bundle', '2020.11'), + ('PyYAML', '5.3.1'), + ('TensorFlow', '2.5.0'), + ('Biopython', '1.78'), + ('jax', '0.2.19'), # also provides absl-py +] + +use_pip = True + +exts_list = [ + ('toolz', '0.11.1', { + 'checksums': ['c7a47921f07822fe534fb1c01c9931ab335a4390c782bd28c6bcc7c2f71f3fbf'], + }), + ('chex', '0.0.8', { + 'checksums': ['d6ce1329470116b6f172a72b1131bfd4d11fb7eb465e6077c3b36224b5b09fe4'], + }), + ('tabulate', '0.8.9', { + 'checksums': ['eb1d13f25760052e8931f2ef80aaf6045a6cceb47514db8beab24cded16f13a7'], + }), + ('dm-haiku', '0.0.4', { + # source tarball on PyPI is missing requirements.txt, + # see https://github.com/deepmind/dm-haiku/issues/44 + 'source_urls': ['https://github.com/deepmind/dm-haiku/archive/refs/tags/'], + 'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}], + 'checksums': 
['e9896d161938b53f869fde207f3f6fca496d09b2a47d21dd9b6b65e897ad6aab'], + 'modulename': 'haiku', + }), + ('dm-tree', '0.1.6', { + 'checksums': ['6776404b23b4522c01012ffb314632aba092c9541577004ab153321e87da439a'], + 'modulename': 'tree', + }), + ('websocket-client', '1.2.1', { + 'checksums': ['8dfb715d8a992f5712fff8c843adae94e22b22a99b2c5e6b0ec4a1a981cc4e0d'], + 'modulename': 'websocket', + }), + ('docker', '5.0.0', { + 'checksums': ['3e8bc47534e0ca9331d72c32f2881bb13b93ded0bcdeab3c833fb7cf61c0a9a5'], + }), + ('immutabledict', '2.1.0', { + 'checksums': ['673fb8f30f46d23dd394050b979f5b7f4c5398982b99ebc854fb873e646b967a'], + }), + ('contextlib2', '21.6.0', { + 'checksums': ['ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869'], + }), + ('ml_collections', '0.1.0', { + 'checksums': ['59a17fcd1c140153009788517f304caaddd7a94f06690f9f0ed09987beebcf3c'], + # see https://github.com/google/ml_collections/issues/7 + 'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ", + }), + (name, version, { + 'source_urls': ['https://github.com/deepmind/alphafold/archive/refs/tags/'], + 'sources': ['v%(version)s.tar.gz'], + 'checksums': ['78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2'], + }), +] + +sanity_pip_check = True + +moduleclass = 'bio' From 4d65a507ed330f7b71f19a63aa9daec5fb365117 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Sat, 14 Aug 2021 08:00:36 +0200 Subject: [PATCH 02/16] stick to TensorFlow 2.4.1 dependency for AlphaFold 2.0.0 with foss/2020b --- easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index aa4d2744649..9fe9c5a44e3 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -17,7 +17,7 @@ dependencies = [ 
('Python', '3.8.6'), ('SciPy-bundle', '2020.11'), ('PyYAML', '5.3.1'), - ('TensorFlow', '2.5.0'), + ('TensorFlow', '2.4.1'), ('Biopython', '1.78'), ('jax', '0.2.19'), # also provides absl-py ] From 75b034f4679c9c20eb352190abb0bd3754d3ee5d Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 09:32:29 +0200 Subject: [PATCH 03/16] add patch for AlphaFold to fix setup.py, add OpenMM and PDBFixer dependencies, also install scripts and 'alphafold' command --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 25 +++++++++++++- .../AlphaFold-2.0.0_fix-packages.patch | 13 +++++++ .../p/PDBFixer/PDBFixer-1.7-foss-2020b.eb | 34 +++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_fix-packages.patch create mode 100644 easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index 9fe9c5a44e3..6da221ba1a7 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -19,6 +19,8 @@ dependencies = [ ('PyYAML', '5.3.1'), ('TensorFlow', '2.4.1'), ('Biopython', '1.78'), + ('OpenMM', '7.5.1'), # for simtk + ('PDBFixer', '1.7'), ('jax', '0.2.19'), # also provides absl-py ] @@ -67,10 +69,31 @@ exts_list = [ (name, version, { 'source_urls': ['https://github.com/deepmind/alphafold/archive/refs/tags/'], 'sources': ['v%(version)s.tar.gz'], - 'checksums': ['78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2'], + 'patches': ['AlphaFold-%(version)s_fix-packages.patch'], + 'checksums': [ + '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz + '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch + ], }), ] +postinstallcmds = [ + "mkdir -p %(installdir)s/bin", + # 
run_alphafold.py script is missing a shebang... + "echo '#!/usr/bin/env python' > %(installdir)s/bin/run_alphafold.py", + "cat %(builddir)s/AlphaFold/alphafold-%(version)s/run_alphafold.py >> %(installdir)s/bin/run_alphafold.py", + "chmod a+x %(installdir)s/bin/run_alphafold.py", + "cd %(installdir)s/bin && ln -s run_alphafold.py alphafold", + "cp -a %(builddir)s/AlphaFold/alphafold-%(version)s/scripts %(installdir)s/", +] + +sanity_check_paths = { + 'files': ['bin/alphafold', 'bin/run_alphafold.py'], + 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], +} + +sanity_check_commands = ["alphafold --help"] + sanity_pip_check = True moduleclass = 'bio' diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_fix-packages.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_fix-packages.patch new file mode 100644 index 00000000000..f42a59513e1 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_fix-packages.patch @@ -0,0 +1,13 @@ +also include alphafold.data.tools and alphafold.model.tf, which are not picked up by find_packages() +author: Kenneth Hoste (HPC-UGent) +--- alphafold-2.0.0/setup.py.orig 2021-08-14 08:42:43.030919330 +0200 ++++ alphafold-2.0.0/setup.py 2021-08-14 08:43:59.183245602 +0200 +@@ -26,7 +26,7 @@ + author_email='alphafold@deepmind.com', + license='Apache License, Version 2.0', + url='https://github.com/deepmind/alphafold', +- packages=find_packages(), ++ packages=find_packages() + ['alphafold/data/tools', 'alphafold/model/tf'], + install_requires=[ + 'absl-py', + 'biopython', diff --git a/easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb b/easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb new file mode 100644 index 00000000000..89173238ef0 --- /dev/null +++ b/easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb @@ -0,0 +1,34 @@ +easyblock = 'PythonPackage' + +name = 'PDBFixer' +version = '1.7' + +homepage = 'https://github.com/openmm/pdbfixer' +description = """PDBFixer is an easy 
to use application for fixing problems in Protein Data Bank files in preparation +for simulating them.""" + +toolchain = {'name': 'foss', 'version': '2020b'} + +source_urls = ['https://github.com/openmm/pdbfixer/archive/refs/tags/'] +sources = ['v%(version)s.tar.gz'] +checksums = ['a0bef3c52a7bbe69a6aea5333f51f3e7d158339be5829aed19b0344bd66d4eea'] + +dependencies = [ + ('Python', '3.8.6'), + ('SciPy-bundle', '2020.11'), + ('OpenMM', '7.5.1'), +] + +download_dep_fail = True +use_pip = True + +sanity_check_paths = { + 'files': ['bin/pdbfixer'], + 'dirs': ['lib/python%(pyshortver)s/site-packages'], +} + +sanity_check_commands = ["pdbfixer --help"] + +sanity_pip_check = True + +moduleclass = 'bio' From 9fb09eeba11c0c47b9d5497cc2b22fb3d40b7690 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 11:34:21 +0200 Subject: [PATCH 04/16] add missing dependencies for AlphaFold + patch to pick up on them and take into account $ALPHAFOLD_DATA_DIR, fix sanity check command --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 11 ++- .../AlphaFold-2.0.0_data-dep-paths.patch | 96 +++++++++++++++++++ 2 files changed, 105 insertions(+), 2 deletions(-) create mode 100644 easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index 6da221ba1a7..af15e18801c 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -20,6 +20,9 @@ dependencies = [ ('TensorFlow', '2.4.1'), ('Biopython', '1.78'), ('OpenMM', '7.5.1'), # for simtk + ('HH-suite', '3.3.0'), + ('HMMER', '3.3.2'), + ('Kalign', '3.3.1'), ('PDBFixer', '1.7'), ('jax', '0.2.19'), # also provides absl-py ] @@ -69,10 +72,14 @@ exts_list = [ (name, version, { 'source_urls': ['https://github.com/deepmind/alphafold/archive/refs/tags/'], 'sources': ['v%(version)s.tar.gz'], - 
'patches': ['AlphaFold-%(version)s_fix-packages.patch'], + 'patches': [ + 'AlphaFold-%(version)s_fix-packages.patch', + 'AlphaFold-%(version)s_data-dep-paths.patch', + ], 'checksums': [ '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch + '5a1b17002273cc05a06541297b8017b1928063fe54cffe0b133fa4b39b5435ad', # AlphaFold-2.0.0_data-dep-paths.patch ], }), ] @@ -92,7 +99,7 @@ sanity_check_paths = { 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], } -sanity_check_commands = ["alphafold --help"] +sanity_check_commands = ["alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'"] sanity_pip_check = True diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch new file mode 100644 index 00000000000..96fea848153 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch @@ -0,0 +1,96 @@ +pick up on $ALPHAFOLD_DATA_DIR to specify location to downloaded data +pick up on HH-suite, HHMER, Kalign dependencies provided via EasyBuild +author: Kenneth Hoste (HPC-UGent) +diff --git a/run_alphafold.py b/run_alphafold.py +index 61f7ef2..6d80d38 100644 +--- a/run_alphafold.py ++++ b/run_alphafold.py +@@ -36,6 +36,45 @@ from alphafold.model import model + from alphafold.relax import relax + # Internal import (7716). 
+ ++ ++data_dir = os.getenv('ALPHAFOLD_DATA_DIR') ++if data_dir: ++ uniref90_database_path = os.path.join(data_dir, 'uniref90') ++ mgnify_database_path = os.path.join(data_dir, 'mgnify') ++ bfd_database_path = os.path.join(data_dir, 'bfd') ++ uniclust30_database_path = os.path.join(data_dir, 'uniclust30') ++ pdb70_database_path = os.path.join(data_dir, 'pdb70') ++ template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif') ++ obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat') ++else: ++ uniref90_database_path = None ++ mgnify_database_path = None ++ bfd_database_path = None ++ uniclust30_database_path = None ++ pdb70_database_path = None ++ template_mmcif_dir = None ++ obsolete_pdbs_path = None ++ ++hmmer_root = os.getenv('EBROOTHMMER') ++if hmmer_root: ++ jackhmmer_binary_path = os.path.join(hmmer_root, 'bin', 'jackhmmer') ++else: ++ jackhmmer_binary_path = '/usr/bin/jackhmmer' ++ ++hhsuite_root = os.getenv('EBROOTHHMINSUITE') ++if hhsuite_root: ++ hhblits_binary_path = os.path.join(hhsuite_root, 'bin', 'hhblits') ++ hhsearch_binary_path = os.path.join(hhsuite_root, 'bin', 'hhsearch') ++else: ++ hhblits_binary_path = '/usr/bin/hhblits' ++ hhsearch_binary_path = '/usr/bin/hhsearch' ++ ++kalign_root = os.getenv('EBROOTKALIGN') ++if kalign_root: ++ kalign_binary_path = os.path.join(kalign_root, 'bin', 'kalign') ++else: ++ kalign_binary_path = '/usr/bin/kalign' ++ + flags.DEFINE_list('fasta_paths', None, 'Paths to FASTA files, each containing ' + 'one sequence. Paths should be separated by commas. 
' + 'All FASTA paths must have a unique basename as the ' +@@ -44,30 +83,30 @@ flags.DEFINE_list('fasta_paths', None, 'Paths to FASTA files, each containing ' + flags.DEFINE_string('output_dir', None, 'Path to a directory that will ' + 'store the results.') + flags.DEFINE_list('model_names', None, 'Names of models to use.') +-flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.') +-flags.DEFINE_string('jackhmmer_binary_path', '/usr/bin/jackhmmer', ++flags.DEFINE_string('data_dir', data_dir, 'Path to directory of supporting data.') ++flags.DEFINE_string('jackhmmer_binary_path', jackhmmer_binary_path, + 'Path to the JackHMMER executable.') +-flags.DEFINE_string('hhblits_binary_path', '/usr/bin/hhblits', ++flags.DEFINE_string('hhblits_binary_path', hhblits_binary_path, + 'Path to the HHblits executable.') +-flags.DEFINE_string('hhsearch_binary_path', '/usr/bin/hhsearch', ++flags.DEFINE_string('hhsearch_binary_path', hhsearch_binary_path, + 'Path to the HHsearch executable.') +-flags.DEFINE_string('kalign_binary_path', '/usr/bin/kalign', ++flags.DEFINE_string('kalign_binary_path', kalign_binary_path, + 'Path to the Kalign executable.') +-flags.DEFINE_string('uniref90_database_path', None, 'Path to the Uniref90 ' ++flags.DEFINE_string('uniref90_database_path', uniref90_database_path, 'Path to the Uniref90 ' + 'database for use by JackHMMER.') +-flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify ' ++flags.DEFINE_string('mgnify_database_path', mgnify_database_path, 'Path to the MGnify ' + 'database for use by JackHMMER.') +-flags.DEFINE_string('bfd_database_path', None, 'Path to the BFD ' ++flags.DEFINE_string('bfd_database_path', bfd_database_path, 'Path to the BFD ' + 'database for use by HHblits.') +-flags.DEFINE_string('uniclust30_database_path', None, 'Path to the Uniclust30 ' ++flags.DEFINE_string('uniclust30_database_path', uniclust30_database_path, 'Path to the Uniclust30 ' + 'database for use by HHblits.') 
+-flags.DEFINE_string('pdb70_database_path', None, 'Path to the PDB70 ' ++flags.DEFINE_string('pdb70_database_path', pdb70_database_path, 'Path to the PDB70 ' + 'database for use by HHsearch.') +-flags.DEFINE_string('template_mmcif_dir', None, 'Path to a directory with ' ++flags.DEFINE_string('template_mmcif_dir', template_mmcif_dir, 'Path to a directory with ' + 'template mmCIF structures, each named <pdb_id>.cif') + flags.DEFINE_string('max_template_date', None, 'Maximum template release date ' + 'to consider. Important if folding historical test sets.') +-flags.DEFINE_string('obsolete_pdbs_path', None, 'Path to file containing a ' ++flags.DEFINE_string('obsolete_pdbs_path', obsolete_pdbs_path, 'Path to file containing a ' + 'mapping from obsolete PDB IDs to the PDB IDs of their ' + 'replacements.') + flags.DEFINE_enum('preset', 'full_dbs', ['full_dbs', 'casp14'], From 7ecf851bb86ef903b28ca3c2afcf8ea7b822c903 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 11:49:08 +0200 Subject: [PATCH 05/16] avoid warnings about non-default values for required options to run_alphafold.py script --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 2 +- .../AlphaFold-2.0.0_data-dep-paths.patch | 19 ++++++++++++++++++- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index af15e18801c..9038dac8d56 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -79,7 +79,7 @@ exts_list = [ 'checksums': [ '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch - '5a1b17002273cc05a06541297b8017b1928063fe54cffe0b133fa4b39b5435ad', # AlphaFold-2.0.0_data-dep-paths.patch +
'7e19d944934b8004e4ee114c94f580d899ac503f3147fc7746c960cc5ca18c0d', # AlphaFold-2.0.0_data-dep-paths.patch ], }), ] diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch index 96fea848153..e7f052dcde7 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch @@ -2,7 +2,7 @@ pick up on $ALPHAFOLD_DATA_DIR to specify location to downloaded data pick up on HH-suite, HHMER, Kalign dependencies provided via EasyBuild author: Kenneth Hoste (HPC-UGent) diff --git a/run_alphafold.py b/run_alphafold.py -index 61f7ef2..6d80d38 100644 +index 61f7ef2..96df783 100644 --- a/run_alphafold.py +++ b/run_alphafold.py @@ -36,6 +36,45 @@ from alphafold.model import model @@ -94,3 +94,20 @@ index 61f7ef2..6d80d38 100644 'mapping from obsolete PDB IDs to the PDB IDs of their ' 'replacements.') flags.DEFINE_enum('preset', 'full_dbs', ['full_dbs', 'casp14'], +@@ -268,16 +307,8 @@ if __name__ == '__main__': + 'fasta_paths', + 'output_dir', + 'model_names', +- 'data_dir', + 'preset', +- 'uniref90_database_path', +- 'mgnify_database_path', +- 'uniclust30_database_path', +- 'bfd_database_path', +- 'pdb70_database_path', +- 'template_mmcif_dir', + 'max_template_date', +- 'obsolete_pdbs_path', + ]) + + app.run(main) From b7b1ccdd5b84c4dc9baa3d678301cd5c21eb1f7f Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 11:52:30 +0200 Subject: [PATCH 06/16] fix default value for template_mmcif_dir based on $ALPHAFOLD_DATA_DIR --- .../easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 2 +- .../a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index 9038dac8d56..cab8b7153a4 100644 ---
a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -79,7 +79,7 @@ exts_list = [ 'checksums': [ '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch - '7e19d944934b8004e4ee114c94f580d899ac503f3147fc7746c960cc5ca18c0d', # AlphaFold-2.0.0_data-dep-paths.patch + '2ccf9a9c8dbae9d47ef1958ede78710cb7ebd4c5231bdb268f44664b45f4e950', # AlphaFold-2.0.0_data-dep-paths.patch ], }), ] diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch index e7f052dcde7..d23e7c15904 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch @@ -2,7 +2,7 @@ pick up on $ALPHAFOLD_DATA_DIR to specify location to downloaded data pick up on HH-suite, HHMER, Kalign dependencies provided via EasyBuild author: Kenneth Hoste (HPC-UGent) diff --git a/run_alphafold.py b/run_alphafold.py -index 61f7ef2..96df783 100644 +index 61f7ef2..81c97a0 100644 --- a/run_alphafold.py +++ b/run_alphafold.py @@ -36,6 +36,45 @@ from alphafold.model import model @@ -17,7 +17,7 @@ index 61f7ef2..96df783 100644 + bfd_database_path = os.path.join(data_dir, 'bfd') + uniclust30_database_path = os.path.join(data_dir, 'uniclust30') + pdb70_database_path = os.path.join(data_dir, 'pdb70') -+ template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif') ++ template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files') + obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat') +else: + uniref90_database_path = None From c032e63216bc9cac93c566a85ad82f794f1d1ca6 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 12:21:02 +0200 Subject: [PATCH 07/16] add patch for HHBlits and 
JackHmmer scripts to avoid hardcoded number of cores to use --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 2 + .../a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch | 48 +++++++++++++++++++ 2 files changed, 50 insertions(+) create mode 100644 easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index cab8b7153a4..c3325e0a46a 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -75,11 +75,13 @@ exts_list = [ 'patches': [ 'AlphaFold-%(version)s_fix-packages.patch', 'AlphaFold-%(version)s_data-dep-paths.patch', + 'AlphaFold-%(version)s_n-cpu.patch', ], 'checksums': [ '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch '2ccf9a9c8dbae9d47ef1958ede78710cb7ebd4c5231bdb268f44664b45f4e950', # AlphaFold-2.0.0_data-dep-paths.patch + '90db20bf420c56ab787c1fdeddc190d47095f9717523e0058767d2c209e21a1a', # AlphaFold-2.0.0_n-cpu.patch ], }), ] diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch new file mode 100644 index 00000000000..a61e8394395 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch @@ -0,0 +1,48 @@ +don't hardcode number of cores to use by HHBlits and Jackhmmer scripts +author: Kenneth Hoste (HPC-UGent) + +diff --git a/alphafold/data/tools/hhblits.py b/alphafold/data/tools/hhblits.py +index e0aa098..e624f3a 100644 +--- a/alphafold/data/tools/hhblits.py ++++ b/alphafold/data/tools/hhblits.py +@@ -27,6 +27,8 @@ from alphafold.data.tools import utils + _HHBLITS_DEFAULT_P = 20 + _HHBLITS_DEFAULT_Z = 500 + ++HHBLITS_N_CPU = os.getenv('ALPHAFOLD_HHBLITS_N_CPU', 
os.getenv('SLURM_CPUS_ON_NODE', 4)) ++ + + class HHBlits: + """Python wrapper of the HHblits binary.""" +@@ -35,7 +37,7 @@ class HHBlits: + *, + binary_path: str, + databases: Sequence[str], +- n_cpu: int = 4, ++ n_cpu: int = HHBLITS_N_CPU, + n_iter: int = 3, + e_value: float = 0.001, + maxseq: int = 1_000_000, +diff --git a/alphafold/data/tools/jackhmmer.py b/alphafold/data/tools/jackhmmer.py +index ae53df9..800bf99 100644 +--- a/alphafold/data/tools/jackhmmer.py ++++ b/alphafold/data/tools/jackhmmer.py +@@ -24,6 +24,9 @@ from alphafold.data.tools import utils + # Internal import (7716). + + ++JACKHMMER_N_CPU = os.getenv('ALPHAFOLD_JACKHMMER_N_CPU', os.getenv('SLURM_CPUS_ON_NODE', 4)) ++ ++ + class Jackhmmer: + """Python wrapper of the Jackhmmer binary.""" + +@@ -31,7 +34,7 @@ class Jackhmmer: + *, + binary_path: str, + database_path: str, +- n_cpu: int = 8, ++ n_cpu: int = JACKHMMER_N_CPU, + n_iter: int = 1, + e_value: float = 0.0001, + z_value: Optional[int] = None, From 08d29023d21179d62a433f3217926974c70d3fc8 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 15:12:33 +0200 Subject: [PATCH 08/16] include stereo_chemical_props.txt in AlphaFold installation directory, switch to installing AlphaFold via 'components' because it now requires multiple source files --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 63 +++++++++++++------ .../AlphaFold-2.0.0_fix-scp-path.patch | 23 +++++++ 2 files changed, 68 insertions(+), 18 deletions(-) create mode 100644 easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_fix-scp-path.patch diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index c3325e0a46a..dafa3b16123 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -27,6 +27,44 @@ dependencies = [ ('jax', '0.2.19'), # also provides absl-py ] +# commit to use for
downloading stereo_chemical_props.txt and copy to alphafold/common, +# see docker/Dockerfile in AlphaFold repository +local_scp_commit = '7102c6' + +components = [ + (name, version, { + 'easyblock': 'PythonPackage', + 'source_urls': [ + 'https://github.com/deepmind/alphafold/archive/refs/tags/', + 'https://git.scicore.unibas.ch/schwede/openstructure/-/raw/%s/modules/mol/alg/src/' % local_scp_commit, + ], + 'sources': [ + 'v%(version)s.tar.gz', + { + 'download_filename': 'stereo_chemical_props.txt', + 'filename': 'stereo_chemical_props-%s.txt' % local_scp_commit, + 'extract_cmd': "cp -a %s .", + }, + ], + 'patches': [ + 'AlphaFold-%(version)s_fix-packages.patch', + 'AlphaFold-%(version)s_data-dep-paths.patch', + 'AlphaFold-%(version)s_n-cpu.patch', + 'AlphaFold-2.0.0_fix-scp-path.patch', + ], + 'checksums': [ + '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz + '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451', # stereo_chemical_props-7102c6.txt + '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch + '2ccf9a9c8dbae9d47ef1958ede78710cb7ebd4c5231bdb268f44664b45f4e950', # AlphaFold-2.0.0_data-dep-paths.patch + '90db20bf420c56ab787c1fdeddc190d47095f9717523e0058767d2c209e21a1a', # AlphaFold-2.0.0_n-cpu.patch + '83dc82a8b1c647eb7e217aef683153e98a4fc7f871a85280976c92a1bfe28f27', # AlphaFold-2.0.0_fix-scp-path.patch + ], + 'start_dir': 'alphafold-%(version)s', + 'use_pip': True, + }), +] + use_pip = True exts_list = [ @@ -69,31 +107,17 @@ exts_list = [ # see https://github.com/google/ml_collections/issues/7 'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ", }), - (name, version, { - 'source_urls': ['https://github.com/deepmind/alphafold/archive/refs/tags/'], - 'sources': ['v%(version)s.tar.gz'], - 'patches': [ - 'AlphaFold-%(version)s_fix-packages.patch', - 'AlphaFold-%(version)s_data-dep-paths.patch', - 'AlphaFold-%(version)s_n-cpu.patch', - 
], - 'checksums': [ - '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz - '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch - '2ccf9a9c8dbae9d47ef1958ede78710cb7ebd4c5231bdb268f44664b45f4e950', # AlphaFold-2.0.0_data-dep-paths.patch - '90db20bf420c56ab787c1fdeddc190d47095f9717523e0058767d2c209e21a1a', # AlphaFold-2.0.0_n-cpu.patch - ], - }), ] postinstallcmds = [ "mkdir -p %(installdir)s/bin", # run_alphafold.py script is missing a shebang... "echo '#!/usr/bin/env python' > %(installdir)s/bin/run_alphafold.py", - "cat %(builddir)s/AlphaFold/alphafold-%(version)s/run_alphafold.py >> %(installdir)s/bin/run_alphafold.py", + "cat %(builddir)s/alphafold-%(version)s/run_alphafold.py >> %(installdir)s/bin/run_alphafold.py", "chmod a+x %(installdir)s/bin/run_alphafold.py", "cd %(installdir)s/bin && ln -s run_alphafold.py alphafold", - "cp -a %(builddir)s/AlphaFold/alphafold-%(version)s/scripts %(installdir)s/", + "cp -a %(builddir)s/alphafold-%(version)s/scripts %(installdir)s/", + "cp -a %%(builddir)s/stereo_chemical_props-%s.txt %%(installdir)s/stereo_chemical_props.txt" % local_scp_commit, ] sanity_check_paths = { @@ -101,7 +125,10 @@ sanity_check_paths = { 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], } -sanity_check_commands = ["alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'"] +sanity_check_commands = [ + "python -c 'import alphafold'", + "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'", +] sanity_pip_check = True diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_fix-scp-path.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_fix-scp-path.patch new file mode 100644 index 00000000000..c44ee6be008 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_fix-scp-path.patch @@ -0,0 +1,23 @@ +fix path to stereo_chemical_props.txt when AlphaFold is installed with 
EasyBuild +author: Kenneth Hoste (HPC-UGent) +diff --git a/alphafold/common/residue_constants.py b/alphafold/common/residue_constants.py +index 318b44a..fd840e3 100644 +--- a/alphafold/common/residue_constants.py ++++ b/alphafold/common/residue_constants.py +@@ -19,6 +19,7 @@ import functools + from typing import Mapping, List, Tuple + + import numpy as np ++import os + import tree + + # Internal import (35fd). +@@ -403,7 +404,7 @@ def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]], + residue_bond_angles: dict that maps resname --> list of BondAngle tuples + """ + stereo_chemical_props_path = ( +- 'alphafold/common/stereo_chemical_props.txt') ++ os.path.join(os.getenv('EBROOTALPHAFOLD', 'alphafold/common'), 'stereo_chemical_props.txt')) + with open(stereo_chemical_props_path, 'rt') as f: + stereo_chemical_props = f.read() + lines_iter = iter(stereo_chemical_props.splitlines()) From e82389d431c23a0935d99621865191421fe23622 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 17:19:39 +0200 Subject: [PATCH 09/16] include patched OpenMM as component in AlphaFold installation --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 37 +++++++++++++-- .../a/AlphaFold/OpenMM-7.5.1_AlphaFold.patch | 45 +++++++++++++++++++ 2 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 easybuild/easyconfigs/a/AlphaFold/OpenMM-7.5.1_AlphaFold.patch diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index dafa3b16123..f91253267d6 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -9,8 +9,12 @@ description = "AlphaFold can predict protein structures with atomic accuracy eve toolchain = {'name': 'foss', 'version': '2020b'} builddependencies = [ + # CMake/Doxygen/SWIG are required for building OpenMM + ('CMake', '3.18.4'), + ('Doxygen', '1.8.20'), # required
for installing dm-tree ('Bazel', '3.7.2'), + ('SWIG', '4.0.2'), ] dependencies = [ @@ -19,7 +23,6 @@ dependencies = [ ('PyYAML', '5.3.1'), ('TensorFlow', '2.4.1'), ('Biopython', '1.78'), - ('OpenMM', '7.5.1'), # for simtk ('HH-suite', '3.3.0'), ('HMMER', '3.3.2'), ('Kalign', '3.3.1'), @@ -27,11 +30,29 @@ dependencies = [ ('jax', '0.2.19'), # also provides absl-py ] +local_openmm_preinstallopts = "export OPENMM_INCLUDE_PATH=%(installdir)s/include && " +local_openmm_preinstallopts += " export OPENMM_LIB_PATH=%(installdir)s/lib && " + # commit to use for downloading stereo_chemical_props.txt and copy to alphafold/common, # see docker/Dockerfile in AlphaFold repository local_scp_commit = '7102c6' components = [ + # for simtk + ('OpenMM', '7.5.1', { + 'easyblock': 'CMakeMake', + 'source_urls': ['https://github.com/openmm/openmm/archive/'], + 'sources': [{'download_filename': '%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}], + 'patches': [('OpenMM-%(version)s_AlphaFold.patch', 'wrappers/python')], + 'checksums': [ + 'c88d6946468a2bde2619acb834f57b859b5e114a93093cf562165612e10f4ff7', # OpenMM-7.5.1.tar.gz + '1b109dfff3af5c6aa70690bca14618612953c68840a7e64f679db7ca33c1aff6', # OpenMM-7.5.1_AlphaFold.patch + ], + 'start_dir': 'openmm-%(version)s', + 'preinstallopts': local_openmm_preinstallopts, + # required to install OpenMM Python API + 'installopts': " && cd python && python setup.py build && python setup.py install --prefix=%(installdir)s", + }), (name, version, { 'easyblock': 'PythonPackage', 'source_urls': [ @@ -39,7 +60,10 @@ components = [ 'https://git.scicore.unibas.ch/schwede/openstructure/-/raw/%s/modules/mol/alg/src/' % local_scp_commit, ], 'sources': [ - 'v%(version)s.tar.gz', + { + 'download_filename': 'v%(version)s.tar.gz', + 'filename': SOURCE_TAR_GZ, + }, { 'download_filename': 'stereo_chemical_props.txt', 'filename': 'stereo_chemical_props-%s.txt' % local_scp_commit, @@ -121,15 +145,22 @@ postinstallcmds = [ ] sanity_check_paths = { - 'files': 
['bin/alphafold', 'bin/run_alphafold.py'], + 'files': ['bin/alphafold', 'bin/run_alphafold.py', 'lib/libOpenMM.%s' % SHLIB_EXT, + 'lib/python%(pyshortver)s/site-packages/simtk/openmm/openmm.py'], 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], } sanity_check_commands = [ + "python -m simtk.testInstallation", "python -c 'import alphafold'", "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'", ] +modextrapaths = { + 'OPENMM_INCLUDE_PATH': 'include', + 'OPENMM_LIB_PATH': 'lib', +} + sanity_pip_check = True moduleclass = 'bio' diff --git a/easybuild/easyconfigs/a/AlphaFold/OpenMM-7.5.1_AlphaFold.patch b/easybuild/easyconfigs/a/AlphaFold/OpenMM-7.5.1_AlphaFold.patch new file mode 100644 index 00000000000..878ffd11b03 --- /dev/null +++ b/easybuild/easyconfigs/a/AlphaFold/OpenMM-7.5.1_AlphaFold.patch @@ -0,0 +1,45 @@ +custom patch for OpenMM to use in conjunction with AlphaFold +see https://github.com/deepmind/alphafold/blob/main/docker/openmm.patch + +Index: simtk/openmm/app/topology.py +=================================================================== +--- simtk.orig/openmm/app/topology.py ++++ simtk/openmm/app/topology.py +@@ -356,19 +356,35 @@ + def isCyx(res): + names = [atom.name for atom in res._atoms] + return 'SG' in names and 'HG' not in names ++ # This function is used to prevent multiple di-sulfide bonds from being ++ # assigned to a given atom. This is a DeepMind modification. 
++ def isDisulfideBonded(atom): ++ for b in self._bonds: ++ if (atom in b and b[0].name == 'SG' and ++ b[1].name == 'SG'): ++ return True ++ ++ return False + + cyx = [res for res in self.residues() if res.name == 'CYS' and isCyx(res)] + atomNames = [[atom.name for atom in res._atoms] for res in cyx] + for i in range(len(cyx)): + sg1 = cyx[i]._atoms[atomNames[i].index('SG')] + pos1 = positions[sg1.index] ++ candidate_distance, candidate_atom = 0.3*nanometers, None + for j in range(i): + sg2 = cyx[j]._atoms[atomNames[j].index('SG')] + pos2 = positions[sg2.index] + delta = [x-y for (x,y) in zip(pos1, pos2)] + distance = sqrt(delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2]) +- if distance < 0.3*nanometers: +- self.addBond(sg1, sg2) ++ if distance < candidate_distance and not isDisulfideBonded(sg2): ++ candidate_distance = distance ++ candidate_atom = sg2 ++ # Assign bond to closest pair. ++ if candidate_atom: ++ self.addBond(sg1, candidate_atom) ++ ++ + + class Chain(object): + """A Chain object represents a chain within a Topology.""" From 6cf72d7645a5ba9fa064ce330bae693fb68c776f Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 17:21:38 +0200 Subject: [PATCH 10/16] don't pick up on $SLURM_CPUS_ON_NODE, only add environment variable to avoid hardcoding core count for jackhmmer and hhblits --- .../easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 2 +- .../easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index f91253267d6..ecfaeb43f03 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -81,7 +81,7 @@ components = [ '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451', # stereo_chemical_props-7102c6.txt
'826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch '2ccf9a9c8dbae9d47ef1958ede78710cb7ebd4c5231bdb268f44664b45f4e950', # AlphaFold-2.0.0_data-dep-paths.patch - '90db20bf420c56ab787c1fdeddc190d47095f9717523e0058767d2c209e21a1a', # AlphaFold-2.0.0_n-cpu.patch + 'dfda4dd5f9aba19fe2b6eb9a0ec583d12dcefdfee8ab8803fc57ad48d582db04', # AlphaFold-2.0.0_n-cpu.patch '83dc82a8b1c647eb7e217aef683153e98a4fc7f871a85280976c92a1bfe28f27', # AlphaFold-2.0.0_fix-scp-path.patch ], 'start_dir': 'alphafold-%(version)s', diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch index a61e8394395..8849e17462c 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch @@ -1,4 +1,5 @@ -don't hardcode number of cores to use by HHBlits and Jackhmmer scripts +don't hardcode number of cores to use by HHBlits and Jackhmmer scripts; +see https://github.com/deepmind/alphafold/issues/110 author: Kenneth Hoste (HPC-UGent) diff --git a/alphafold/data/tools/hhblits.py b/alphafold/data/tools/hhblits.py @@ -9,7 +10,7 @@ index e0aa098..e624f3a 100644 _HHBLITS_DEFAULT_P = 20 _HHBLITS_DEFAULT_Z = 500 -+HHBLITS_N_CPU = os.getenv('ALPHAFOLD_HHBLITS_N_CPU', os.getenv('SLURM_CPUS_ON_NODE', 4)) ++HHBLITS_N_CPU = os.getenv('ALPHAFOLD_HHBLITS_N_CPU', 4) + class HHBlits: @@ -31,7 +32,7 @@ index ae53df9..800bf99 100644 # Internal import (7716). 
-+JACKHMMER_N_CPU = os.getenv('ALPHAFOLD_JACKHMMER_N_CPU', os.getenv('SLURM_CPUS_ON_NODE', 4)) ++JACKHMMER_N_CPU = os.getenv('ALPHAFOLD_JACKHMMER_N_CPU', 8) + + class Jackhmmer: From 0342cb8286b955c06325d27dbf74d51b38a815a1 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 16 Aug 2021 17:37:07 +0200 Subject: [PATCH 11/16] fix default paths to downloaded AlphaFold data --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 2 +- .../a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index ecfaeb43f03..b0ff243dc72 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -80,7 +80,7 @@ components = [ '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451', # stereo_chemical_props-7102c6.txt '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch - '2ccf9a9c8dbae9d47ef1958ede78710cb7ebd4c5231bdb268f44664b45f4e950', # AlphaFold-2.0.0_data-dep-paths.patch + '1d426748bec4f0f3cc49448ebeb14c4350717f88287afd2aabf6738dea387d81', # AlphaFold-2.0.0_data-dep-paths.patch 'dfda4dd5f9aba19fe2b6eb9a0ec583d12dcefdfee8ab8803fc57ad48d582db04', # AlphaFold-2.0.0_n-cpu.patch '83dc82a8b1c647eb7e217aef683153e98a4fc7f871a85280976c92a1bfe28f27', # AlphaFold-2.0.0_fix-scp-path.patch ], diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch index d23e7c15904..0dcbedcdce8 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch @@ -1,4 +1,5 @@ pick up on 
$ALPHAFOLD_DATA_DIR to specify location to downloaded data +(see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py); pick up on HH-suite, HHMER, Kalign dependencies provided via EasyBuild author: Kenneth Hoste (HPC-UGent) diff --git a/run_alphafold.py b/run_alphafold.py @@ -12,11 +13,11 @@ index 61f7ef2..81c97a0 100644 + +data_dir = os.getenv('ALPHAFOLD_DATA_DIR') +if data_dir: -+ uniref90_database_path = os.path.join(data_dir, 'uniref90') -+ mgnify_database_path = os.path.join(data_dir, 'mgnify') -+ bfd_database_path = os.path.join(data_dir, 'bfd') -+ uniclust30_database_path = os.path.join(data_dir, 'uniclust30') -+ pdb70_database_path = os.path.join(data_dir, 'pdb70') ++ uniref90_database_path = os.path.join(data_dir, 'uniref90', 'uniref90.fasta') ++ mgnify_database_path = os.path.join(data_dir, 'mgnify', 'mgy_clusters.fa') ++ bfd_database_path = os.path.join(data_dir, 'bfd', 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt') ++ uniclust30_database_path = os.path.join(data_dir, 'uniclust30', 'uniclust30_2018_08', 'uniclust30_2018_08') ++ pdb70_database_path = os.path.join(data_dir, 'pdb70', 'pdb70') + template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files') + obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat') +else: From 178463798adc6db719de7efb506515d26515af3c Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Tue, 17 Aug 2021 11:38:17 +0200 Subject: [PATCH 12/16] make run_alphafold.py print a warning when $ALPHAFOLD_DATA_DIR is not defined --- .../easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 2 +- .../a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index b0ff243dc72..91d5b657e7f 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ 
b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -80,7 +80,7 @@ components = [ '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451', # stereo_chemical_props-7102c6.txt '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db', # AlphaFold-2.0.0_fix-packages.patch - '1d426748bec4f0f3cc49448ebeb14c4350717f88287afd2aabf6738dea387d81', # AlphaFold-2.0.0_data-dep-paths.patch + '7223e297b23f90816219095696bc6453910c617add60b907a0d3e869da8733bb', # AlphaFold-2.0.0_data-dep-paths.patch 'dfda4dd5f9aba19fe2b6eb9a0ec583d12dcefdfee8ab8803fc57ad48d582db04', # AlphaFold-2.0.0_n-cpu.patch '83dc82a8b1c647eb7e217aef683153e98a4fc7f871a85280976c92a1bfe28f27', # AlphaFold-2.0.0_fix-scp-path.patch ], diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch index 0dcbedcdce8..674bcd2f935 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch @@ -6,7 +6,7 @@ diff --git a/run_alphafold.py b/run_alphafold.py index 61f7ef2..81c97a0 100644 --- a/run_alphafold.py +++ b/run_alphafold.py -@@ -36,6 +36,45 @@ from alphafold.model import model +@@ -36,6 +36,46 @@ from alphafold.model import model from alphafold.relax import relax # Internal import (7716). 
@@ -21,6 +21,7 @@ index 61f7ef2..81c97a0 100644 + template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files') + obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat') +else: ++ sys.stderr.write("$ALPHAFOLD_DATA_DIR is not defined!") + uniref90_database_path = None + mgnify_database_path = None + bfd_database_path = None From 0dd5ba041d50c44ca79a48ba19e80c838ca869ce Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 30 Aug 2021 12:12:06 +0200 Subject: [PATCH 13/16] avoid that existing OpenMM Python bindings get wiped + run tests for run_alphafold.py script --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index 91d5b657e7f..084f05c3353 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -33,6 +33,11 @@ dependencies = [ local_openmm_preinstallopts = "export OPENMM_INCLUDE_PATH=%(installdir)s/include && " local_openmm_preinstallopts += " export OPENMM_LIB_PATH=%(installdir)s/lib && " +# required to install OpenMM Python API; +# avoid that setup.py partially uninstalls existing OpenMM Python bindings... 
+local_openmm_installopts = " && cd python && sed -i 's/uninstall()/pass/g' setup.py && " +local_openmm_installopts += "python setup.py build && python setup.py install --prefix=%(installdir)s" + # commit to use for downloading stereo_chemical_props.txt and copy to alphafold/common, # see docker/Dockerfile in AlphaFold repository local_scp_commit = '7102c6' @@ -50,8 +55,7 @@ components = [ ], 'start_dir': 'openmm-%(version)s', 'preinstallopts': local_openmm_preinstallopts, - # required to install OpenMM Python API - 'installopts': " && cd python && python setup.py build && python setup.py install --prefix=%(installdir)s", + 'installopts': local_openmm_installopts, }), (name, version, { 'easyblock': 'PythonPackage', @@ -142,12 +146,16 @@ postinstallcmds = [ "cd %(installdir)s/bin && ln -s run_alphafold.py alphafold", "cp -a %(builddir)s/alphafold-%(version)s/scripts %(installdir)s/", "cp -a %%(builddir)s/stereo_chemical_props-%s.txt %%(installdir)s/stereo_chemical_props.txt" % local_scp_commit, + # run tests for run_alphafold.py script; + # shouldn't do this in sanity check to avoid breaking use of --module-only + "PYTHONPATH=%(installdir)s/lib/python%(pyshortver)s/site-packages:$PYTHONPATH " + "python %(builddir)s/alphafold-%(version)s/run_alphafold_test.py", ] sanity_check_paths = { 'files': ['bin/alphafold', 'bin/run_alphafold.py', 'lib/libOpenMM.%s' % SHLIB_EXT, 'lib/python%(pyshortver)s/site-packages/simtk/openmm/openmm.py'], - 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], + 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts', 'stereo_chemical_props.txt'], } sanity_check_commands = [ @@ -156,11 +164,11 @@ sanity_check_commands = [ "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'", ] +sanity_pip_check = True + modextrapaths = { 'OPENMM_INCLUDE_PATH': 'include', 'OPENMM_LIB_PATH': 'lib', } -sanity_pip_check = True - moduleclass = 'bio' From a53f6c809eb2401b3d417c6369db7d0e3c52f330 Mon Sep 17 00:00:00 
2001 From: Kenneth Hoste Date: Mon, 30 Aug 2021 13:47:17 +0200 Subject: [PATCH 14/16] fix AlphaFold sanity check (stereo_chemical_props.txt is a file, not a directory) --- .../easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index 084f05c3353..68e9ef8b855 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -154,8 +154,8 @@ postinstallcmds = [ sanity_check_paths = { 'files': ['bin/alphafold', 'bin/run_alphafold.py', 'lib/libOpenMM.%s' % SHLIB_EXT, - 'lib/python%(pyshortver)s/site-packages/simtk/openmm/openmm.py'], - 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts', 'stereo_chemical_props.txt'], + 'lib/python%(pyshortver)s/site-packages/simtk/openmm/openmm.py', 'stereo_chemical_props.txt'], + 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], } sanity_check_commands = [ From bf7ab99cb86b28677dcff6e9e27f0df8dd74bb04 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 30 Aug 2021 15:13:06 +0200 Subject: [PATCH 15/16] collapse PDBFixer easyconfig as extension into AlphaFold easyconfig, to avoid depending on two variants of OpenMM --- .../a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 9 +++-- .../p/PDBFixer/PDBFixer-1.7-foss-2020b.eb | 34 ------------------- 2 files changed, 7 insertions(+), 36 deletions(-) delete mode 100644 easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index 68e9ef8b855..624edcc5ab5 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -26,7 +26,6 @@ dependencies = [ ('HH-suite', 
'3.3.0'), ('HMMER', '3.3.2'), ('Kalign', '3.3.1'), - ('PDBFixer', '1.7'), ('jax', '0.2.19'), # also provides absl-py ] @@ -96,6 +95,11 @@ components = [ use_pip = True exts_list = [ + ('PDBFixer', '1.7', { + 'source_urls': ['https://github.com/openmm/pdbfixer/archive/refs/tags/'], + 'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}], + 'checksums': ['a0bef3c52a7bbe69a6aea5333f51f3e7d158339be5829aed19b0344bd66d4eea'], + }), ('toolz', '0.11.1', { 'checksums': ['c7a47921f07822fe534fb1c01c9931ab335a4390c782bd28c6bcc7c2f71f3fbf'], }), @@ -153,12 +157,13 @@ postinstallcmds = [ ] sanity_check_paths = { - 'files': ['bin/alphafold', 'bin/run_alphafold.py', 'lib/libOpenMM.%s' % SHLIB_EXT, + 'files': ['bin/alphafold', 'bin/pdbfixer', 'bin/run_alphafold.py', 'lib/libOpenMM.%s' % SHLIB_EXT, 'lib/python%(pyshortver)s/site-packages/simtk/openmm/openmm.py', 'stereo_chemical_props.txt'], 'dirs': ['lib/python%(pyshortver)s/site-packages', 'scripts'], } sanity_check_commands = [ + "pdbfixer --help", "python -m simtk.testInstallation", "python -c 'import alphafold'", "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'", diff --git a/easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb b/easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb deleted file mode 100644 index 89173238ef0..00000000000 --- a/easybuild/easyconfigs/p/PDBFixer/PDBFixer-1.7-foss-2020b.eb +++ /dev/null @@ -1,34 +0,0 @@ -easyblock = 'PythonPackage' - -name = 'PDBFixer' -version = '1.7' - -homepage = 'https://github.com/openmm/pdbfixer' -description = """PDBFixer is an easy to use application for fixing problems in Protein Data Bank files in preparation -for simulating them.""" - -toolchain = {'name': 'foss', 'version': '2020b'} - -source_urls = ['https://github.com/openmm/pdbfixer/archive/refs/tags/'] -sources = ['v%(version)s.tar.gz'] -checksums = ['a0bef3c52a7bbe69a6aea5333f51f3e7d158339be5829aed19b0344bd66d4eea'] - -dependencies = [ - 
('Python', '3.8.6'), - ('SciPy-bundle', '2020.11'), - ('OpenMM', '7.5.1'), -] - -download_dep_fail = True -use_pip = True - -sanity_check_paths = { - 'files': ['bin/pdbfixer'], - 'dirs': ['lib/python%(pyshortver)s/site-packages'], -} - -sanity_check_commands = ["pdbfixer --help"] - -sanity_pip_check = True - -moduleclass = 'bio' From f9c5efa34b522e5f3c2d5f8ba6da4fcdd9f77591 Mon Sep 17 00:00:00 2001 From: Kenneth Hoste Date: Mon, 30 Aug 2021 15:19:29 +0200 Subject: [PATCH 16/16] consistently use %(version)s template in list of patches for AlphaFold --- easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb index 624edcc5ab5..bcfc97f3b65 100644 --- a/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb +++ b/easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb @@ -77,7 +77,7 @@ components = [ 'AlphaFold-%(version)s_fix-packages.patch', 'AlphaFold-%(version)s_data-dep-paths.patch', 'AlphaFold-%(version)s_n-cpu.patch', - 'AlphaFold-2.0.0_fix-scp-path.patch', + 'AlphaFold-%(version)s_fix-scp-path.patch', ], 'checksums': [ '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2', # v2.0.0.tar.gz