Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{bio}[foss/2020b] AlphaFold v2.0.0 w/ Python 3.8.6 #13744

Merged
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
4ce31c1
adding easyconfigs: AlphaFold-2.0.0-foss-2020b.eb
boegel Aug 14, 2021
4d65a50
stick to TensorFlow 2.4.1 dependency for AlphaFold 2.0.0 with foss/2020b
boegel Aug 14, 2021
75b034f
add patch for AlphaFold to fix setup.py, add OpenMM and PDBFixer depe…
boegel Aug 16, 2021
9fb09ee
add missing dependencies for AlphaFold + patch to pick up on them and…
boegel Aug 16, 2021
7ecf851
avoid warnings about non-default values for required options to run_a…
boegel Aug 16, 2021
b7b1ccd
fix default value for template_mmcif_dir based on $ALPHAFOLD_DATA_DIR
boegel Aug 16, 2021
c032e63
add patch for HHBlits and JackHmmer scripts to avoid hardcoded number…
boegel Aug 16, 2021
08d2902
include stereo_chemical_props.txt in AlphaFold installation directory…
boegel Aug 16, 2021
e82389d
include patched OpenMM as component in AlphaFold installation
boegel Aug 16, 2021
6cf72d7
don't pick up on $SLURM_CPUS_ON_NODE, only add environment variable t…
boegel Aug 16, 2021
0342cb8
fix default paths to downloaded AlphaFold data
boegel Aug 16, 2021
0d5795d
Merge branch 'develop' into 20210814063853_new_pr_AlphaFold200
boegel Aug 16, 2021
1784637
make run_alphafold.py print a warning when $ALPHAFOLD_DATA_DIR is not…
boegel Aug 17, 2021
e380a8c
Merge branch 'develop' of https://github.com/easybuilders/easybuild-e…
boegel Aug 27, 2021
0dd5ba0
avoid that existing OpenMM Python bindings get wiped + run tests for …
boegel Aug 30, 2021
a53f6c8
fix AlphaFold sanity check (stereo_chemical_props.txt is a file, not …
boegel Aug 30, 2021
bf7ab99
collapse PDBFixer easyconfig as extension into AlphaFold easyconfig, …
boegel Aug 30, 2021
f9c5efa
consistently use %(version)s template in list of patches for AlphaFold
boegel Aug 30, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
174 changes: 174 additions & 0 deletions easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0-foss-2020b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
# General metadata of this easyconfig: which easyblock drives the
# installation, what is being installed, and with which toolchain.
easyblock = 'PythonBundle'

name = 'AlphaFold'
version = '2.0.0'

homepage = 'https://deepmind.com/research/case-studies/alphafold'
description = (
    "AlphaFold can predict protein structures with atomic accuracy "
    "even where no similar structure is known"
)

toolchain = {
    'name': 'foss',
    'version': '2020b',
}

# Tools that are only needed while building; the order of the entries is
# kept exactly as it was.
builddependencies = [
    # CMake/Doxygen/SWIG are required for building OpenMM
    ('CMake', '3.18.4'),
    ('Doxygen', '1.8.20'),
    # required for installing dm-tree
    ('Bazel', '3.7.2'),
    ('SWIG', '4.0.2'),
]

# Runtime dependencies, each given as a (name, version) pair.
dependencies = [
    ('Python', '3.8.6'),
    ('SciPy-bundle', '2020.11'),
    ('PyYAML', '5.3.1'),
    ('TensorFlow', '2.4.1'),
    ('Biopython', '1.78'),
    ('HH-suite', '3.3.0'),
    ('HMMER', '3.3.2'),
    ('Kalign', '3.3.1'),
    ('PDBFixer', '1.7'),
    # jax also provides absl-py
    ('jax', '0.2.19'),
]
boegel marked this conversation as resolved.
Show resolved Hide resolved

# Shell prefix that exports the OpenMM include/lib locations inside the
# installation directory; it ends in ' && ' so a command can follow it.
local_openmm_preinstallopts = (
    "export OPENMM_INCLUDE_PATH=%(installdir)s/include && "
    " export OPENMM_LIB_PATH=%(installdir)s/lib && "
)

# required to install OpenMM Python API;
# the sed call avoids that setup.py partially uninstalls existing OpenMM Python bindings...
local_openmm_installopts = " && ".join([
    "",  # empty first item yields the leading ' && ' of the command chain
    "cd python",
    "sed -i 's/uninstall()/pass/g' setup.py",
    "python setup.py build",
    "python setup.py install --prefix=%(installdir)s",
])

# commit to use for downloading stereo_chemical_props.txt and copy to alphafold/common,
# see docker/Dockerfile in AlphaFold repository
local_scp_commit = '7102c6'

# Components installed as part of this bundle: a patched OpenMM build,
# followed by AlphaFold itself.
components = [
    # for simtk
    ('OpenMM', '7.5.1', {
        'easyblock': 'CMakeMake',
        'source_urls': ['https://github.com/openmm/openmm/archive/'],
        'sources': [{'download_filename': '%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}],
        'patches': [('OpenMM-%(version)s_AlphaFold.patch', 'wrappers/python')],
        'checksums': [
            'c88d6946468a2bde2619acb834f57b859b5e114a93093cf562165612e10f4ff7',  # OpenMM-7.5.1.tar.gz
            '1b109dfff3af5c6aa70690bca14618612953c68840a7e64f679db7ca33c1aff6',  # OpenMM-7.5.1_AlphaFold.patch
        ],
        'start_dir': 'openmm-%(version)s',
        'preinstallopts': local_openmm_preinstallopts,
        'installopts': local_openmm_installopts,
    }),
    (name, version, {
        'easyblock': 'PythonPackage',
        'source_urls': [
            'https://github.com/deepmind/alphafold/archive/refs/tags/',
            # location of stereo_chemical_props.txt at the pinned commit
            'https://git.scicore.unibas.ch/schwede/openstructure/-/raw/%s/modules/mol/alg/src/' % local_scp_commit,
        ],
        'sources': [
            {
                'download_filename': 'v%(version)s.tar.gz',
                'filename': SOURCE_TAR_GZ,
            },
            {
                # plain text file: stored under a commit-specific name and
                # copied into place instead of being unpacked
                'download_filename': 'stereo_chemical_props.txt',
                'filename': 'stereo_chemical_props-%s.txt' % local_scp_commit,
                'extract_cmd': "cp -a %s .",
            },
        ],
        # all patch names use the %(version)s template consistently
        'patches': [
            'AlphaFold-%(version)s_fix-packages.patch',
            'AlphaFold-%(version)s_data-dep-paths.patch',
            'AlphaFold-%(version)s_n-cpu.patch',
            'AlphaFold-%(version)s_fix-scp-path.patch',
        ],
        'checksums': [
            '78cf443a2d9250917b05f5d40ede140ed8e8341b42fdfa54340336aca52f53f2',  # v2.0.0.tar.gz
            '24510899eeb49167cffedec8fa45363a4d08279c0c637a403b452f7d0ac09451',  # stereo_chemical_props-7102c6.txt
            '826d2d1a5d6ac52c51a60ba210e1947d5631a1e2d76f8815305b5d23f74458db',  # AlphaFold-2.0.0_fix-packages.patch
            '7223e297b23f90816219095696bc6453910c617add60b907a0d3e869da8733bb',  # AlphaFold-2.0.0_data-dep-paths.patch
            'dfda4dd5f9aba19fe2b6eb9a0ec583d12dcefdfee8ab8803fc57ad48d582db04',  # AlphaFold-2.0.0_n-cpu.patch
            '83dc82a8b1c647eb7e217aef683153e98a4fc7f871a85280976c92a1bfe28f27',  # AlphaFold-2.0.0_fix-scp-path.patch
        ],
        'start_dir': 'alphafold-%(version)s',
        'use_pip': True,
    }),
]

# Install the extensions below with pip.
use_pip = True

# Python packages installed as extensions of this bundle, each given as
# (name, version, options); every source is pinned by its SHA256 checksum.
exts_list = [
    ('toolz', '0.11.1', {
        'checksums': ['c7a47921f07822fe534fb1c01c9931ab335a4390c782bd28c6bcc7c2f71f3fbf'],
    }),
    ('chex', '0.0.8', {
        'checksums': ['d6ce1329470116b6f172a72b1131bfd4d11fb7eb465e6077c3b36224b5b09fe4'],
    }),
    ('tabulate', '0.8.9', {
        'checksums': ['eb1d13f25760052e8931f2ef80aaf6045a6cceb47514db8beab24cded16f13a7'],
    }),
    ('dm-haiku', '0.0.4', {
        # source tarball on PyPI is missing requirements.txt,
        # see https://github.com/deepmind/dm-haiku/issues/44
        'source_urls': ['https://github.com/deepmind/dm-haiku/archive/refs/tags/'],
        'sources': [{'download_filename': 'v%(version)s.tar.gz', 'filename': SOURCE_TAR_GZ}],
        'checksums': ['e9896d161938b53f869fde207f3f6fca496d09b2a47d21dd9b6b65e897ad6aab'],
        'modulename': 'haiku',
    }),
    ('dm-tree', '0.1.6', {
        'checksums': ['6776404b23b4522c01012ffb314632aba092c9541577004ab153321e87da439a'],
        'modulename': 'tree',
    }),
    ('websocket-client', '1.2.1', {
        'checksums': ['8dfb715d8a992f5712fff8c843adae94e22b22a99b2c5e6b0ec4a1a981cc4e0d'],
        'modulename': 'websocket',
    }),
    ('docker', '5.0.0', {
        'checksums': ['3e8bc47534e0ca9331d72c32f2881bb13b93ded0bcdeab3c833fb7cf61c0a9a5'],
    }),
    ('immutabledict', '2.1.0', {
        'checksums': ['673fb8f30f46d23dd394050b979f5b7f4c5398982b99ebc854fb873e646b967a'],
    }),
    ('contextlib2', '21.6.0', {
        'checksums': ['ab1e2bfe1d01d968e1b7e8d9023bc51ef3509bba217bb730cee3827e1ee82869'],
    }),
    ('ml_collections', '0.1.0', {
        'checksums': ['59a17fcd1c140153009788517f304caaddd7a94f06690f9f0ed09987beebcf3c'],
        # create the (empty) requirements files before installing,
        # see https://github.com/google/ml_collections/issues/7
        'preinstallopts': "touch requirements.txt && touch requirements-test.txt && ",
    }),
]

# Shell commands listed for the post-install phase: provide an executable
# run_alphafold.py (plus an 'alphafold' symlink), copy the helper scripts
# and stereo_chemical_props.txt, then run the run_alphafold.py tests.
postinstallcmds = [
    "mkdir -p %(installdir)s/bin",
    # run_alphafold.py script is missing a shebang...
    "echo '#!/usr/bin/env python' > %(installdir)s/bin/run_alphafold.py",
    "cat %(builddir)s/alphafold-%(version)s/run_alphafold.py >> %(installdir)s/bin/run_alphafold.py",
    "chmod a+x %(installdir)s/bin/run_alphafold.py",
    "cd %(installdir)s/bin && ln -s run_alphafold.py alphafold",
    "cp -a %(builddir)s/alphafold-%(version)s/scripts %(installdir)s/",
    # the downloaded file carries the commit in its name; install it under the plain name
    (
        "cp -a %(builddir)s/stereo_chemical_props-" + local_scp_commit +
        ".txt %(installdir)s/stereo_chemical_props.txt"
    ),
    # run tests for run_alphafold.py script;
    # shouldn't do this in sanity check to avoid breaking use of --module-only
    (
        "PYTHONPATH=%(installdir)s/lib/python%(pyshortver)s/site-packages:$PYTHONPATH "
        "python %(builddir)s/alphafold-%(version)s/run_alphafold_test.py"
    ),
]

# Files and directories listed for the sanity check of the installation.
sanity_check_paths = {
    'files': [
        'bin/alphafold',
        'bin/run_alphafold.py',
        # shared library suffix comes from the SHLIB_EXT constant
        'lib/libOpenMM.%s' % SHLIB_EXT,
        'lib/python%(pyshortver)s/site-packages/simtk/openmm/openmm.py',
        'stereo_chemical_props.txt',
    ],
    'dirs': [
        'lib/python%(pyshortver)s/site-packages',
        'scripts',
    ],
}

# Commands listed for the sanity check: the OpenMM self-test, an import of
# the alphafold package, and a check of the help text of the wrapper script.
sanity_check_commands = [
    "python -m simtk.testInstallation",
    "python -c 'import alphafold'",
    "alphafold --help 2>&1 | grep 'Full AlphaFold protein structure prediction script'",
]

sanity_pip_check = True

# Extra environment variables mapped to subdirectories 'include' and 'lib'.
modextrapaths = {'OPENMM_INCLUDE_PATH': 'include', 'OPENMM_LIB_PATH': 'lib'}

moduleclass = 'bio'
115 changes: 115 additions & 0 deletions easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_data-dep-paths.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
pick up on $ALPHAFOLD_DATA_DIR to specify location to downloaded data
(see https://github.com/deepmind/alphafold/blob/main/docker/run_docker.py);
pick up on HH-suite, HHMER, Kalign dependencies provided via EasyBuild
author: Kenneth Hoste (HPC-UGent)
diff --git a/run_alphafold.py b/run_alphafold.py
index 61f7ef2..81c97a0 100644
--- a/run_alphafold.py
+++ b/run_alphafold.py
@@ -36,6 +36,46 @@ from alphafold.model import model
from alphafold.relax import relax
# Internal import (7716).

+
+data_dir = os.getenv('ALPHAFOLD_DATA_DIR')
+if data_dir:
+ uniref90_database_path = os.path.join(data_dir, 'uniref90', 'uniref90.fasta')
+ mgnify_database_path = os.path.join(data_dir, 'mgnify', 'mgy_clusters.fa')
+ bfd_database_path = os.path.join(data_dir, 'bfd', 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt')
+ uniclust30_database_path = os.path.join(data_dir, 'uniclust30', 'uniclust30_2018_08', 'uniclust30_2018_08')
+ pdb70_database_path = os.path.join(data_dir, 'pdb70', 'pdb70')
+ template_mmcif_dir = os.path.join(data_dir, 'pdb_mmcif', 'mmcif_files')
+ obsolete_pdbs_path = os.path.join(data_dir, 'pdb_mmcif', 'obsolete.dat')
+else:
+ sys.stderr.write("$ALPHAFOLD_DATA_DIR is not defined!")
+ uniref90_database_path = None
+ mgnify_database_path = None
+ bfd_database_path = None
+ uniclust30_database_path = None
+ pdb70_database_path = None
+ template_mmcif_dir = None
+ obsolete_pdbs_path = None
+
+hmmer_root = os.getenv('EBROOTHMMER')
+if hmmer_root:
+ jackhmmer_binary_path = os.path.join(hmmer_root, 'bin', 'jackhmmer')
+else:
+ jackhmmer_binary_path = '/usr/bin/jackhmmer'
+
+hhsuite_root = os.getenv('EBROOTHHMINSUITE')
+if hhsuite_root:
+ hhblits_binary_path = os.path.join(hhsuite_root, 'bin', 'hhblits')
+ hhsearch_binary_path = os.path.join(hhsuite_root, 'bin', 'hhsearch')
+else:
+ hhblits_binary_path = '/usr/bin/hhblits'
+ hhsearch_binary_path = '/usr/bin/hhsearch'
+
+kalign_root = os.getenv('EBROOTKALIGN')
+if kalign_root:
+ kalign_binary_path = os.path.join(kalign_root, 'bin', 'kalign')
+else:
+ kalign_binary_path = '/usr/bin/kalign'
+
flags.DEFINE_list('fasta_paths', None, 'Paths to FASTA files, each containing '
'one sequence. Paths should be separated by commas. '
'All FASTA paths must have a unique basename as the '
@@ -44,30 +83,30 @@ flags.DEFINE_list('fasta_paths', None, 'Paths to FASTA files, each containing '
flags.DEFINE_string('output_dir', None, 'Path to a directory that will '
'store the results.')
flags.DEFINE_list('model_names', None, 'Names of models to use.')
-flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.')
-flags.DEFINE_string('jackhmmer_binary_path', '/usr/bin/jackhmmer',
+flags.DEFINE_string('data_dir', data_dir, 'Path to directory of supporting data.')
+flags.DEFINE_string('jackhmmer_binary_path', jackhmmer_binary_path,
'Path to the JackHMMER executable.')
-flags.DEFINE_string('hhblits_binary_path', '/usr/bin/hhblits',
+flags.DEFINE_string('hhblits_binary_path', hhblits_binary_path,
'Path to the HHblits executable.')
-flags.DEFINE_string('hhsearch_binary_path', '/usr/bin/hhsearch',
+flags.DEFINE_string('hhsearch_binary_path', hhsearch_binary_path,
'Path to the HHsearch executable.')
-flags.DEFINE_string('kalign_binary_path', '/usr/bin/kalign',
+flags.DEFINE_string('kalign_binary_path', kalign_binary_path,
'Path to the Kalign executable.')
-flags.DEFINE_string('uniref90_database_path', None, 'Path to the Uniref90 '
+flags.DEFINE_string('uniref90_database_path', uniref90_database_path, 'Path to the Uniref90 '
'database for use by JackHMMER.')
-flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify '
+flags.DEFINE_string('mgnify_database_path', mgnify_database_path, 'Path to the MGnify '
'database for use by JackHMMER.')
-flags.DEFINE_string('bfd_database_path', None, 'Path to the BFD '
+flags.DEFINE_string('bfd_database_path', bfd_database_path, 'Path to the BFD '
'database for use by HHblits.')
-flags.DEFINE_string('uniclust30_database_path', None, 'Path to the Uniclust30 '
+flags.DEFINE_string('uniclust30_database_path', uniclust30_database_path, 'Path to the Uniclust30 '
'database for use by HHblits.')
-flags.DEFINE_string('pdb70_database_path', None, 'Path to the PDB70 '
+flags.DEFINE_string('pdb70_database_path', pdb70_database_path, 'Path to the PDB70 '
'database for use by HHsearch.')
-flags.DEFINE_string('template_mmcif_dir', None, 'Path to a directory with '
+flags.DEFINE_string('template_mmcif_dir', template_mmcif_dir, 'Path to a directory with '
'template mmCIF structures, each named <pdb_id>.cif')
flags.DEFINE_string('max_template_date', None, 'Maximum template release date '
'to consider. Important if folding historical test sets.')
-flags.DEFINE_string('obsolete_pdbs_path', None, 'Path to file containing a '
+flags.DEFINE_string('obsolete_pdbs_path', obsolete_pdbs_path, 'Path to file containing a '
'mapping from obsolete PDB IDs to the PDB IDs of their '
'replacements.')
flags.DEFINE_enum('preset', 'full_dbs', ['full_dbs', 'casp14'],
@@ -268,16 +307,8 @@ if __name__ == '__main__':
'fasta_paths',
'output_dir',
'model_names',
- 'data_dir',
'preset',
- 'uniref90_database_path',
- 'mgnify_database_path',
- 'uniclust30_database_path',
- 'bfd_database_path',
- 'pdb70_database_path',
- 'template_mmcif_dir',
'max_template_date',
- 'obsolete_pdbs_path',
])

app.run(main)
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
also include alphafold.data.tools and alphafold.model.tf, which are not picked up by find_packages()
author: Kenneth Hoste (HPC-UGent)
--- alphafold-2.0.0/setup.py.orig 2021-08-14 08:42:43.030919330 +0200
+++ alphafold-2.0.0/setup.py 2021-08-14 08:43:59.183245602 +0200
@@ -26,7 +26,7 @@
author_email='alphafold@deepmind.com',
license='Apache License, Version 2.0',
url='https://github.com/deepmind/alphafold',
- packages=find_packages(),
+ packages=find_packages() + ['alphafold/data/tools', 'alphafold/model/tf'],
install_requires=[
'absl-py',
'biopython',
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
fix path to stereo_chemical_props.txt when AlphaFold is installed with EasyBuild
author: Kenneth Hoste (HPC-UGent)
diff --git a/alphafold/common/residue_constants.py b/alphafold/common/residue_constants.py
index 318b44a..fd840e3 100644
--- a/alphafold/common/residue_constants.py
+++ b/alphafold/common/residue_constants.py
@@ -19,6 +19,7 @@ import functools
from typing import Mapping, List, Tuple

import numpy as np
+import os
import tree

# Internal import (35fd).
@@ -403,7 +404,7 @@ def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]],
residue_bond_angles: dict that maps resname --> list of BondAngle tuples
"""
stereo_chemical_props_path = (
- 'alphafold/common/stereo_chemical_props.txt')
+ os.path.join(os.getenv('EBROOTALPHAFOLD', 'alphafold/common'), 'stereo_chemical_props.txt'))
branfosj marked this conversation as resolved.
Show resolved Hide resolved
with open(stereo_chemical_props_path, 'rt') as f:
stereo_chemical_props = f.read()
lines_iter = iter(stereo_chemical_props.splitlines())
49 changes: 49 additions & 0 deletions easybuild/easyconfigs/a/AlphaFold/AlphaFold-2.0.0_n-cpu.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
don't hardcode number of cores to use by HHBlits and Jackhmmer scripts;
see https://github.com/deepmind/alphafold/issues/110
author: Kenneth Hoste (HPC-UGent)

diff --git a/alphafold/data/tools/hhblits.py b/alphafold/data/tools/hhblits.py
index e0aa098..e624f3a 100644
--- a/alphafold/data/tools/hhblits.py
+++ b/alphafold/data/tools/hhblits.py
@@ -27,6 +27,8 @@ from alphafold.data.tools import utils
_HHBLITS_DEFAULT_P = 20
_HHBLITS_DEFAULT_Z = 500

+HHBLITS_N_CPU = os.getenv('ALPHAFOLD_HHBLITS_N_CPU', 4)
+

class HHBlits:
"""Python wrapper of the HHblits binary."""
@@ -35,7 +37,7 @@ class HHBlits:
*,
binary_path: str,
databases: Sequence[str],
- n_cpu: int = 4,
+ n_cpu: int = HHBLITS_N_CPU,
n_iter: int = 3,
e_value: float = 0.001,
maxseq: int = 1_000_000,
diff --git a/alphafold/data/tools/jackhmmer.py b/alphafold/data/tools/jackhmmer.py
index ae53df9..800bf99 100644
--- a/alphafold/data/tools/jackhmmer.py
+++ b/alphafold/data/tools/jackhmmer.py
@@ -24,6 +24,9 @@ from alphafold.data.tools import utils
# Internal import (7716).


+JACKHMMER_N_CPU = os.getenv('ALPHAFOLD_JACKHMMER_N_CPU', 8)
+
+
class Jackhmmer:
"""Python wrapper of the Jackhmmer binary."""

@@ -31,7 +34,7 @@ class Jackhmmer:
*,
binary_path: str,
database_path: str,
- n_cpu: int = 8,
+ n_cpu: int = JACKHMMER_N_CPU,
n_iter: int = 1,
e_value: float = 0.0001,
z_value: Optional[int] = None,
Loading