From a672395ee7b540eb89c209bc3f49fd0e789a8af1 Mon Sep 17 00:00:00 2001 From: gcroci2 Date: Tue, 28 Nov 2023 23:13:21 +0100 Subject: [PATCH] add suggestions from reviews --- Dockerfile | 10 +++-- README.md | 63 ++++++++++++++++------------- docs/installation.md | 51 +++++++++++++---------- tutorials/data_generation_ppi.ipynb | 4 +- tutorials/data_generation_srv.ipynb | 8 ++-- 5 files changed, 77 insertions(+), 59 deletions(-) diff --git a/Dockerfile b/Dockerfile index f7534a85d..4a99fd5c1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,8 +3,8 @@ FROM --platform=linux/x86_64 continuumio/miniconda3:23.10.0-1 # Add files ADD ./tutorials /home/deeprank2/tutorials -ADD ./env/environment.yml /home -ADD ./env/requirements.txt /home +ADD ./env/environment.yml /home/deeprank2 +ADD ./env/requirements.txt /home/deeprank2 # Install RUN \ @@ -17,8 +17,10 @@ RUN \ mv mkdssp-4.4.0-linux-x64 /usr/local/bin/mkdssp && \ chmod a+x /usr/local/bin/mkdssp && \ ## Conda and pip deps - conda env create -f /home/environment.yml && \ + conda env create -f /home/deeprank2/environment.yml && \ ## Get the data for running the tutorials + if [ -d "/home/deeprank2/tutorials/data_raw" ]; then rm -Rf /home/deeprank2/tutorials/data_raw; fi && \ + if [ -d "/home/deeprank2/tutorials/data_processed" ]; then rm -Rf /home/deeprank2/tutorials/data_processed; fi && \ wget https://zenodo.org/records/8349335/files/data_raw.zip && \ unzip data_raw.zip -d data_raw && \ mv data_raw /home/deeprank2/tutorials @@ -31,4 +33,4 @@ ENV PATH /opt/conda/envs/deeprank2/bin:$PATH WORKDIR /home/deeprank2 # Define default command -CMD ["bash"] +CMD ["jupyter", "notebook", "--ip=0.0.0.0", "--NotebookApp.token=''","--NotebookApp.password=''", "--allow-root"] diff --git a/README.md b/README.md index f0fbf24f1..ca7a0b813 100644 --- a/README.md +++ b/README.md @@ -36,12 +36,14 @@ DeepRank2 extensive documentation can be found [here](https://deeprank2.rtfd.io/ - [Deeprank2](#deeprank2) - [Overview](#overview) - [Table of 
contents](#table-of-contents) - - [Installation](#installation) - - [Dockerfile](#dockerfile) - - [Non-pythonic dependencies](#non-pythonic-dependencies) - - [Pythonic dependencies](#pythonic-dependencies) + - [Installations](#installations) + - [Containerized Installation](#containerized-installation) + - [Local/remote installation](#localremote-installation) + - [Non-pythonic dependencies](#non-pythonic-dependencies) + - [Pythonic dependencies](#pythonic-dependencies) + - [Install DeepRank2](#install-deeprank2) - [Test installation](#test-installation) - - [Contributing](#contributing) + - [Contributing](#contributing) - [Data generation](#data-generation) - [Datasets](#datasets) - [GraphDataset](#graphdataset) @@ -50,36 +52,39 @@ DeepRank2 extensive documentation can be found [here](https://deeprank2.rtfd.io/ - [Computational performances](#computational-performances) - [Package development](#package-development) -## Installation +## Installations Note that the package officially supports ubuntu-latest OS only, whose functioning is widely tested through the continuous integration workflows. -### Dockerfile +You can either install DeepRank2 in a [dockerized container](#containerized-installation), which will allow you to run our [tutorial notebooks](https://github.com/DeepRank/deeprank2/tree/main/tutorials), or you can [install the package locally](#localremote-installation). -In order to try out the package without worrying about your OS and without the need of installing all the required dependencies, we created a `Dockerfile` that can be used for taking care of everything in a suitable container. After having cloned the repository and installed [Docker](https://docs.docker.com/engine/install/), run the following commands from the root of the repository. 
+### Containerized Installation + +In order to try out the package without worrying about your OS and without the need of installing all the required dependencies, we created a `Dockerfile` that can be used for taking care of everything in a suitable container. After having cloned the repository and installed [Docker](https://docs.docker.com/engine/install/), run the following commands (you may need to have sudo permission) from the root of the repository. Build the Docker image: + ```bash docker build -t deeprank2 . ``` -SSH to a running container: +Run the Docker container: ```bash -docker run -it --expose 3000 -p 3000:3000 deeprank2 +docker run -p 8888:8888 deeprank2 ``` -Run the tutorials' notebooks from within the running container: -```bash -cd tutorials -jupyter notebook --ip 0.0.0.0 --no-browser --allow-root --port 3000 -``` +This assumes that your application inside the container is listening on port 8888, and you want to map it to port 8888 on your host machine. Open a browser and go to `http://localhost:8888` to access the application running inside the Docker container and run the tutorials' notebooks. -Now you can run the tutorials' notebook. More details about their content can be found [here](https://github.com/DeepRank/deeprank2/blob/main/tutorials/TUTORIAL.md). Note that in the docker container only the raw PDB files are downloaded, needed as a starting point for the tutorials. You can obtain the processed HDF5 files by running the `data_generation_xxx.ipynb` notebooks. Because Docker containers are limited in memory resources, we limit the number of data points processed in the tutorials'. Please install the package locally to fully leverage its capabilities. +More details about the tutorials' content can be found [here](https://github.com/DeepRank/deeprank2/blob/main/tutorials/TUTORIAL.md). Note that in the docker container only the raw PDB files are downloaded, needed as a starting point for the tutorials. 
You can obtain the processed HDF5 files by running the `data_generation_xxx.ipynb` notebooks. Because Docker containers are limited in memory resources, we limit the number of data points processed in the tutorials'. Please install the package locally to fully leverage its capabilities. -### Non-pythonic dependencies +After running the tutorials, you may want to remove the (quite large) Docker image from your machine. In this case, remember to [stop the container](https://docs.docker.com/engine/reference/commandline/stop/) and then [remove the image](https://docs.docker.com/engine/reference/commandline/image_rm/). More general information about Docker can be found on the [official website docs](https://docs.docker.com/get-started/). -Instructions are updated as of 14/09/2023. +### Local/remote installation + +#### Non-pythonic dependencies + +Instructions are up to date as of 14/09/2023. Before installing deeprank2 you need to install some dependencies: @@ -90,34 +95,39 @@ Before installing deeprank2 you need to install some dependencies: * [GCC](https://gcc.gnu.org/install/) * Check if gcc is installed: `gcc --version`. If this gives an error, run `sudo apt-get install gcc`. -### Pythonic dependencies +#### Pythonic dependencies -Instructions are updated as of 14/09/2023. +Instructions are up to date as of 14/09/2023. 
Then, you can use the YML file we provide for creating a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) containing the latest stable release of the package and all the other necessary conda and pip dependencies (CPU only, Python 3.10): ```bash +# Ensure you are in your base environment +conda activate # Create the environment conda env create -f env/environment.yml # Activate the environment conda activate deeprank2 ``` -Alternatively, if you are a MacOS user, if the .YML file installation is not successfull, or if you want to use CUDA or Python 3.11, you can install each dependency separately, and then the latest stable release of the package using the PyPi package manager. Also in this case, we advise to use a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html). In case of issues during installation you should always refer to the official documentation which is linked below: +Alternatively, if you are a MacOS user, if the YML file installation is not successful, or if you want to use CUDA or Python 3.11, you can install each dependency separately, and then the latest stable release of the package using the PyPI package manager. Also in this case, we advise to use a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html). In case of issues during installation, please refer to the official documentation for each package (linked below), as our instructions may be out of date: * [MSMS](https://anaconda.org/bioconda/msms): `conda install -c bioconda msms`. * [Here](https://ssbio.readthedocs.io/en/latest/instructions/msms.html) for MacOS with M1 chip users. * [PyTorch](https://pytorch.org/get-started/locally/) -* [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html) and its optional dependencies: `torch_scatter`, `torch_sparse`, `torch_cluster`, `torch_spline_conv`.
+* [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html) `conda install pyg -c pyg` + * Also install all [optional additions to PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html#installation-from-wheels), namely: `torch_scatter`, `torch_sparse`, `torch_cluster`, `torch_spline_conv`. * For MacOS with M1 chip users only install [the conda version of PyTables](https://www.pytables.org/usersguide/installation.html). +#### Install DeepRank2 + Finally do: ```bash pip install deeprank2 ``` -Alternatively, get all the new developments by cloning the repo and installing the editable version of the package with: +Alternatively, get the latest updates by cloning the repo and installing the editable version of the package with: ```bash git clone https://github.com/DeepRank/deeprank2 @@ -129,17 +139,16 @@ The `test` extra is optional, and can be used to install test-related dependenci #### Test installation -If you have installed the package from a cloned repository (the latter option above), you can check that all components were installed correctly, using pytest. +If you have installed the package from a cloned repository (the latter option above), you can check that all components were installed correctly, using pytest (run `pip install pytest` if you did not install it above). The quick test should be sufficient to ensure that the software works, while the full test (a few minutes) will cover a much broader range of settings to ensure everything is correct. Run `pytest tests/test_integration.py` for the quick test or just `pytest` for the full test (expect a few minutes to run). -### Contributing +## Contributing If you would like to contribute to the package in any way, please see [our guidelines](CONTRIBUTING.rst). -The following section serves as a first guide to start using the package, using protein-protein Interface (PPI) queries -as example. 
For an enhanced learning experience, we provide in-depth [tutorial notebooks](https://github.com/DeepRank/deeprank2/tree/main/tutorials) for generating PPI data, generating SVR data, and for the training pipeline. +The following section serves as a first guide to start using the package, using protein-protein Interface (PPI) queries as example. For an enhanced learning experience, we provide in-depth [tutorial notebooks](https://github.com/DeepRank/deeprank2/tree/main/tutorials) for generating PPI data, generating SVR data, and for the training pipeline. For more details, see the [extended documentation](https://deeprank2.rtfd.io/). ## Data generation diff --git a/docs/installation.md b/docs/installation.md index f34d11f70..5a886883d 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,33 +1,36 @@ -# Installation +# Installations Note that the package officially supports ubuntu-latest OS only, whose functioning is widely tested through the continuous integration workflows. -## Dockerfile +You can either install DeepRank2 in a [dockerized container](#containerized-installation), which will allow you to run our [tutorial notebooks](https://github.com/DeepRank/deeprank2/tree/main/tutorials), or you can [install the package locally](#localremote-installation). -In order to try out the package without worrying about your OS and without the need of installing all the required dependencies, we created a `Dockerfile` that can be used for taking care of everything in a suitable container. After having cloned the repository and installed [Docker](https://docs.docker.com/engine/install/), run the following commands from the root of the repository. +## Containerized Installation + +In order to try out the package without worrying about your OS and without the need of installing all the required dependencies, we created a `Dockerfile` that can be used for taking care of everything in a suitable container. 
After having cloned the repository and installed [Docker](https://docs.docker.com/engine/install/), run the following commands (you may need to have sudo permission) from the root of the repository. Build the Docker image: + ```bash docker build -t deeprank2 . ``` -SSH to a running container: +Run the Docker container: ```bash -docker run -it --expose 3000 -p 3000:3000 deeprank2 +docker run -p 8888:8888 deeprank2 ``` -Run the tutorials' notebooks from within the running container: -```bash -cd tutorials -jupyter notebook --ip 0.0.0.0 --no-browser --allow-root --port 3000 -``` +This assumes that your application inside the container is listening on port 8888, and you want to map it to port 8888 on your host machine. Open a browser and go to `http://localhost:8888` to access the application running inside the Docker container and run the tutorials' notebooks. -Now you can run the tutorials' notebook. More details about their content can be found [here](https://github.com/DeepRank/deeprank2/blob/main/tutorials/TUTORIAL.md). Note that in the docker container only the raw PDB files are downloaded, needed as a starting point for the tutorials. You can obtain the processed HDF5 files by running the `data_generation_xxx.ipynb` notebooks. Because Docker containers are limited in memory resources, we limit the number of data points processed in the tutorials'. Please install the package locally to fully leverage its capabilities. +More details about the tutorials' content can be found [here](https://github.com/DeepRank/deeprank2/blob/main/tutorials/TUTORIAL.md). Note that in the docker container only the raw PDB files are downloaded, needed as a starting point for the tutorials. You can obtain the processed HDF5 files by running the `data_generation_xxx.ipynb` notebooks. Because Docker containers are limited in memory resources, we limit the number of data points processed in the tutorials'. Please install the package locally to fully leverage its capabilities. 
-## Non-pythonic dependencies +After running the tutorials, you may want to remove the (quite large) Docker image from your machine. In this case, remember to [stop the container](https://docs.docker.com/engine/reference/commandline/stop/) and then [remove the image](https://docs.docker.com/engine/reference/commandline/image_rm/). More general information about Docker can be found on the [official website docs](https://docs.docker.com/get-started/). -Instructions are updated as of 14/09/2023. +## Local/remote installation + +### Non-pythonic dependencies + +Instructions are up to date as of 14/09/2023. Before installing deeprank2 you need to install some dependencies: @@ -38,34 +41,39 @@ Before installing deeprank2 you need to install some dependencies: * [GCC](https://gcc.gnu.org/install/) * Check if gcc is installed: `gcc --version`. If this gives an error, run `sudo apt-get install gcc`. -## Pythonic dependencies +### Pythonic dependencies -Instructions are updated as of 14/09/2023. +Instructions are up to date as of 14/09/2023. Then, you can use the YML file we provide for creating a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html) containing the latest stable release of the package and all the other necessary conda and pip dependencies (CPU only, Python 3.10): ```bash +# Ensure you are in your base environment +conda activate # Create the environment conda env create -f env/environment.yml # Activate the environment conda activate deeprank2 ``` -Alternatively, if you are a MacOS user, if the .YML file installation is not successfull, or if you want to use CUDA or Python 3.11, you can install each dependency separately, and then the latest stable release of the package using the PyPi package manager. Also in this case, we advise to use a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html). 
In case of issues during installation you should always refer to the official documentation which is linked below: +Alternatively, if you are a MacOS user, if the YML file installation is not successful, or if you want to use CUDA or Python 3.11, you can install each dependency separately, and then the latest stable release of the package using the PyPI package manager. Also in this case, we advise to use a [conda environment](https://conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html). In case of issues during installation, please refer to the official documentation for each package (linked below), as our instructions may be out of date: * [MSMS](https://anaconda.org/bioconda/msms): `conda install -c bioconda msms`. * [Here](https://ssbio.readthedocs.io/en/latest/instructions/msms.html) for MacOS with M1 chip users. * [PyTorch](https://pytorch.org/get-started/locally/) -* [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html) and its optional dependencies: `torch_scatter`, `torch_sparse`, `torch_cluster`, `torch_spline_conv`. +* [PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html) `conda install pyg -c pyg` + * Also install all [optional additions to PyTorch Geometric](https://pytorch-geometric.readthedocs.io/en/latest/install/installation.html#installation-from-wheels), namely: `torch_scatter`, `torch_sparse`, `torch_cluster`, `torch_spline_conv`. * For MacOS with M1 chip users only install [the conda version of PyTables](https://www.pytables.org/usersguide/installation.html).
+### Install DeepRank2 + Finally do: ```bash pip install deeprank2 ``` -Alternatively, get all the new developments by cloning the repo and installing the editable version of the package with: +Alternatively, get the latest updates by cloning the repo and installing the editable version of the package with: ```bash git clone https://github.com/DeepRank/deeprank2 @@ -77,15 +85,14 @@ The `test` extra is optional, and can be used to install test-related dependenci ### Test installation -If you have installed the package from a cloned repository (the latter option above), you can check that all components were installed correctly, using pytest. +If you have installed the package from a cloned repository (the latter option above), you can check that all components were installed correctly, using pytest (run `pip install pytest` if you did not install it above). The quick test should be sufficient to ensure that the software works, while the full test (a few minutes) will cover a much broader range of settings to ensure everything is correct. Run `pytest tests/test_integration.py` for the quick test or just `pytest` for the full test (expect a few minutes to run). -## Contributing +# Contributing If you would like to contribute to the package in any way, please see [our guidelines](CONTRIBUTING.rst). -The following section serves as a first guide to start using the package, using protein-protein Interface (PPI) queries -as example. For an enhanced learning experience, we provide in-depth [tutorial notebooks](https://github.com/DeepRank/deeprank2/tree/main/tutorials) for generating PPI data, generating SVR data, and for the training pipeline. +The following section serves as a first guide to start using the package, using protein-protein Interface (PPI) queries as example.
For an enhanced learning experience, we provide in-depth [tutorial notebooks](https://github.com/DeepRank/deeprank2/tree/main/tutorials) for generating PPI data, generating SVR data, and for the training pipeline. For more details, see the [extended documentation](https://deeprank2.rtfd.io/). diff --git a/tutorials/data_generation_ppi.ipynb b/tutorials/data_generation_ppi.ipynb index 2118cdb86..2a1b8db6b 100644 --- a/tutorials/data_generation_ppi.ipynb +++ b/tutorials/data_generation_ppi.ipynb @@ -190,7 +190,7 @@ "source": [ "queries = QueryCollection()\n", "\n", - "interface_distance_cutoff = 8 # max distance in Å between two interacting residues/atoms of two proteins\n", + "interface_distance_cutoff = 3 # max distance in Å between two interacting residues/atoms of two proteins\n", "\n", "print(f'Adding {len(pdb_files)} queries to the query collection ...')\n", "count = 0\n", @@ -423,7 +423,7 @@ "source": [ "queries = QueryCollection()\n", "\n", - "interface_distance_cutoff = 5 # max distance in Å between two interacting residues/atoms of two proteins\n", + "interface_distance_cutoff = 3 # max distance in Å between two interacting residues/atoms of two proteins\n", "\n", "print(f'Adding {len(pdb_files)} queries to the query collection ...')\n", "count = 0\n", diff --git a/tutorials/data_generation_srv.ipynb b/tutorials/data_generation_srv.ipynb index 04c63e0e0..42d1eb572 100644 --- a/tutorials/data_generation_srv.ipynb +++ b/tutorials/data_generation_srv.ipynb @@ -214,8 +214,8 @@ "source": [ "queries = QueryCollection()\n", "\n", - "radius = 10.0 # radius to select the local neighborhood around the SRV\n", - "distance_cutoff = 4.5 # ??\n", + "radius = 3.0 # radius to select the local neighborhood around the SRV\n", + "distance_cutoff = 3\n", "\n", "print(f'Adding {len(pdb_files)} queries to the query collection ...')\n", "count = 0\n", @@ -457,8 +457,8 @@ "source": [ "queries = QueryCollection()\n", "\n", - "radius = 10.0 # radius to select the local neighborhood 
around the SRV\n", - "distance_cutoff = 4.5 # ??\n", + "radius = 3 # radius to select the local neighborhood around the SRV\n", + "distance_cutoff = 3\n", "\n", "print(f'Adding {len(pdb_files)} queries to the query collection ...')\n", "count = 0\n",