Support Lightning >= 2.0.0 #54

Merged (11 commits) on Jun 15, 2023
@@ -1,4 +1,3 @@
from typing import List
from fuse.utils.ndict import NDict
import torch
import pytorch_lightning as pl
@@ -65,7 +64,7 @@ def validation_step(self, batch_dict: NDict, batch_idx: int) -> None:
met_instance(batch_dict["model.output"], batch_dict["data.label"].type(torch.int))
self.log("validation_loss", loss)

def validation_epoch_end(self, validation_step_outputs: List) -> dict:
def on_validation_epoch_end(self) -> dict:
results = {}
for (k, met_instance) in self.val_metric_dict.items():
res = met_instance.compute()
@@ -80,7 +79,7 @@ def test_step(self, batch_dict: NDict, batch_idx: int) -> None:
met_instance(batch_dict["model.output"], batch_dict["data.label"].type(torch.int))
self.log("test_loss", loss)

def test_epoch_end(self, test_step_outputs: List) -> dict:
def on_test_epoch_end(self) -> dict:
results = {}
for (k, met_instance) in self.test_metric_dict.items():
res = met_instance.compute()
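For context on the hunk above: Lightning >= 2.0 removes the `validation_epoch_end(outputs)` and `test_epoch_end(outputs)` hooks in favour of `on_validation_epoch_end()` and `on_test_epoch_end()`, which take no outputs argument, so any per-step state has to live on the module itself (here, the metric instances). A minimal hedged sketch of the pattern, with illustrative class, metric, and batch names rather than the repository's actual `NDict`-based module:

```python
# Hedged sketch of the Lightning >= 2.0 hook rename applied above.
# Class, metric, and batch names are illustrative, not the repository's module.
import torch
import torchmetrics
import pytorch_lightning as pl


class SketchModule(pl.LightningModule):
    def __init__(self) -> None:
        super().__init__()
        self.layer = torch.nn.Linear(8, 1)
        # Metric objects accumulate state across steps, so the epoch-end hook
        # no longer needs the per-step outputs that Lightning < 2.0 passed in.
        self.val_auroc = torchmetrics.classification.BinaryAUROC()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.layer(x).squeeze(-1)

    def validation_step(self, batch: tuple, batch_idx: int) -> None:
        x, y = batch
        self.val_auroc.update(torch.sigmoid(self(x)), y.int())

    # Lightning < 2.0:  def validation_epoch_end(self, outputs): ...
    # Lightning >= 2.0: renamed hook, no outputs argument.
    def on_validation_epoch_end(self) -> None:
        self.log("validation_auroc", self.val_auroc.compute())
        self.val_auroc.reset()

    def configure_optimizers(self) -> torch.optim.Optimizer:
        return torch.optim.Adam(self.parameters(), lr=1e-3)
```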
@@ -46,8 +46,6 @@ def main(cfg: DictConfig) -> None:
trainer = pl.Trainer(
callbacks=[checkpoint_callback],
default_root_dir=cfg.experiment.dir,
gpus=1,
auto_select_gpus=True,
check_val_every_n_epoch=cfg.trainer.every_n_val,
max_epochs=cfg.trainer.epochs,
benchmark=True,
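A hedged sketch of what the deleted `gpus`/`auto_select_gpus` arguments map to in Lightning >= 2.0 (the keyword values shown are illustrative, not the repository's exact call): both arguments were removed from `pl.Trainer`, and device selection is now expressed through `accelerator` and `devices`, whose defaults are `"auto"`, which is why simply dropping the two lines also works.

```python
# Hedged sketch, not the repository's exact call: Lightning >= 2.0 removed the
#   gpus=1, auto_select_gpus=True
# Trainer arguments deleted above; device selection goes through accelerator/devices.
import pytorch_lightning as pl

trainer = pl.Trainer(
    accelerator="gpu",   # or "auto" to fall back to CPU when no GPU is visible
    devices=1,           # devices="auto" roughly replaces auto_select_gpus=True
    max_epochs=1000,     # illustrative value
)
```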
@@ -7,26 +7,24 @@ hydra:
dir: ${paths.results}/lenient_split/${now:%Y-%m-%d_%H-%M-%S-%f}
# Please set the BIMCA_RESULTS environment variable to your desired output location.
# You can override this value when running train.py - for example: python train.py hydra.run.dir='/some/path/you/want'
# this approach is especially useful when you run a daemonized process which has its stdout/err redirected to a log file within this dir.
# this approach is especially useful when you run a daemonized process which has its stdout/err redirected to a log file within this dir.
# alternatively, you can use multi-run (looks useful for grid searches etc.) - read: https://hydra.cc/docs/configure_hydra/workdir/#configuration-for-multirun
# it uses hydra.sweep.dir and hydra.sweep.subdir (which uses ${hydra.job.num})

trainer:
gpus: 1
max_epochs: 1000
auto_lr_find: false
max_epochs: 1000

model:
model:
ligand_attention_size: 16
receptor_attention_size: 16
ligand_embedding_size: 32
receptor_embedding_size: 35
ligand_embedding: "learned"
ligand_filters: [256, 256, 256]
ligand_embedding: "learned"
ligand_filters: [256, 256, 256]
ligand_vocabulary_size: 575
ligand_padding_length: 696
receptor_embedding: "learned"
receptor_filters: [256, 256, 256]
receptor_embedding: "learned"
receptor_filters: [256, 256, 256]
receptor_vocabulary_size: 33
receptor_padding_length: 2536
dense_hidden_sizes: [256]
@@ -38,7 +36,7 @@ model:
learning_rate: 0.001

data:
lightning_data_module:
lightning_data_module:
molecules_smi: "${paths.data}/pretraining/bindingdb_ligands.smi"
proteins_smi: "${paths.data}/pretraining/bindingdb_sequence.smi"
train_dataset_path: "${paths.data}/pretraining/non_kinase_train.csv"
@@ -50,18 +48,17 @@ data:
pairs_table_affinity_column: pIC50
ligand_padding_length: ${model.ligand_padding_length}
receptor_padding_length: ${model.receptor_padding_length}

# tokenizer related
pytoda_ligand_tokenizer_json: ${pytoda_ligand_tokenizer_path:}
pytoda_target_tokenizer_amino_acid_dict: "iupac"
pytoda_wrapped_tokenizer: true

# sample processing pipeline related
train_augment_molecule_shuffle_atoms: false
train_augment_molecule_shuffle_atoms: false
train_augment_protein_flip: true

train_batch_size: 128
eval_batch_size: 512
num_workers: 20
num_workers: 20
train_shuffle: true
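One behavioural note on the `auto_lr_find: false` removal above: Lightning >= 2.0 drops that flag from the Trainer entirely, and learning-rate search moves to the Tuner API. A hedged sketch, assuming the feature were still wanted (`model` and `datamodule` are placeholders, not objects from this repository):

```python
# Hedged sketch: replacement for the removed auto_lr_find flag in Lightning >= 2.0.
import pytorch_lightning as pl
from pytorch_lightning.tuner import Tuner

trainer = pl.Trainer(max_epochs=1000)
tuner = Tuner(trainer)
# With a LightningModule `model` and optional `datamodule` (not defined here):
# lr_finder = tuner.lr_find(model, datamodule=datamodule)
# model.learning_rate = lr_finder.suggestion()
```

Since the deleted key was already set to `false`, removing it changes nothing for this config; the sketch only matters if LR finding is reintroduced later.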

@@ -5,22 +5,22 @@ seed_everything: null
only_get_expected_working_dir: false

task_info:
class_label_to_idx:
class_label_to_idx:
Inactive: 0
Inconclusive: 1
Active: 2
Unspecified: 3
Probe: 4

clearml:
clearml:
active: true
project_name: PPI_affinity/TITAN
task_name: ${current_username:}
task_name: ${current_username:}
load_from_checkpoint_continues_within_session: true

session_group_name: ${clearml.project_name}@${clearml.task_name}

paths:
paths:
session_dir: ${cwd:}/run_${local_time:Israel}
#will be used if no session manager is involved. Useful for quick debugging
#if you modify it be aware that the code may "climb up" 1-2 directory levels up and create files/dirs there.
@@ -30,7 +30,7 @@ hydra:
run:
dir: ${paths.session_dir}
# You can override this value when running run.py - for example: python run.py hydra.run.dir='/some/path/you/want'
# this approach is especially useful when you run a daemonized process which has its stdout/err redirected to a log file within this dir.
# this approach is especially useful when you run a daemonized process which has its stdout/err redirected to a log file within this dir.
# alternatively, you can use multi-run (looks useful for grid searches etc.) - read: https://hydra.cc/docs/configure_hydra/workdir/#configuration-for-multirun
# it uses hydra.sweep.dir and hydra.sweep.subdir (which uses ${hydra.job.num})

@@ -40,41 +40,40 @@ caching:
trainer:
gpus: 1
max_epochs: 1000 #99999
auto_lr_find: false

load_from_checkpoint: null

model:
load_from_checkpoint: null

model:

####
base_model: "bimodal_mca"
base_model: "bimodal_mca"

ligand_attention_size: 16
receptor_attention_size: 16

ligand_embedding_size: 35
receptor_embedding_size: 35

####

ligand_embedding: "learned"
ligand_filters: [256, 256, 256]
ligand_embedding: "learned"

ligand_filters: [256, 256, 256]


ligand_vocabulary_size: 28
#ligand_vocabulary_size: 3000
ligand_padding_length: 696


#receptor
receptor_embedding: "learned"
receptor_filters: [256, 256, 256]
receptor_vocabulary_size: 28

receptor_filters: [256, 256, 256]

receptor_vocabulary_size: 28
receptor_padding_length: 2536

dense_hidden_sizes: [256]
activation_fn: "relu"
final_activation: false
@@ -85,15 +84,15 @@ model:
learning_rate: 1e-04
# learning_rate: 0.001




data:
lightning_data_module:

lightning_data_module:

# ### kinase active-sites
# ### AA sequence version
# ### AA sequence version
peptides_smi: "_YOUR_DATA_PATH_/public/epitopes.csv"
proteins_smi: "_YOUR_DATA_PATH_/public/tcr.csv"
train_dataset_path: "_YOUR_DATA_PATH_/public/strict_split/fold0/train.csv"
@@ -118,17 +117,17 @@ data:
ligand_vocabulary_size: ${model.ligand_vocabulary_size}
target_vocabulary_size: ${model.receptor_vocabulary_size}
receptor_vocabulary_size: ${model.receptor_vocabulary_size}

ligand_padding_length: ${model.ligand_padding_length}
target_padding_length: ${model.receptor_padding_length}
receptor_padding_length: ${model.receptor_padding_length}

# sample_pipeline_desc:
# -
# -
# _target_: fusedrug.data.molecule.ops.SmilesToRDKitMol



# tokenizer related
pytoda_SMILES_tokenizer_json: "_YOUR_PACCMANN_PATH_/paccmann_datasets/pytoda/smiles/metadata/tokenizer/vocab.json"
pytoda_target_target_tokenizer_amino_acid_dict: "human-kinase-alignment"
@@ -137,15 +136,14 @@ data:
# sample processing pipeline related

active_site_alignment_info_smi: null #"/gpfs/haifa/projects/m/msieve/MedicalSieve/mol_bio_datasets/paccmann_related/active_sites_alignment_from_Tien_Huynh/joint_alignment_info.smi"
train_augment_peptide_shuffle_atoms: false
train_augment_peptide_shuffle_atoms: false
train_augment_protein_flip: true

protein_augment_full_sequence_noise: true #Whether or not to add noise to protein/peptide representation. TODO: split between proteins and peptides
protein_augment_full_sequence_noise_p: 0.1 #Noise probability

train_batch_size: 128
eval_batch_size: 512
num_workers: 4
num_workers: 4
train_shuffle: true
ckpt_path: "_YOUR_SESSIONS_PATH_/sessions/fuse_based/models/pkbr/pretrain/val_rmse-v3.ckpt"

@@ -39,7 +39,7 @@ params:
num_epochs: 100
accelerator: gpu
num_devices: 1
strategy: null #"auto" for pl 2.0.0
strategy: "auto" # null for Lightning<2.0.0
Collaborator: can you just remove this line? It works for both.

Collaborator (Author): Cool :) done.

data:
drug_fixed_size: 60
target_fixed_size: 842
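On the `strategy` discussion above, a hedged summary of why deleting the line works for both versions (the other argument values are illustrative): Lightning < 2.0 defaults to `strategy=None`, while Lightning >= 2.0 defaults to `strategy="auto"`, so omitting the key lets each version fall back to its own default.

```python
# Hedged sketch of the reviewer's suggestion above: omit `strategy` entirely so
# each version uses its own default (None before 2.0, "auto" from 2.0 onward).
import pytorch_lightning as pl

trainer = pl.Trainer(
    accelerator="gpu",   # illustrative, mirrors the params above
    devices=1,
    max_epochs=100,
    # strategy intentionally not passed
)
```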
1 change: 1 addition & 0 deletions fusedrug_examples/tests/test_bimodal_mca.py
@@ -50,6 +50,7 @@ def test_data_and_model(self) -> None:
# forward pass
lightning_module.forward(smiles, proteins)

@unittest.skip("Doesn't support Lightning >= 2.0.0")
Collaborator: Is this the weird issue? Can you open an issue and a Trello card for Alex?

Collaborator (Author): Yes it is. Will do now 👍

Collaborator (Author): Done

def test_runner(self) -> None:
"""
full runner test *with partial sample ids*.
2 changes: 1 addition & 1 deletion requirements/requirements.txt
@@ -1,7 +1,7 @@
click
pyfastx
torch
pytorch-lightning<2.0.0 # temp - need to make all tests pass with this version first
pytorch-lightning
torchvision
tokenizers
hydra-core