dauparas · oushu1zhangxiangxuan1 · Mar 3, 2023
diff --git a/README.md b/README.md
@@ -28,7 +28,7 @@ Input flags for `protein_mpnn_run.py`:
     argparser.add_argument("--seed", type=int, default=0, help="If set to 0 then a random seed will be picked;")
     argparser.add_argument("--save_score", type=int, default=0, help="0 for False, 1 for True; save score=-log_prob to npy files")
     argparser.add_argument("--path_to_fasta", type=str, default="", help="score provided input sequence in a fasta format; e.g. GGGGGG/PPPPS/WWW for chains A, B, C sorted alphabetically and separated by /")
-    argparser.add_argument("--save_probs", type=int, default=0, help="0 for False, 1 for True; save MPNN predicted probabilites per position")
+    argparser.add_argument("--save_probs", type=int, default=0, help="0 for False, 1 for True; save MPNN predicted probabilities per position")
     argparser.add_argument("--score_only", type=int, default=0, help="0 for False, 1 for True; score input backbone-sequence pairs")
     argparser.add_argument("--conditional_probs_only", type=int, default=0, help="0 for False, 1 for True; output conditional probabilities p(s_i given the rest of the sequence and backbone)")
     argparser.add_argument("--conditional_probs_only_backbone", type=int, default=0, help="0 for False, 1 for True; if true output conditional probabilities p(s_i given backbone)")
@@ -42,15 +42,15 @@ Input flags for `protein_mpnn_run.py`:
     argparser.add_argument("--pdb_path", type=str, default='', help="Path to a single PDB to be designed")
     argparser.add_argument("--pdb_path_chains", type=str, default='', help="Define which chains need to be designed for a single PDB ")
     argparser.add_argument("--jsonl_path", type=str, help="Path to a folder with parsed pdb into jsonl")
-    argparser.add_argument("--chain_id_jsonl",type=str, default='', help="Path to a dictionary specifying which chains need to be designed and which ones are fixed, if not specied all chains will be designed.")
+    argparser.add_argument("--chain_id_jsonl",type=str, default='', help="Path to a dictionary specifying which chains need to be designed and which ones are fixed, if not specified all chains will be designed.")
     argparser.add_argument("--fixed_positions_jsonl", type=str, default='', help="Path to a dictionary with fixed positions")
     argparser.add_argument("--omit_AAs", type=list, default='X', help="Specify which amino acids should be omitted in the generated sequence, e.g. 'AC' would omit alanine and cystine.")
-    argparser.add_argument("--bias_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies AA composion bias if neededi, e.g. {A: -1.1, F: 0.7} would make A less likely and F more likely.")
+    argparser.add_argument("--bias_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies AA composition bias if needed, e.g. {A: -1.1, F: 0.7} would make A less likely and F more likely.")
     argparser.add_argument("--bias_by_res_jsonl", default='', help="Path to dictionary with per position bias.")
-    argparser.add_argument("--omit_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies which amino acids need to be omited from design at specific chain indices")
+    argparser.add_argument("--omit_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies which amino acids need to be omitted from design at specific chain indices")
     argparser.add_argument("--pssm_jsonl", type=str, default='', help="Path to a dictionary with pssm")
     argparser.add_argument("--pssm_multi", type=float, default=0.0, help="A value between [0.0, 1.0], 0.0 means do not use pssm, 1.0 ignore MPNN predictions")
-    argparser.add_argument("--pssm_threshold", type=float, default=0.0, help="A value between -inf + inf to restric per position AAs")
+    argparser.add_argument("--pssm_threshold", type=float, default=0.0, help="A value between -inf and +inf to restrict per position AAs")
     argparser.add_argument("--pssm_log_odds_flag", type=int, default=0, help="0 for False, 1 for True")
     argparser.add_argument("--pssm_bias_flag", type=int, default=0, help="0 for False, 1 for True")
     argparser.add_argument("--tied_positions_jsonl", type=str, default='', help="Path to a dictionary with tied positions")

diff --git a/protein_mpnn_run.py b/protein_mpnn_run.py
@@ -420,7 +420,7 @@ def main(args):
     argparser.add_argument("--seed", type=int, default=0, help="If set to 0 then a random seed will be picked;")
 
     argparser.add_argument("--save_score", type=int, default=0, help="0 for False, 1 for True; save score=-log_prob to npy files")
-    argparser.add_argument("--save_probs", type=int, default=0, help="0 for False, 1 for True; save MPNN predicted probabilites per position")
+    argparser.add_argument("--save_probs", type=int, default=0, help="0 for False, 1 for True; save MPNN predicted probabilities per position")
 
     argparser.add_argument("--score_only", type=int, default=0, help="0 for False, 1 for True; score input backbone-sequence pairs")
     argparser.add_argument("--path_to_fasta", type=str, default="", help="score provided input sequence in a fasta format; e.g. GGGGGG/PPPPS/WWW for chains A, B, C sorted alphabetically and separated by /")
@@ -440,16 +440,16 @@ def main(args):
     argparser.add_argument("--pdb_path", type=str, default='', help="Path to a single PDB to be designed")
     argparser.add_argument("--pdb_path_chains", type=str, default='', help="Define which chains need to be designed for a single PDB ")
     argparser.add_argument("--jsonl_path", type=str, help="Path to a folder with parsed pdb into jsonl")
-    argparser.add_argument("--chain_id_jsonl",type=str, default='', help="Path to a dictionary specifying which chains need to be designed and which ones are fixed, if not specied all chains will be designed.")
+    argparser.add_argument("--chain_id_jsonl",type=str, default='', help="Path to a dictionary specifying which chains need to be designed and which ones are fixed, if not specified all chains will be designed.")
     argparser.add_argument("--fixed_positions_jsonl", type=str, default='', help="Path to a dictionary with fixed positions")
     argparser.add_argument("--omit_AAs", type=list, default='X', help="Specify which amino acids should be omitted in the generated sequence, e.g. 'AC' would omit alanine and cystine.")
-    argparser.add_argument("--bias_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies AA composion bias if neededi, e.g. {A: -1.1, F: 0.7} would make A less likely and F more likely.")
+    argparser.add_argument("--bias_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies AA composition bias if needed, e.g. {A: -1.1, F: 0.7} would make A less likely and F more likely.")
 
     argparser.add_argument("--bias_by_res_jsonl", default='', help="Path to dictionary with per position bias.") 
-    argparser.add_argument("--omit_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies which amino acids need to be omited from design at specific chain indices")
+    argparser.add_argument("--omit_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies which amino acids need to be omitted from design at specific chain indices")
     argparser.add_argument("--pssm_jsonl", type=str, default='', help="Path to a dictionary with pssm")
     argparser.add_argument("--pssm_multi", type=float, default=0.0, help="A value between [0.0, 1.0], 0.0 means do not use pssm, 1.0 ignore MPNN predictions")
-    argparser.add_argument("--pssm_threshold", type=float, default=0.0, help="A value between -inf + inf to restric per position AAs")
+    argparser.add_argument("--pssm_threshold", type=float, default=0.0, help="A value between -inf + inf to restrict per position AAs")
     argparser.add_argument("--pssm_log_odds_flag", type=int, default=0, help="0 for False, 1 for True")
     argparser.add_argument("--pssm_bias_flag", type=int, default=0, help="0 for False, 1 for True")
 

diff --git a/protein_mpnn_utils.py b/protein_mpnn_utils.py
@@ -255,11 +255,11 @@ def tied_featurize(batch, device, chain_dict, fixed_position_dict=None, omit_AA_
                 chain_coords = b[f'coords_chain_{letter}'] #this is a dictionary
                 chain_mask = np.zeros(chain_length) #0.0 for visible chains
                 if ca_only:
-                    x_chain = np.array(chain_coords[f'CA_chain_{letter}']) #[chain_lenght,1,3] #CA_diff
+                    x_chain = np.array(chain_coords[f'CA_chain_{letter}']) #[chain_length,1,3] #CA_diff
                     if len(x_chain.shape) == 2:
                         x_chain = x_chain[:,None,:]
                 else:
-                    x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_lenght,4,3]
+                    x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_length,4,3]
                 x_chain_list.append(x_chain)
                 chain_mask_list.append(chain_mask)
                 chain_seq_list.append(chain_seq)
@@ -290,11 +290,11 @@ def tied_featurize(batch, device, chain_dict, fixed_position_dict=None, omit_AA_
                 chain_coords = b[f'coords_chain_{letter}'] #this is a dictionary
                 chain_mask = np.ones(chain_length) #1.0 for masked
                 if ca_only:
-                    x_chain = np.array(chain_coords[f'CA_chain_{letter}']) #[chain_lenght,1,3] #CA_diff
+                    x_chain = np.array(chain_coords[f'CA_chain_{letter}']) #[chain_length,1,3] #CA_diff
                     if len(x_chain.shape) == 2:
                         x_chain = x_chain[:,None,:]
                 else:
-                    x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_lenght,4,3]               
+                    x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_length,4,3]               
                 x_chain_list.append(x_chain)
                 chain_mask_list.append(chain_mask)
                 chain_seq_list.append(chain_seq)

diff --git a/training/README.md b/training/README.md
@@ -62,7 +62,7 @@ Input flags for `training.py`:
     argparser.add_argument("--reload_data_every_n_epochs", type=int, default=2, help="reload training data every n epochs")
     argparser.add_argument("--num_examples_per_epoch", type=int, default=1000000, help="number of training example to load for one epoch")
     argparser.add_argument("--batch_size", type=int, default=10000, help="number of tokens for one batch")
-    argparser.add_argument("--max_protein_length", type=int, default=10000, help="maximum length of the protein complext")
+    argparser.add_argument("--max_protein_length", type=int, default=10000, help="maximum length of the protein complex")
     argparser.add_argument("--hidden_dim", type=int, default=128, help="hidden model dimension")
     argparser.add_argument("--num_encoder_layers", type=int, default=3, help="number of encoder layers")
     argparser.add_argument("--num_decoder_layers", type=int, default=3, help="number of decoder layers")

diff --git a/training/colab_training_example.ipynb b/training/colab_training_example.ipynb
@@ -1035,7 +1035,7 @@
         "# argparser.add_argument(\"--reload_data_every_n_epochs\", type=int, default=2, help=\"reload training data every n epochs\")\n",
         "# argparser.add_argument(\"--num_examples_per_epoch\", type=int, default=1000000, help=\"number of training example to load for one epoch\")\n",
         "# argparser.add_argument(\"--batch_size\", type=int, default=10000, help=\"number of tokens for one batch\")\n",
-        "# argparser.add_argument(\"--max_protein_length\", type=int, default=10000, help=\"maximum length of the protein complext\")\n",
+        "# argparser.add_argument(\"--max_protein_length\", type=int, default=10000, help=\"maximum length of the protein complex\")\n",
         "# argparser.add_argument(\"--hidden_dim\", type=int, default=128, help=\"hidden model dimension\")\n",
         "# argparser.add_argument(\"--num_encoder_layers\", type=int, default=3, help=\"number of encoder layers\") \n",
         "# argparser.add_argument(\"--num_decoder_layers\", type=int, default=3, help=\"number of decoder layers\")\n",

diff --git a/training/model_utils.py b/training/model_utils.py
@@ -81,7 +81,7 @@ def featurize(batch, device):
                 chain_length = len(chain_seq)
                 chain_coords = b[f'coords_chain_{letter}'] #this is a dictionary
                 chain_mask = np.ones(chain_length) #0.0 for visible chains
-                x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_lenght,4,3]
+                x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_length,4,3]
                 x_chain_list.append(x_chain)
                 chain_mask_list.append(chain_mask)
                 chain_seq_list.append(chain_seq)

diff --git a/training/training.py b/training/training.py
@@ -235,7 +235,7 @@ def main(args):
     argparser.add_argument("--reload_data_every_n_epochs", type=int, default=2, help="reload training data every n epochs")
     argparser.add_argument("--num_examples_per_epoch", type=int, default=1000000, help="number of training example to load for one epoch")
     argparser.add_argument("--batch_size", type=int, default=10000, help="number of tokens for one batch")
-    argparser.add_argument("--max_protein_length", type=int, default=10000, help="maximum length of the protein complext")
+    argparser.add_argument("--max_protein_length", type=int, default=10000, help="maximum length of the protein complex")
     argparser.add_argument("--hidden_dim", type=int, default=128, help="hidden model dimension")
     argparser.add_argument("--num_encoder_layers", type=int, default=3, help="number of encoder layers") 
     argparser.add_argument("--num_decoder_layers", type=int, default=3, help="number of decoder layers")