Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed some confusing typos #43

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Input flags for `protein_mpnn_run.py`:
argparser.add_argument("--seed", type=int, default=0, help="If set to 0 then a random seed will be picked;")
argparser.add_argument("--save_score", type=int, default=0, help="0 for False, 1 for True; save score=-log_prob to npy files")
argparser.add_argument("--path_to_fasta", type=str, default="", help="score provided input sequence in a fasta format; e.g. GGGGGG/PPPPS/WWW for chains A, B, C sorted alphabetically and separated by /")
argparser.add_argument("--save_probs", type=int, default=0, help="0 for False, 1 for True; save MPNN predicted probabilites per position")
argparser.add_argument("--save_probs", type=int, default=0, help="0 for False, 1 for True; save MPNN predicted probabilities per position")
argparser.add_argument("--score_only", type=int, default=0, help="0 for False, 1 for True; score input backbone-sequence pairs")
argparser.add_argument("--conditional_probs_only", type=int, default=0, help="0 for False, 1 for True; output conditional probabilities p(s_i given the rest of the sequence and backbone)")
argparser.add_argument("--conditional_probs_only_backbone", type=int, default=0, help="0 for False, 1 for True; if true output conditional probabilities p(s_i given backbone)")
Expand All @@ -42,15 +42,15 @@ Input flags for `protein_mpnn_run.py`:
argparser.add_argument("--pdb_path", type=str, default='', help="Path to a single PDB to be designed")
argparser.add_argument("--pdb_path_chains", type=str, default='', help="Define which chains need to be designed for a single PDB ")
argparser.add_argument("--jsonl_path", type=str, help="Path to a folder with parsed pdb into jsonl")
argparser.add_argument("--chain_id_jsonl",type=str, default='', help="Path to a dictionary specifying which chains need to be designed and which ones are fixed, if not specied all chains will be designed.")
argparser.add_argument("--chain_id_jsonl",type=str, default='', help="Path to a dictionary specifying which chains need to be designed and which ones are fixed, if not specified all chains will be designed.")
argparser.add_argument("--fixed_positions_jsonl", type=str, default='', help="Path to a dictionary with fixed positions")
argparser.add_argument("--omit_AAs", type=list, default='X', help="Specify which amino acids should be omitted in the generated sequence, e.g. 'AC' would omit alanine and cysteine.")
argparser.add_argument("--bias_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies AA composion bias if neededi, e.g. {A: -1.1, F: 0.7} would make A less likely and F more likely.")
argparser.add_argument("--bias_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies AA composition bias if needed, e.g. {A: -1.1, F: 0.7} would make A less likely and F more likely.")
argparser.add_argument("--bias_by_res_jsonl", default='', help="Path to dictionary with per position bias.")
argparser.add_argument("--omit_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies which amino acids need to be omited from design at specific chain indices")
argparser.add_argument("--omit_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies which amino acids need to be omitted from design at specific chain indices")
argparser.add_argument("--pssm_jsonl", type=str, default='', help="Path to a dictionary with pssm")
argparser.add_argument("--pssm_multi", type=float, default=0.0, help="A value between [0.0, 1.0], 0.0 means do not use pssm, 1.0 ignore MPNN predictions")
argparser.add_argument("--pssm_threshold", type=float, default=0.0, help="A value between -inf + inf to restric per position AAs")
argparser.add_argument("--pssm_threshold", type=float, default=0.0, help="A value between -inf and +inf to restrict per-position AAs")
argparser.add_argument("--pssm_log_odds_flag", type=int, default=0, help="0 for False, 1 for True")
argparser.add_argument("--pssm_bias_flag", type=int, default=0, help="0 for False, 1 for True")
argparser.add_argument("--tied_positions_jsonl", type=str, default='', help="Path to a dictionary with tied positions")
Expand Down
10 changes: 5 additions & 5 deletions protein_mpnn_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def main(args):
argparser.add_argument("--seed", type=int, default=0, help="If set to 0 then a random seed will be picked;")

argparser.add_argument("--save_score", type=int, default=0, help="0 for False, 1 for True; save score=-log_prob to npy files")
argparser.add_argument("--save_probs", type=int, default=0, help="0 for False, 1 for True; save MPNN predicted probabilites per position")
argparser.add_argument("--save_probs", type=int, default=0, help="0 for False, 1 for True; save MPNN predicted probabilities per position")

argparser.add_argument("--score_only", type=int, default=0, help="0 for False, 1 for True; score input backbone-sequence pairs")
argparser.add_argument("--path_to_fasta", type=str, default="", help="score provided input sequence in a fasta format; e.g. GGGGGG/PPPPS/WWW for chains A, B, C sorted alphabetically and separated by /")
Expand All @@ -440,16 +440,16 @@ def main(args):
argparser.add_argument("--pdb_path", type=str, default='', help="Path to a single PDB to be designed")
argparser.add_argument("--pdb_path_chains", type=str, default='', help="Define which chains need to be designed for a single PDB ")
argparser.add_argument("--jsonl_path", type=str, help="Path to a folder with parsed pdb into jsonl")
argparser.add_argument("--chain_id_jsonl",type=str, default='', help="Path to a dictionary specifying which chains need to be designed and which ones are fixed, if not specied all chains will be designed.")
argparser.add_argument("--chain_id_jsonl",type=str, default='', help="Path to a dictionary specifying which chains need to be designed and which ones are fixed, if not specified all chains will be designed.")
argparser.add_argument("--fixed_positions_jsonl", type=str, default='', help="Path to a dictionary with fixed positions")
argparser.add_argument("--omit_AAs", type=list, default='X', help="Specify which amino acids should be omitted in the generated sequence, e.g. 'AC' would omit alanine and cysteine.")
argparser.add_argument("--bias_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies AA composion bias if neededi, e.g. {A: -1.1, F: 0.7} would make A less likely and F more likely.")
argparser.add_argument("--bias_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies AA composition bias if needed, e.g. {A: -1.1, F: 0.7} would make A less likely and F more likely.")

argparser.add_argument("--bias_by_res_jsonl", default='', help="Path to dictionary with per position bias.")
argparser.add_argument("--omit_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies which amino acids need to be omited from design at specific chain indices")
argparser.add_argument("--omit_AA_jsonl", type=str, default='', help="Path to a dictionary which specifies which amino acids need to be omitted from design at specific chain indices")
argparser.add_argument("--pssm_jsonl", type=str, default='', help="Path to a dictionary with pssm")
argparser.add_argument("--pssm_multi", type=float, default=0.0, help="A value between [0.0, 1.0], 0.0 means do not use pssm, 1.0 ignore MPNN predictions")
argparser.add_argument("--pssm_threshold", type=float, default=0.0, help="A value between -inf + inf to restric per position AAs")
argparser.add_argument("--pssm_threshold", type=float, default=0.0, help="A value between -inf and +inf to restrict per-position AAs")
argparser.add_argument("--pssm_log_odds_flag", type=int, default=0, help="0 for False, 1 for True")
argparser.add_argument("--pssm_bias_flag", type=int, default=0, help="0 for False, 1 for True")

Expand Down
8 changes: 4 additions & 4 deletions protein_mpnn_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,11 +255,11 @@ def tied_featurize(batch, device, chain_dict, fixed_position_dict=None, omit_AA_
chain_coords = b[f'coords_chain_{letter}'] #this is a dictionary
chain_mask = np.zeros(chain_length) #0.0 for visible chains
if ca_only:
x_chain = np.array(chain_coords[f'CA_chain_{letter}']) #[chain_lenght,1,3] #CA_diff
x_chain = np.array(chain_coords[f'CA_chain_{letter}']) #[chain_length,1,3] #CA_diff
if len(x_chain.shape) == 2:
x_chain = x_chain[:,None,:]
else:
x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_lenght,4,3]
x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_length,4,3]
x_chain_list.append(x_chain)
chain_mask_list.append(chain_mask)
chain_seq_list.append(chain_seq)
Expand Down Expand Up @@ -290,11 +290,11 @@ def tied_featurize(batch, device, chain_dict, fixed_position_dict=None, omit_AA_
chain_coords = b[f'coords_chain_{letter}'] #this is a dictionary
chain_mask = np.ones(chain_length) #1.0 for masked
if ca_only:
x_chain = np.array(chain_coords[f'CA_chain_{letter}']) #[chain_lenght,1,3] #CA_diff
x_chain = np.array(chain_coords[f'CA_chain_{letter}']) #[chain_length,1,3] #CA_diff
if len(x_chain.shape) == 2:
x_chain = x_chain[:,None,:]
else:
x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_lenght,4,3]
x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_length,4,3]
x_chain_list.append(x_chain)
chain_mask_list.append(chain_mask)
chain_seq_list.append(chain_seq)
Expand Down
2 changes: 1 addition & 1 deletion training/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Input flags for `training.py`:
argparser.add_argument("--reload_data_every_n_epochs", type=int, default=2, help="reload training data every n epochs")
argparser.add_argument("--num_examples_per_epoch", type=int, default=1000000, help="number of training examples to load for one epoch")
argparser.add_argument("--batch_size", type=int, default=10000, help="number of tokens for one batch")
argparser.add_argument("--max_protein_length", type=int, default=10000, help="maximum length of the protein complext")
argparser.add_argument("--max_protein_length", type=int, default=10000, help="maximum length of the protein complex")
argparser.add_argument("--hidden_dim", type=int, default=128, help="hidden model dimension")
argparser.add_argument("--num_encoder_layers", type=int, default=3, help="number of encoder layers")
argparser.add_argument("--num_decoder_layers", type=int, default=3, help="number of decoder layers")
Expand Down
2 changes: 1 addition & 1 deletion training/colab_training_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1035,7 +1035,7 @@
"# argparser.add_argument(\"--reload_data_every_n_epochs\", type=int, default=2, help=\"reload training data every n epochs\")\n",
"# argparser.add_argument(\"--num_examples_per_epoch\", type=int, default=1000000, help=\"number of training example to load for one epoch\")\n",
"# argparser.add_argument(\"--batch_size\", type=int, default=10000, help=\"number of tokens for one batch\")\n",
"# argparser.add_argument(\"--max_protein_length\", type=int, default=10000, help=\"maximum length of the protein complext\")\n",
"# argparser.add_argument(\"--max_protein_length\", type=int, default=10000, help=\"maximum length of the protein complex\")\n",
"# argparser.add_argument(\"--hidden_dim\", type=int, default=128, help=\"hidden model dimension\")\n",
"# argparser.add_argument(\"--num_encoder_layers\", type=int, default=3, help=\"number of encoder layers\") \n",
"# argparser.add_argument(\"--num_decoder_layers\", type=int, default=3, help=\"number of decoder layers\")\n",
Expand Down
2 changes: 1 addition & 1 deletion training/model_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def featurize(batch, device):
chain_length = len(chain_seq)
chain_coords = b[f'coords_chain_{letter}'] #this is a dictionary
chain_mask = np.ones(chain_length) #1.0 for masked
x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_lenght,4,3]
x_chain = np.stack([chain_coords[c] for c in [f'N_chain_{letter}', f'CA_chain_{letter}', f'C_chain_{letter}', f'O_chain_{letter}']], 1) #[chain_length,4,3]
x_chain_list.append(x_chain)
chain_mask_list.append(chain_mask)
chain_seq_list.append(chain_seq)
Expand Down
2 changes: 1 addition & 1 deletion training/training.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def main(args):
argparser.add_argument("--reload_data_every_n_epochs", type=int, default=2, help="reload training data every n epochs")
argparser.add_argument("--num_examples_per_epoch", type=int, default=1000000, help="number of training examples to load for one epoch")
argparser.add_argument("--batch_size", type=int, default=10000, help="number of tokens for one batch")
argparser.add_argument("--max_protein_length", type=int, default=10000, help="maximum length of the protein complext")
argparser.add_argument("--max_protein_length", type=int, default=10000, help="maximum length of the protein complex")
argparser.add_argument("--hidden_dim", type=int, default=128, help="hidden model dimension")
argparser.add_argument("--num_encoder_layers", type=int, default=3, help="number of encoder layers")
argparser.add_argument("--num_decoder_layers", type=int, default=3, help="number of decoder layers")
Expand Down