Skip to content

Commit

Permalink
Add dropout noise only variant
Browse files Browse the repository at this point in the history
  • Loading branch information
yutanagano committed Apr 15, 2024
1 parent 3ced665 commit 7f5395f
Show file tree
Hide file tree
Showing 6 changed files with 314 additions and 0 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ This `Sceptr` object will then have the methods: `calc_pdist_vector`, `calc_cdis
|`sceptr.variant.average_pooling`|variant using the average-pooling method to generate the TCR representation vector|
|`sceptr.variant.unpaired`|variant trained on the Tanno et al. dataset with randomised alpha/beta pairing|
|`sceptr.variant.olga`|variant trained using synthetic TCR sequences generated by OLGA|
|`sceptr.variant.dropout_noise_only`|variant trained without residue/chain dropping during autocontrastive learning, leaving dropout noise as the only source of augmentation|
|`sceptr.variant.finetuned`|variant fine-tuned using supervised contrastive learning for six pMHCs with peptides GILGFVFTL, NLVPMVATV, SPRWYFYYL, TFEYVSQPFLMDLE, TTDPSFLGRY and YLQPRTFLL (from [VDJdb](https://vdjdb.cdr3.net/))|

#### Single-chain variants
Expand Down
103 changes: 103 additions & 0 deletions src/sceptr/_model_saves/SCEPTR_dropout_noise_only/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
{
  "training_delegate": {
    "class": "ClTrainingDelegate",
    "initargs": {}
  },
  "model": {
    "name": "SCEPTR (dropout noise only)",
    "path_to_pretrained_state_dict": null,
    "token_embedder": {
      "class": "CdrSimpleEmbedder",
      "initargs": {}
    },
    "self_attention_stack": {
      "class": "SelfAttentionStackWithInitialProjection",
      "initargs": {
        "num_layers": 3,
        "embedding_dim": 29,
        "d_model": 64,
        "nhead": 8
      }
    },
    "mlm_token_prediction_projector": {
      "class": "AminoAcidTokenProjector",
      "initargs": {
        "d_model": 64
      }
    },
    "vector_representation_delegate": {
      "class": "ClsVectorRepresentationDelegate",
      "initargs": {}
    },
    "trainable_model": {
      "class": "ClTrainableModel",
      "initargs": {}
    }
  },
  "data": {
    "training_data": {
      "dataset": {
        "class": "TcrDataset",
        "initargs": {}
      },
      "dataloader": {
        "class": "SingleDatasetDataLoader",
        "initargs": {
          "batch_size": 1024,
          "num_workers": 4
        }
      },
      "csv_paths": [
        "tcr_data/preprocessed/tanno/train.csv"
      ]
    },
    "validation_data": {
      "dataset": {
        "class": "TcrDataset",
        "initargs": {}
      },
      "dataloader": {
        "class": "SingleDatasetDataLoader",
        "initargs": {
          "batch_size": 1024,
          "num_workers": 4
        }
      },
      "csv_paths": [
        "tcr_data/preprocessed/tanno/test.csv"
      ]
    },
    "tokeniser": {
      "class": "CdrTokeniser",
      "initargs": {}
    },
    "batch_collator": {
      "class": "ClBatchCollator",
      "initargs": {
        "frac_dropped_tokens": 0,
        "prob_drop_chain": 0
      }
    }
  },
  "loss": {
    "cross_entropy_loss": {
      "class": "AdjustedCrossEntropyLoss",
      "initargs": {
        "label_smoothing": 0.1
      }
    },
    "contrastive_loss": {
      "class": "DotProductLoss",
      "initargs": {
        "temp": 0.05
      }
    }
  },
  "optimiser": {
    "initargs": {
      "n_warmup_steps": 10000,
      "d_model": 64
    }
  },
  "num_epochs": 200
}
Loading

0 comments on commit 7f5395f

Please sign in to comment.