{
"model_name_or_path": "mistralai/Mistral-7B-Instruct-v0.2",
"peft_model_name_or_path": "McGill-NLP/LLM2Vec-Mistral-7B-Instruct-v2-mntp",
"simcse_dropout": 0.3,
"bidirectional": true,
"pooling_mode": "mean",
"dataset_name": "Wiki1M",
"dataset_file_path": "cache/wiki1m_for_simcse.txt",
"remove_unused_columns": false,
"learning_rate": 3e-5,
"loss_scale": 20,
"per_device_train_batch_size": 128,
"gradient_accumulation_steps": 1,
"do_train": true,
"disable_tqdm": false,
"max_seq_length": 128,
"overwrite_output_dir": true,
"output_dir": "output/mntp-simcse/Mistral-7B-Instruct-v0.2",
"logging_steps": 50,
"save_steps": 200,
"save_only_model": true,
"stop_after_n_steps": 1000,
"lora_r": 16,
"gradient_checkpointing": true,
"torch_dtype": "bfloat16",
"attn_implementation": "flash_attention_2",
"seed": 42
}