-
Notifications
You must be signed in to change notification settings - Fork 0
/
attack.yaml
97 lines (78 loc) · 1.87 KB
/
attack.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Hydra defaults list: compose this config first (_self_), then disable
# Hydra's own console logging handlers.
defaults:
- _self_
- override hydra/hydra_logging: disabled
- override hydra/job_logging: disabled
# CSV of harmful-behavior prompts (AdvBench).
dataset_path: datasets/advbench.csv
# Optional dataset slice bounds; null means the full dataset.
# NOTE(review): presumably row indices — confirm against the loader.
start: null
end: null
# Attack variant to run; alternatives kept commented out for quick switching.
# attack_method: "overflow"
# attack_method: "overflow_short"
attack_method: "overflow_fs"
# attack_method: "mismatch_no"
# attack_method: "mismatch_vicuna"
# attack_method: "mismatch_chatml"
# attack_method: "direct_inst"
# Optional extra jailbreak string; null disables.
jailbreak_add: null
# Directory with few-shot material for the "_fs" attack variant.
fs_target_dir: datasets
output_dir: "./output_data"
# Optional defense method and system-prompt override; null disables both.
defense: null
system_prompt: null
# RNG seed for reproducibility.
seed: 42
# Target (victim) model configuration.
# NOTE: nested-key indentation restored — it was flattened in extraction,
# which made this mapping invalid YAML.
llm_params:
  # open source models
  # model_name: "lmsys/vicuna-7b-v1.5"
  # model_name: "mistralai/Mistral-7B-Instruct-v0.2"
  model_name: "meta-llama/Llama-2-7b-chat-hf"
  # model_name: "meta-llama/Meta-Llama-3-8B-Instruct"
  # api models
  # model_name: "claude-2.1"
  # model_name: "claude-3-opus-20240229"
  # model_name: "gemini-1.0-pro-latest"
  # model_name: "gpt-3.5-turbo-azure"
  lora_path: null # setup for finetuned model ckpt
  # true when model_name refers to a hosted API model — confirm with consumer.
  use_api: false
  endpoint: null
  device_map: "auto"
  dtype: "float16"
  # Maximum number of new tokens to generate per response.
  new_gen_length: 100
  batch_size: 64
  generation_config:
    # Greedy decoding: sampling disabled, so temperature is irrelevant (null).
    do_sample: false
    temperature: null
    top_p: 1
# Evaluation settings for scoring attack results.
eval:
  # Keyword-based check (presumably refusal-string matching — confirm).
  kw_enable: true
  # LLM-judge evaluation; disabled by default.
  llm_enable: false
  llm_eval_params:
    model_name: "meta-llama/Meta-Llama-Guard-2-8B"
    # fixed: was capitalized "False" — use canonical lowercase YAML boolean,
    # consistent with every other boolean in this file.
    use_api: false
    endpoint: null
    device_map: "auto"
    dtype: "float16"
    new_gen_length: 100
    batch_size: 64
    generation_config:
      # Deterministic judging: greedy decoding.
      do_sample: false
      temperature: 0
      top_p: 1
# for fs attack gen
# Attacker model used to generate few-shot attack samples.
llm_attack_params:
  model_name: "cognitivecomputations/Wizard-Vicuna-13B-Uncensored"
  use_api: false
  lora_path: null
  endpoint: null
  device_map: "auto"
  dtype: "float16"
  # Short generations: only 20 new tokens per sample.
  new_gen_length: 20
  batch_size: 5
  # 8-bit quantization — presumably to fit the 13B model in memory; confirm.
  quantized_config:
    load_in_8bit: true
    load_in_4bit: false
  generation_config:
    # Stochastic sampling with many candidates per prompt.
    do_sample: true
    num_return_sequences: 100
    temperature: 1
    top_p: 1
# Weights & Biases experiment logging; disabled by default.
wandb:
  enable: false
  project: "Chatbug"