#!/bin/bash
## Calculate Metrics for Each Task
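# Each block below runs metric.py over a directory of judged responses,
# passing --model so scores are computed for the corresponding model.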
# advanced_decoding
DirList=("gns_response" "fns_response")
for dir in ${DirList[*]}; do
python metric.py --model llama-2-7b-chat-hf --data-dir "./task/advanced_decoding/advanced_decoding_judge/$dir"
done
# decoding_strategy
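# Directory names follow the "<model>_<decoding setting>" convention,
# e.g. "chatgpt_top-p" or "llama-2-7b-chat-hf_beam".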
DirList=("chatgpt_greedy" "chatgpt_top-p" "llama-2-7b-chat-hf_beam" "llama-2-7b-chat-hf_top-k" "llama-2-7b-chat-hf_top-p" "llama-2-7b-chat-hf_greedy")
for dir in ${DirList[*]}; do
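# keep the text before the first "_" (the model name)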
model=${dir%%_*}
python metric.py --model $model --data-dir "./task/decoding-strategy/decoding-strategy_judge/$dir"
done
DirList=("llama-2-7b-chat-hf_top-p_2" "llama-2-7b-chat-hf_top-p_4" "llama-2-7b-chat-hf_top-p_6" "llama-2-7b-chat-hf_top-p_8" "llama-2-7b-chat-hf_top-p_10")
for dir in ${DirList[*]}; do
model=${dir%%_*}
python metric.py --model $model --data-dir "./task/decoding-strategy/decoding-strategy_judge/$dir"
done
DirList=("chatgpt_top-p_2" "chatgpt_top-p_4" "chatgpt_top-p_6" "chatgpt_top-p_8" "chatgpt_top-p_10")
for dir in ${DirList[*]}; do
model=${dir%%_*}
python metric.py --model $model --data-dir "./task/decoding-strategy/decoding-strategy_judge/$dir"
done
DirList=("llama-2-13b-chat-hf_greedy" "llama-2-70b-chat-hf_greedy" "vicuna-7b_greedy" "vicuna-13b_greedy" "alpaca-7b_greedy")
for dir in ${DirList[*]}; do
model=${dir%%_*}
python metric.py --model $model --data-dir "./task/decoding-strategy/decoding-strategy_judge/$dir"
done
# ir
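# Directory names encode the retrieval setting (number of docs and top-k),
# e.g. "response_1docs_top5".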
DirList=("response_1docs_top1" "response_5docs_top5" "response_10docs_top10" "response_1docs_top2" "response_1docs_top5" "response_1docs_top10")
ModelList=("chatgpt" "llama-2-7b-chat-hf")
for model in ${ModelList[*]}; do
for dir in ${DirList[*]}; do
python metric.py --model $model --data-dir "./task/ir/ir_judge/$dir/$model"
done
done
DirList=("response_2docs_top2")
ModelList=("chatgpt" "llama-2-7b-chat-hf" "llama-2-13b-chat-hf" "vicuna-7b" "vicuna-13b")
for model in ${ModelList[*]}; do
for dir in ${DirList[*]}; do
python metric.py --model $model --data-dir "./task/ir/ir_judge/$dir/$model"
done
done
# main
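# Main experiment: one judge directory per evaluated model.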
ModelList=("chatgpt" "text-davinci-002" "text-davinci-003" "llama-2-7b-chat-hf" "llama-2-13b-chat-hf" "alpaca-7b" "vicuna-7b" "vicuna-13b" "claude-1" "claude-2" "yulan-chat-2-13b-fp16")
for model in ${ModelList[*]}; do
python metric.py --model $model --data-dir "./task/main/judge/$model"
done
# pretrain
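# Intermediate pre-training checkpoints of Baichuan2-7B (identified by the
# checkpoint number in the name), followed by three fully pre-trained base models.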
ModelList=("baichuan2-7b-intermediate-00220" "baichuan2-7b-intermediate-00440" "baichuan2-7b-intermediate-00660" "baichuan2-7b-intermediate-00880" "baichuan2-7b-intermediate-01100" "baichuan2-7b-intermediate-01320" "baichuan2-7b-intermediate-01540" "baichuan2-7b-intermediate-01760" "baichuan2-7b-intermediate-01980" "baichuan2-7b-intermediate-02200" "baichuan2-7b-intermediate-02420")
for model in ${ModelList[*]}; do
python metric.py --model $model --data-dir "./task/pretrain/baichuan_judge/$model"
done
ModelList=("falcon-40b" "galactica-30b" "gpt-neox-20b")
for model in ${ModelList[*]}; do
python metric.py --model $model --data-dir "./task/pretrain/pretrain_judge/$model"
done
# prompt_task
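# The same set of prompt variants is evaluated for chatgpt and llama-2-7b-chat-hf,
# plus a self-reflexion setting for the Llama-2 chat models.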
DirList=("0-shot" "base" "character_info" "domain_info" "few-shot" "in-context_demo" "mannual_desc" "refine_q" "retrieved_demo" "reverse_pos" "synthetic_demo" "synthetic_desc" "wrong_demo")
for dir in ${DirList[*]}; do
python metric.py --model chatgpt --data-dir "./task/prompt_task/prompt_judge/chatgpt/$dir"
done
DirList=("0-shot" "base" "character_info" "domain_info" "few-shot" "in-context_demo" "mannual_desc" "refine_q" "retrieved_demo" "reverse_pos" "synthetic_demo" "synthetic_desc" "wrong_demo")
for dir in ${DirList[*]}; do
python metric.py --model llama-2-7b-chat-hf --data-dir "./task/prompt_task/prompt_judge/llama-2-7b-chat-hf/$dir"
done
ModelList=("llama-2-7b-chat-hf" "llama-2-13b-chat-hf" "llama-2-70b-chat-hf")
for model in ${ModelList[*]}; do
python metric.py --model $model --data-dir "./task/prompt_task/prompt_judge/self_reflexion/$model"
done
# quantization
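# INT4 / INT8 quantized variants of the Llama-2 chat models.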
DirList=("llama-2-7b-chat-hf_INT4" "llama-2-7b-chat-hf_INT8" "llama-2-13b-chat-hf_INT4" "llama-2-13b-chat-hf_INT8")
for dir in ${DirList[*]}; do
model=${dir%%_*}
python metric.py --model $model --data-dir "./task/quantization/quantization_judge/$dir"
done
# RLHF
ModelList=("alpaca-7b" "vicuna-7b")
for model in ${ModelList[*]}; do
python metric.py --model $model --data-dir "./task/RLHF/rlhf_judge/$model"
done
# SFT
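# Judge outputs grouped by instruction-data property (complexity, difficulty,
# diversity, scaling) and by instruction dataset (flan, sharegpt, sinstruct and mixes).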
DirList=("complexity" "difficulty" "diversity" "scaling")
for dir in ${DirList[*]}; do
python metric.py --model llama-7b --data-dir "./task/SFT/sft_judge/$dir"
done
DirList=("flan" "sharegpt" "sinstruct" "sinstruct_sharegpt" "sinstruct_sharegpt_flan_10k")
for dir in ${DirList[*]}; do
python metric.py --model llama-7b --data-dir "./task/SFT/sft_judge/$dir"
done
python metric.py --model llama-7b --data-dir "./task/SFT/sft_judge/llama-7b/"
# wiki_entity
ModelList=("chatgpt" "llama-2-7b-chat-hf")
for model in ${ModelList[*]}; do
python metric.py --model $model --data-dir "./task/wiki_entity/wiki_entity_judge/$model"
done