Skip to content

Commit

Permalink
Re-evaluate models with the JSQuAD prompt that includes the title
Browse files Browse the repository at this point in the history
  • Loading branch information
kumapo committed Sep 16, 2023
1 parent 11d8c89 commit 18a2996
Show file tree
Hide file tree
Showing 43 changed files with 366 additions and 54 deletions.
3 changes: 3 additions & 0 deletions models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json"
2 changes: 1 addition & 1 deletion models/abeja-gpt-neox-japanese-2.7b/harness.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b"
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja"
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.2-0.2,xlsum_ja"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.json"
8 changes: 4 additions & 4 deletions models/abeja-gpt-neox-japanese-2.7b/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
"acc_norm": 0.749912800837112,
"acc_norm_stderr": 0.005719527388015089
},
"jsquad-1.1-0.2": {
"exact_match": 13.665015758667266,
"f1": 22.909453892411364
"jsquad-1.2-0.2": {
"exact_match": 15.803692030616839,
"f1": 25.18326978234071
},
"xlsum_ja": {
"rouge2": 6.149952794206885
Expand All @@ -33,7 +33,7 @@
"versions": {
"jcommonsenseqa-1.1-0.2": 1.1,
"jnli-1.1-0.2": 1.1,
"jsquad-1.1-0.2": 1.1,
"jsquad-1.2-0.2": 1.2,
"marc_ja-1.1-0.2": 1.1,
"xlsum_ja": 1.0,
"xwinograd_ja": 1.0
Expand Down
22 changes: 22 additions & 0 deletions models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 15.803692030616839,
"f1": 25.18326978234071
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-1b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json"
2 changes: 1 addition & 1 deletion models/cyberagent/cyberagent-open-calm-1b/harness.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-1b"
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja"
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.2-0.2,xlsum_ja"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-1b/result.json"
8 changes: 4 additions & 4 deletions models/cyberagent/cyberagent-open-calm-1b/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
"acc_norm": 0.7792117195674921,
"acc_norm_stderr": 0.005478034657719626
},
"jsquad-1.1-0.2": {
"exact_match": 37.12291760468258,
"f1": 47.171446643186265
"jsquad-1.2-0.2": {
"exact_match": 39.53174245835209,
"f1": 49.49399460234075
},
"xlsum_ja": {
"rouge2": 2.288077088085482
Expand All @@ -33,7 +33,7 @@
"versions": {
"jcommonsenseqa-1.1-0.2": 1.1,
"jnli-1.1-0.2": 1.1,
"jsquad-1.1-0.2": 1.1,
"jsquad-1.2-0.2": 1.2,
"marc_ja-1.1-0.2": 1.1,
"xlsum_ja": 1.0,
"xwinograd_ja": 1.0
Expand Down
22 changes: 22 additions & 0 deletions models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 39.53174245835209,
"f1": 49.49399460234075
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-1b",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-3b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jsquad-1.2.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json"
8 changes: 4 additions & 4 deletions models/cyberagent/cyberagent-open-calm-7b/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
"acc": 0.6506777893639207,
"acc_stderr": 0.01540328448938605
},
"jsquad-1.1-0.2": {
"exact_match": 45.79018460153084,
"f1": 59.03158509144496
"jsquad-1.2-0.2": {
"exact_match": 48.10895992796038,
"f1": 60.90961937230767
},
"jaqket_v2-0.1-0.2": {
"exact_match": 60.738831615120276,
Expand All @@ -42,7 +42,7 @@
"jcommonsenseqa-1.1-0.2": 1.1,
"jnli-1.1-0.2": 1.1,
"marc_ja-1.1-0.2": 1.1,
"jsquad-1.1-0.2": 1.1,
"jsquad-1.2-0.2": 1.2,
"jaqket_v2-0.1-0.2": 0.1,
"xlsum_ja": 1.0,
"xwinograd_ja": 1.0,
Expand Down
22 changes: 22 additions & 0 deletions models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 48.10895992796038,
"f1": 60.90961937230767
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-large/result.jsquad-1.2.json"
6 changes: 3 additions & 3 deletions models/cyberagent/cyberagent-open-calm-large/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
"acc_norm": 0.7912452040460412,
"acc_norm_stderr": 0.005367632889806105
},
"jsquad-1.1-0.2": {
"exact_match": 37.23547951373255,
"f1": 48.50349592141573
"jsquad-1.2-0.2": {
"exact_match": 40.4997748761819,
"f1": 51.32160467436942
},
"xlsum_ja": {
"rouge2": 1.9854375467671679
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 40.4997748761819,
"f1": 51.32160467436942
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-medium/result.jsquad-1.2.json"
8 changes: 4 additions & 4 deletions models/cyberagent/cyberagent-open-calm-medium/result.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
"acc_norm": 0.8357167771189397,
"acc_norm_stderr": 0.004893675823612713
},
"jsquad-1.1-0.2": {
"exact_match": 28.725799189554255,
"f1": 39.80333448254385
"jsquad-1.2-0.2": {
"exact_match": 29.85141828005403,
"f1": 40.49655778214922
},
"xlsum_ja": {
"rouge2": 2.5775988917922406
Expand All @@ -33,7 +33,7 @@
"versions": {
"jcommonsenseqa-1.1-0.2": 1.1,
"jnli-1.1-0.2": 1.1,
"jsquad-1.1-0.2": 1.1,
"jsquad-1.2-0.2": 1.2,
"marc_ja-1.1-0.2": 1.1,
"xlsum_ja": 1.0,
"xwinograd_ja": 1.0
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 29.85141828005403,
"f1": 40.49655778214922
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.1-0.31"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.1-0.31.json" --batch_size 2
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b/result.jsquad-1.2.json" --batch_size 2

4 changes: 4 additions & 0 deletions models/llama2/llama2-2.7b/harness.jsquad-1.2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.2.json" --batch_size 2

2 changes: 1 addition & 1 deletion models/llama2/llama2-2.7b/harness.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True"
TASK="jsquad-1.1-0.3,jcommonsenseqa-1.1-0.3,jnli-1.1-0.3,marc_ja-1.1-0.3"
TASK="jsquad-1.2-0.3,jcommonsenseqa-1.1-0.3,jnli-1.1-0.3,marc_ja-1.1-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.json" --batch_size 2 > models/llama2/llama2-2.7b/harness.out 2> models/llama2/llama2-2.7b/harness.err


8 changes: 4 additions & 4 deletions models/llama2/llama2-2.7b/result.json
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
{
"results": {
"jsquad-1.1-0.3": {
"exact_match": 58.37460603331832,
"f1": 69.51836154287909
"jsquad-1.2-0.3": {
"exact_match": 59.92796037820801,
"f1": 70.8236875084182
},
"jcommonsenseqa-1.1-0.3": {
"acc": 0.5263628239499554,
Expand All @@ -24,7 +24,7 @@
}
},
"versions": {
"jsquad-1.1-0.3": 1.1,
"jsquad-1.2-0.3": 1.2,
"jcommonsenseqa-1.1-0.3": 1.1,
"jnli-1.1-0.3": 1.1,
"marc_ja-1.1-0.3": 1.1
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"results": {
"jsquad-1.1-0.31": {
"jsquad-1.2-0.3": {
"exact_match": 59.92796037820801,
"f1": 70.8236875084182
}
},
"versions": {
"jsquad-1.1-0.31": 1.1
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
Expand All @@ -19,4 +19,4 @@
"bootstrap_iters": 100000,
"description_dict": {}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.1-0.31"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.json" --batch_size 2
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.jsquad-1.2.json" --batch_size 2
22 changes: 22 additions & 0 deletions models/llama2/llama2-7b-chat/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 62.17919855920756,
"f1": 74.84345935966519
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
4 changes: 4 additions & 0 deletions models/llama2/llama2-7b/harness.jsquad-1.2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b/result.jsquad-1.2.json" --batch_size 2

22 changes: 22 additions & 0 deletions models/llama2/llama2-7b/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 59.92796037820801,
"f1": 70.8236875084182
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.1-0.51"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.1-0.51.json"
TASK="jsquad-1.2-0.5"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2-0.5.json"
Loading

0 comments on commit 18a2996

Please sign in to comment.