Skip to content

Commit

Permalink
re-evaluate models with jsquad prompt with title
Browse files Browse the repository at this point in the history
  • Loading branch information
kumapo committed Sep 12, 2023
1 parent 6fefd43 commit a73f7a1
Show file tree
Hide file tree
Showing 33 changed files with 315 additions and 29 deletions.
3 changes: 3 additions & 0 deletions models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 15.803692030616839,
"f1": 25.18326978234071
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-1b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.jsquad-1.2.json"
2 changes: 1 addition & 1 deletion models/cyberagent/cyberagent-open-calm-1b/harness.sh
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-1b"
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja"
TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.2-0.2,xlsum_ja"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.json"
22 changes: 22 additions & 0 deletions models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 39.53174245835209,
"f1": 49.49399460234075
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-1b",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-3b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jsquad-1.2.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 48.10895992796038,
"f1": 60.90961937230767
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-large/result.jsquad-1.2.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 40.4997748761819,
"f1": 51.32160467436942
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-medium/result.jsquad-1.2.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 29.85141828005403,
"f1": 40.49655778214922
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.1-0.31"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.1-0.31.json" --batch_size 2
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.2.json" --batch_size 2

Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"results": {
"jsquad-1.1-0.31": {
"jsquad-1.2-0.3": {
"exact_match": 59.92796037820801,
"f1": 70.8236875084182
}
},
"versions": {
"jsquad-1.1-0.31": 1.1
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
Expand All @@ -19,4 +19,4 @@
"bootstrap_iters": 100000,
"description_dict": {}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.1-0.31"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.json" --batch_size 2
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.jsquad-1.2.json" --batch_size 2
22 changes: 22 additions & 0 deletions models/llama2/llama2-7b-chat/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 62.17919855920756,
"f1": 74.84345935966519
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
4 changes: 4 additions & 0 deletions models/llama2/llama2-7b/harness.jsquad-1.2.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b/result.jsquad-1.2.json" --batch_size 2

Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.1-0.51"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.1-0.51.json"
TASK="jsquad-1.2-0.5"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2-0.5.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.5": {
"exact_match": 55.94326879783881,
"f1": 70.64052956733126
}
},
"versions": {
"jsquad-1.2-0.5": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=rinna/bilingual-gpt-neox-4b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.1-0.51"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.1-0.51.json"
TASK="jsquad-1.2-0.5"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.5": {
"exact_match": 58.66726699684827,
"f1": 72.38803519363597
}
},
"versions": {
"jsquad-1.2-0.5": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=rinna/bilingual-gpt-neox-4b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.1-0.21"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.1-0.21.json"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 51.32823052678973,
"f1": 61.9390389728309
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=rinna/japanese-gpt-1b,use_fast=False"
TASK="jsquad-1.1-0.21"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.1-0.21.json"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json"
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"results": {
"jsquad-1.1-0.21": {
"jsquad-1.2-0.2": {
"exact_match": 30.189104007203962,
"f1": 47.12467642283419
}
},
"versions": {
"jsquad-1.1-0.21": 1.1
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.1-0.41"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.1-0.41.json"
TASK="jsquad-1.2-0.4"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json"
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.4": {
"exact_match": 53.89464205312922,
"f1": 65.73194869643035
}
},
"versions": {
"jsquad-1.2-0.4": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft-v2,use_fast=False,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.1-0.41"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.1-0.41.json"
TASK="jsquad-1.2-0.4"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.2.json"
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"results": {
"jsquad-1.1-0.41": {
"jsquad-1.2-0.4": {
"exact_match": 47.90634849167042,
"f1": 62.1059309037734
}
},
"versions": {
"jsquad-1.1-0.41": 1.1
"jsquad-1.2-0.4": 1.2
},
"config": {
"model": "hf-causal",
Expand All @@ -19,4 +19,4 @@
"bootstrap_iters": 100000,
"description_dict": {}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.1-0.41"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.1-0.41.json"
TASK="jsquad-1.2-0.4"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.2.json"
Loading

0 comments on commit a73f7a1

Please sign in to comment.