From 18a2996edb4e8bd7cacd9e5e810b1b18ddafe759 Mon Sep 17 00:00:00 2001 From: kumapo Date: Sun, 10 Sep 2023 15:52:30 +0900 Subject: [PATCH] re-evaluate models with jsquad prompt with title --- .../harness.jsquad-1.2.sh | 3 +++ .../abeja-gpt-neox-japanese-2.7b/harness.sh | 2 +- .../abeja-gpt-neox-japanese-2.7b/result.json | 8 +++---- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ .../harness.jsquad-1.2.sh | 3 +++ .../cyberagent-open-calm-1b/harness.sh | 2 +- .../cyberagent-open-calm-1b/result.json | 8 +++---- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ .../harness.jsquad-1.2.sh | 3 +++ .../harness.jsquad-1.2.sh | 3 +++ .../cyberagent-open-calm-7b/result.json | 8 +++---- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ .../harness.jsquad-1.2.sh | 3 +++ .../cyberagent-open-calm-large/result.json | 6 ++--- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ .../harness.jsquad-1.2.sh | 3 +++ .../cyberagent-open-calm-medium/result.json | 8 +++---- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ .../llama-7b/harness.jsquad-1.2.sh} | 4 ++-- .../llama2/llama2-2.7b/harness.jsquad-1.2.sh | 4 ++++ models/llama2/llama2-2.7b/harness.sh | 2 +- models/llama2/llama2-2.7b/result.json | 8 +++---- ...d-1.1-0.31.json => result.jsquad-1.2.json} | 6 ++--- ...quad-1.1-0.31.sh => harness.jsquad-1.2.sh} | 4 ++-- .../llama2-7b-chat/result.jsquad-1.2.json | 22 +++++++++++++++++++ models/llama2/llama2-7b/harness.jsquad-1.2.sh | 4 ++++ .../llama2/llama2-7b/result.jsquad-1.2.json | 22 +++++++++++++++++++ ...quad-1.1-0.51.sh => harness.jsquad-1.2.sh} | 4 ++-- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ ...quad-1.1-0.51.sh => harness.jsquad-1.2.sh} | 4 ++-- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ ...quad-1.1-0.21.sh => harness.jsquad-1.2.sh} | 4 ++-- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ ...quad-1.1-0.21.sh => harness.jsquad-1.2.sh} | 4 ++-- ...d-1.1-0.21.json => result.jsquad-1.2.json} | 4 ++-- ...quad-1.1-0.41.sh => harness.jsquad-1.2.sh} | 4 
++-- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ ...quad-1.1-0.41.sh => harness.jsquad-1.2.sh} | 4 ++-- ...d-1.1-0.41.json => result.jsquad-1.2.json} | 6 ++--- ...quad-1.1-0.41.sh => harness.jsquad-1.2.sh} | 4 ++-- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ ...quad-1.1-0.21.sh => harness.jsquad-1.2.sh} | 4 ++-- .../result.jsquad-1.2.json | 22 +++++++++++++++++++ 43 files changed, 366 insertions(+), 54 deletions(-) create mode 100644 models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh create mode 100644 models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json create mode 100644 models/cyberagent/cyberagent-open-calm-1b/harness.jsquad-1.2.sh create mode 100644 models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json create mode 100644 models/cyberagent/cyberagent-open-calm-3b/harness.jsquad-1.2.sh create mode 100644 models/cyberagent/cyberagent-open-calm-7b/harness.jsquad-1.2.sh create mode 100644 models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json create mode 100644 models/cyberagent/cyberagent-open-calm-large/harness.jsquad-1.2.sh create mode 100644 models/cyberagent/cyberagent-open-calm-large/result.jsquad-1.2.json create mode 100644 models/cyberagent/cyberagent-open-calm-medium/harness.jsquad-1.2.sh create mode 100644 models/cyberagent/cyberagent-open-calm-medium/result.jsquad-1.2.json rename models/{llama2/llama2-2.7b/harness.jsquad-1.1-0.31.sh => llama/llama-7b/harness.jsquad-1.2.sh} (67%) create mode 100644 models/llama2/llama2-2.7b/harness.jsquad-1.2.sh rename models/llama2/llama2-2.7b/{result.jsquad-1.1-0.31.json => result.jsquad-1.2.json} (88%) rename models/llama2/llama2-7b-chat/{harness.jsquad-1.1-0.31.sh => harness.jsquad-1.2.sh} (68%) create mode 100644 models/llama2/llama2-7b-chat/result.jsquad-1.2.json create mode 100644 models/llama2/llama2-7b/harness.jsquad-1.2.sh create mode 100644 models/llama2/llama2-7b/result.jsquad-1.2.json rename 
models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/{harness.jsquad-1.1-0.51.sh => harness.jsquad-1.2.sh} (89%) create mode 100644 models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2.json rename models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/{harness.jsquad-1.1-0.51.sh => harness.jsquad-1.2.sh} (89%) create mode 100644 models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json rename models/rinna/rinna-bilingual-gpt-neox-4b/{harness.jsquad-1.1-0.21.sh => harness.jsquad-1.2.sh} (72%) create mode 100644 models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json rename models/rinna/rinna-japanese-gpt-1b/{harness.jsquad-1.1-0.21.sh => harness.jsquad-1.2.sh} (70%) rename models/rinna/rinna-japanese-gpt-1b/{result.jsquad-1.1-0.21.json => result.jsquad-1.2.json} (88%) rename models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/{harness.jsquad-1.1-0.41.sh => harness.jsquad-1.2.sh} (88%) create mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json rename models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/{harness.jsquad-1.1-0.41.sh => harness.jsquad-1.2.sh} (88%) rename models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/{result.jsquad-1.1-0.41.json => result.jsquad-1.2.json} (89%) rename models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/{harness.jsquad-1.1-0.41.sh => harness.jsquad-1.2.sh} (88%) create mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.2.json rename models/rinna/rinna-japanese-gpt-neox-3.6b/{harness.jsquad-1.1-0.21.sh => harness.jsquad-1.2.sh} (72%) create mode 100644 models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.2.json diff --git a/models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh b/models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh new file mode 100644 index 0000000000..974fea8736 --- /dev/null +++ b/models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh @@ 
-0,0 +1,3 @@ +MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json" \ No newline at end of file diff --git a/models/abeja-gpt-neox-japanese-2.7b/harness.sh b/models/abeja-gpt-neox-japanese-2.7b/harness.sh index a9d071805c..f1203972df 100644 --- a/models/abeja-gpt-neox-japanese-2.7b/harness.sh +++ b/models/abeja-gpt-neox-japanese-2.7b/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b" -TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.2-0.2,xlsum_ja" python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.json" \ No newline at end of file diff --git a/models/abeja-gpt-neox-japanese-2.7b/result.json b/models/abeja-gpt-neox-japanese-2.7b/result.json index 302db7dba0..8e6c00a311 100644 --- a/models/abeja-gpt-neox-japanese-2.7b/result.json +++ b/models/abeja-gpt-neox-japanese-2.7b/result.json @@ -18,9 +18,9 @@ "acc_norm": 0.749912800837112, "acc_norm_stderr": 0.005719527388015089 }, - "jsquad-1.1-0.2": { - "exact_match": 13.665015758667266, - "f1": 22.909453892411364 + "jsquad-1.2-0.2": { + "exact_match": 15.803692030616839, + "f1": 25.18326978234071 }, "xlsum_ja": { "rouge2": 6.149952794206885 @@ -33,7 +33,7 @@ "versions": { "jcommonsenseqa-1.1-0.2": 1.1, "jnli-1.1-0.2": 1.1, - "jsquad-1.1-0.2": 1.1, + "jsquad-1.2-0.2": 1.2, "marc_ja-1.1-0.2": 1.1, "xlsum_ja": 1.0, "xwinograd_ja": 1.0 diff --git a/models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json b/models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json new file mode 100644 index 0000000000..7b13dd2c3c --- /dev/null +++ 
b/models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.2": { + "exact_match": 15.803692030616839, + "f1": 25.18326978234071 + } + }, + "versions": { + "jsquad-1.2-0.2": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto", + "num_fewshot": 3, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/cyberagent/cyberagent-open-calm-1b/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-1b/harness.jsquad-1.2.sh new file mode 100644 index 0000000000..015a9b2650 --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-1b/harness.jsquad-1.2.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=cyberagent/open-calm-1b,device_map=auto,torch_dtype=auto" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.jsquad-1.2.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-1b/harness.sh b/models/cyberagent/cyberagent-open-calm-1b/harness.sh index 20eb407ff1..517335274b 100644 --- a/models/cyberagent/cyberagent-open-calm-1b/harness.sh +++ b/models/cyberagent/cyberagent-open-calm-1b/harness.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=cyberagent/open-calm-1b" -TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja" +TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.2-0.2,xlsum_ja" python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-1b/result.json b/models/cyberagent/cyberagent-open-calm-1b/result.json index c13bee407d..ec424ddfc7 100644 --- 
a/models/cyberagent/cyberagent-open-calm-1b/result.json +++ b/models/cyberagent/cyberagent-open-calm-1b/result.json @@ -18,9 +18,9 @@ "acc_norm": 0.7792117195674921, "acc_norm_stderr": 0.005478034657719626 }, - "jsquad-1.1-0.2": { - "exact_match": 37.12291760468258, - "f1": 47.171446643186265 + "jsquad-1.2-0.2": { + "exact_match": 39.53174245835209, + "f1": 49.49399460234075 }, "xlsum_ja": { "rouge2": 2.288077088085482 @@ -33,7 +33,7 @@ "versions": { "jcommonsenseqa-1.1-0.2": 1.1, "jnli-1.1-0.2": 1.1, - "jsquad-1.1-0.2": 1.1, + "jsquad-1.2-0.2": 1.2, "marc_ja-1.1-0.2": 1.1, "xlsum_ja": 1.0, "xwinograd_ja": 1.0 diff --git a/models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json b/models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json new file mode 100644 index 0000000000..7fce196a57 --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.2": { + "exact_match": 39.53174245835209, + "f1": 49.49399460234075 + } + }, + "versions": { + "jsquad-1.2-0.2": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=cyberagent/open-calm-1b", + "num_fewshot": 3, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/cyberagent/cyberagent-open-calm-3b/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-3b/harness.jsquad-1.2.sh new file mode 100644 index 0000000000..32424c0a81 --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-3b/harness.jsquad-1.2.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=cyberagent/open-calm-3b,device_map=auto,torch_dtype=auto" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jsquad-1.2.json" diff --git a/models/cyberagent/cyberagent-open-calm-7b/harness.jsquad-1.2.sh 
b/models/cyberagent/cyberagent-open-calm-7b/harness.jsquad-1.2.sh new file mode 100644 index 0000000000..730fdce25d --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-7b/harness.jsquad-1.2.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json" diff --git a/models/cyberagent/cyberagent-open-calm-7b/result.json b/models/cyberagent/cyberagent-open-calm-7b/result.json index 6dd2cb5062..c539844346 100644 --- a/models/cyberagent/cyberagent-open-calm-7b/result.json +++ b/models/cyberagent/cyberagent-open-calm-7b/result.json @@ -22,9 +22,9 @@ "acc": 0.6506777893639207, "acc_stderr": 0.01540328448938605 }, - "jsquad-1.1-0.2": { - "exact_match": 45.79018460153084, - "f1": 59.03158509144496 + "jsquad-1.2-0.2": { + "exact_match": 48.10895992796038, + "f1": 60.90961937230767 }, "jaqket_v2-0.1-0.2": { "exact_match": 60.738831615120276, @@ -42,7 +42,7 @@ "jcommonsenseqa-1.1-0.2": 1.1, "jnli-1.1-0.2": 1.1, "marc_ja-1.1-0.2": 1.1, - "jsquad-1.1-0.2": 1.1, + "jsquad-1.2-0.2": 1.2, "jaqket_v2-0.1-0.2": 0.1, "xlsum_ja": 1.0, "xwinograd_ja": 1.0, diff --git a/models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json b/models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json new file mode 100644 index 0000000000..6ccffe99f7 --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.2": { + "exact_match": 48.10895992796038, + "f1": 60.90961937230767 + } + }, + "versions": { + "jsquad-1.2-0.2": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto", + "num_fewshot": 2, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + 
"bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/cyberagent/cyberagent-open-calm-large/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-large/harness.jsquad-1.2.sh new file mode 100644 index 0000000000..3d4fa40d5a --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-large/harness.jsquad-1.2.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-large/result.jsquad-1.2.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-large/result.json b/models/cyberagent/cyberagent-open-calm-large/result.json index 7d6f95f225..0ade0ab0bf 100644 --- a/models/cyberagent/cyberagent-open-calm-large/result.json +++ b/models/cyberagent/cyberagent-open-calm-large/result.json @@ -18,9 +18,9 @@ "acc_norm": 0.7912452040460412, "acc_norm_stderr": 0.005367632889806105 }, - "jsquad-1.1-0.2": { - "exact_match": 37.23547951373255, - "f1": 48.50349592141573 + "jsquad-1.2-0.2": { + "exact_match": 40.4997748761819, + "f1": 51.32160467436942 }, "xlsum_ja": { "rouge2": 1.9854375467671679 diff --git a/models/cyberagent/cyberagent-open-calm-large/result.jsquad-1.2.json b/models/cyberagent/cyberagent-open-calm-large/result.jsquad-1.2.json new file mode 100644 index 0000000000..8f6d038661 --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-large/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.2": { + "exact_match": 40.4997748761819, + "f1": 51.32160467436942 + } + }, + "versions": { + "jsquad-1.2-0.2": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto", + "num_fewshot": 3, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + 
"bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/cyberagent/cyberagent-open-calm-medium/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-medium/harness.jsquad-1.2.sh new file mode 100644 index 0000000000..c70a3e5024 --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-medium/harness.jsquad-1.2.sh @@ -0,0 +1,3 @@ +MODEL_ARGS="pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-medium/result.jsquad-1.2.json" \ No newline at end of file diff --git a/models/cyberagent/cyberagent-open-calm-medium/result.json b/models/cyberagent/cyberagent-open-calm-medium/result.json index 25799df78b..ef8116b7d4 100644 --- a/models/cyberagent/cyberagent-open-calm-medium/result.json +++ b/models/cyberagent/cyberagent-open-calm-medium/result.json @@ -18,9 +18,9 @@ "acc_norm": 0.8357167771189397, "acc_norm_stderr": 0.004893675823612713 }, - "jsquad-1.1-0.2": { - "exact_match": 28.725799189554255, - "f1": 39.80333448254385 + "jsquad-1.2-0.2": { + "exact_match": 29.85141828005403, + "f1": 40.49655778214922 }, "xlsum_ja": { "rouge2": 2.5775988917922406 @@ -33,7 +33,7 @@ "versions": { "jcommonsenseqa-1.1-0.2": 1.1, "jnli-1.1-0.2": 1.1, - "jsquad-1.1-0.2": 1.1, + "jsquad-1.2-0.2": 1.2, "marc_ja-1.1-0.2": 1.1, "xlsum_ja": 1.0, "xwinograd_ja": 1.0 diff --git a/models/cyberagent/cyberagent-open-calm-medium/result.jsquad-1.2.json b/models/cyberagent/cyberagent-open-calm-medium/result.jsquad-1.2.json new file mode 100644 index 0000000000..4de6f23de7 --- /dev/null +++ b/models/cyberagent/cyberagent-open-calm-medium/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.2": { + "exact_match": 29.85141828005403, + "f1": 40.49655778214922 + } + }, + "versions": { + "jsquad-1.2-0.2": 1.2 + }, + "config": { + "model": "hf-causal", + 
"model_args": "pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto", + "num_fewshot": 3, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/llama2/llama2-2.7b/harness.jsquad-1.1-0.31.sh b/models/llama/llama-7b/harness.jsquad-1.2.sh similarity index 67% rename from models/llama2/llama2-2.7b/harness.jsquad-1.1-0.31.sh rename to models/llama/llama-7b/harness.jsquad-1.2.sh index fdfa73aea4..a3fcaf3975 100644 --- a/models/llama2/llama2-2.7b/harness.jsquad-1.1-0.31.sh +++ b/models/llama/llama-7b/harness.jsquad-1.2.sh @@ -1,4 +1,4 @@ MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto" -TASK="jsquad-1.1-0.31" -python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.1-0.31.json" --batch_size 2 +TASK="jsquad-1.2-0.3" +python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b/result.jsquad-1.2.json" --batch_size 2 diff --git a/models/llama2/llama2-2.7b/harness.jsquad-1.2.sh b/models/llama2/llama2-2.7b/harness.jsquad-1.2.sh new file mode 100644 index 0000000000..568bea2699 --- /dev/null +++ b/models/llama2/llama2-2.7b/harness.jsquad-1.2.sh @@ -0,0 +1,4 @@ +MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto" +TASK="jsquad-1.2-0.3" +python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.2.json" --batch_size 2 + diff --git a/models/llama2/llama2-2.7b/harness.sh b/models/llama2/llama2-2.7b/harness.sh index e1a9ca435c..15ad27c2d9 100644 --- a/models/llama2/llama2-2.7b/harness.sh +++ b/models/llama2/llama2-2.7b/harness.sh @@ -1,5 +1,5 @@ 
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True" -TASK="jsquad-1.1-0.3,jcommonsenseqa-1.1-0.3,jnli-1.1-0.3,marc_ja-1.1-0.3" +TASK="jsquad-1.2-0.3,jcommonsenseqa-1.1-0.3,jnli-1.1-0.3,marc_ja-1.1-0.3" python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.json" --batch_size 2 > models/llama2/llama2-2.7b/harness.out 2> models/llama2/llama2-2.7b/harness.err diff --git a/models/llama2/llama2-2.7b/result.json b/models/llama2/llama2-2.7b/result.json index c085b5a6c1..717adb66c1 100644 --- a/models/llama2/llama2-2.7b/result.json +++ b/models/llama2/llama2-2.7b/result.json @@ -1,8 +1,8 @@ { "results": { - "jsquad-1.1-0.3": { - "exact_match": 58.37460603331832, - "f1": 69.51836154287909 + "jsquad-1.2-0.3": { + "exact_match": 59.92796037820801, + "f1": 70.8236875084182 }, "jcommonsenseqa-1.1-0.3": { "acc": 0.5263628239499554, @@ -24,7 +24,7 @@ } }, "versions": { - "jsquad-1.1-0.3": 1.1, + "jsquad-1.2-0.3": 1.2, "jcommonsenseqa-1.1-0.3": 1.1, "jnli-1.1-0.3": 1.1, "marc_ja-1.1-0.3": 1.1 diff --git a/models/llama2/llama2-2.7b/result.jsquad-1.1-0.31.json b/models/llama2/llama2-2.7b/result.jsquad-1.2.json similarity index 88% rename from models/llama2/llama2-2.7b/result.jsquad-1.1-0.31.json rename to models/llama2/llama2-2.7b/result.jsquad-1.2.json index e7d8043de8..e08c3863ae 100644 --- a/models/llama2/llama2-2.7b/result.jsquad-1.1-0.31.json +++ b/models/llama2/llama2-2.7b/result.jsquad-1.2.json @@ -1,12 +1,12 @@ { "results": { - "jsquad-1.1-0.31": { + "jsquad-1.2-0.3": { "exact_match": 59.92796037820801, "f1": 70.8236875084182 } }, "versions": { - "jsquad-1.1-0.31": 1.1 + "jsquad-1.2-0.3": 1.2 }, "config": { "model": "hf-causal-experimental", @@ -19,4 +19,4 @@ "bootstrap_iters": 100000, "description_dict": {} } -} \ No newline at end of file +} diff --git a/models/llama2/llama2-7b-chat/harness.jsquad-1.1-0.31.sh 
b/models/llama2/llama2-7b-chat/harness.jsquad-1.2.sh similarity index 68% rename from models/llama2/llama2-7b-chat/harness.jsquad-1.1-0.31.sh rename to models/llama2/llama2-7b-chat/harness.jsquad-1.2.sh index 469af41860..02d2c04d11 100644 --- a/models/llama2/llama2-7b-chat/harness.jsquad-1.1-0.31.sh +++ b/models/llama2/llama2-7b-chat/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto" -TASK="jsquad-1.1-0.31" -python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.json" --batch_size 2 +TASK="jsquad-1.2-0.3" +python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.jsquad-1.2.json" --batch_size 2 diff --git a/models/llama2/llama2-7b-chat/result.jsquad-1.2.json b/models/llama2/llama2-7b-chat/result.jsquad-1.2.json new file mode 100644 index 0000000000..4cc31eb6b0 --- /dev/null +++ b/models/llama2/llama2-7b-chat/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.3": { + "exact_match": 62.17919855920756, + "f1": 74.84345935966519 + } + }, + "versions": { + "jsquad-1.2-0.3": 1.2 + }, + "config": { + "model": "hf-causal-experimental", + "model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto", + "num_fewshot": 2, + "batch_size": 2, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/llama2/llama2-7b/harness.jsquad-1.2.sh b/models/llama2/llama2-7b/harness.jsquad-1.2.sh new file mode 100644 index 0000000000..a3fcaf3975 --- /dev/null +++ b/models/llama2/llama2-7b/harness.jsquad-1.2.sh @@ -0,0 +1,4 @@ +MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto" +TASK="jsquad-1.2-0.3" +python main.py --model 
hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b/result.jsquad-1.2.json" --batch_size 2 + diff --git a/models/llama2/llama2-7b/result.jsquad-1.2.json b/models/llama2/llama2-7b/result.jsquad-1.2.json new file mode 100644 index 0000000000..e08c3863ae --- /dev/null +++ b/models/llama2/llama2-7b/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.3": { + "exact_match": 59.92796037820801, + "f1": 70.8236875084182 + } + }, + "versions": { + "jsquad-1.2-0.3": 1.2 + }, + "config": { + "model": "hf-causal-experimental", + "model_args": "pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto", + "num_fewshot": 2, + "batch_size": 2, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/harness.jsquad-1.1-0.51.sh b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/harness.jsquad-1.2.sh similarity index 89% rename from models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/harness.jsquad-1.1-0.51.sh rename to models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/harness.jsquad-1.2.sh index 73396d81c3..0996e5f292 100644 --- a/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/harness.jsquad-1.1-0.51.sh +++ b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jsquad-1.1-0.51" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.1-0.51.json" +TASK="jsquad-1.2-0.5" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path 
"models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2-0.5.json" diff --git a/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2.json b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2.json new file mode 100644 index 0000000000..2614b439f6 --- /dev/null +++ b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.5": { + "exact_match": 55.94326879783881, + "f1": 70.64052956733126 + } + }, + "versions": { + "jsquad-1.2-0.5": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=rinna/bilingual-gpt-neox-4b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": 2, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/harness.jsquad-1.1-0.51.sh b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/harness.jsquad-1.2.sh similarity index 89% rename from models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/harness.jsquad-1.1-0.51.sh rename to models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/harness.jsquad-1.2.sh index 776e2910ec..7112320819 100644 --- a/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/harness.jsquad-1.1-0.51.sh +++ b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jsquad-1.1-0.51" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.1-0.51.json" +TASK="jsquad-1.2-0.5" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" 
--output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json" diff --git a/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json new file mode 100644 index 0000000000..8a8bf42647 --- /dev/null +++ b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.5": { + "exact_match": 58.66726699684827, + "f1": 72.38803519363597 + } + }, + "versions": { + "jsquad-1.2-0.5": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=rinna/bilingual-gpt-neox-4b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": 2, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/rinna/rinna-bilingual-gpt-neox-4b/harness.jsquad-1.1-0.21.sh b/models/rinna/rinna-bilingual-gpt-neox-4b/harness.jsquad-1.2.sh similarity index 72% rename from models/rinna/rinna-bilingual-gpt-neox-4b/harness.jsquad-1.1-0.21.sh rename to models/rinna/rinna-bilingual-gpt-neox-4b/harness.jsquad-1.2.sh index f9eee7584d..8276c40fb4 100644 --- a/models/rinna/rinna-bilingual-gpt-neox-4b/harness.jsquad-1.1-0.21.sh +++ b/models/rinna/rinna-bilingual-gpt-neox-4b/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jsquad-1.1-0.21" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.1-0.21.json" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json" diff --git 
a/models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json b/models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json new file mode 100644 index 0000000000..32c9c4abf5 --- /dev/null +++ b/models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.2": { + "exact_match": 51.32823052678973, + "f1": 61.9390389728309 + } + }, + "versions": { + "jsquad-1.2-0.2": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": 2, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/rinna/rinna-japanese-gpt-1b/harness.jsquad-1.1-0.21.sh b/models/rinna/rinna-japanese-gpt-1b/harness.jsquad-1.2.sh similarity index 70% rename from models/rinna/rinna-japanese-gpt-1b/harness.jsquad-1.1-0.21.sh rename to models/rinna/rinna-japanese-gpt-1b/harness.jsquad-1.2.sh index 32b65c0b54..6824f3f3cd 100644 --- a/models/rinna/rinna-japanese-gpt-1b/harness.jsquad-1.1-0.21.sh +++ b/models/rinna/rinna-japanese-gpt-1b/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-1b,use_fast=False" -TASK="jsquad-1.1-0.21" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.1-0.21.json" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json" diff --git a/models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.1-0.21.json b/models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json similarity index 88% rename from models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.1-0.21.json rename to models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json 
index ce2d366360..db1fc1dc33 100644 --- a/models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.1-0.21.json +++ b/models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json @@ -1,12 +1,12 @@ { "results": { - "jsquad-1.1-0.21": { + "jsquad-1.2-0.2": { "exact_match": 30.189104007203962, "f1": 47.12467642283419 } }, "versions": { - "jsquad-1.1-0.21": 1.1 + "jsquad-1.2-0.2": 1.2 }, "config": { "model": "hf-causal", diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jsquad-1.1-0.41.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jsquad-1.2.sh similarity index 88% rename from models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jsquad-1.1-0.41.sh rename to models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jsquad-1.2.sh index 4021cc4faf..ce3f084cb9 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jsquad-1.1-0.41.sh +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jsquad-1.1-0.41" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.1-0.41.json" +TASK="jsquad-1.2-0.4" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json new file mode 100644 index 0000000000..86254d4282 --- /dev/null +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.4": { +
"exact_match": 53.89464205312922, + "f1": 65.73194869643035 + } + }, + "versions": { + "jsquad-1.2-0.4": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": 2, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jsquad-1.1-0.41.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jsquad-1.2.sh similarity index 88% rename from models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jsquad-1.1-0.41.sh rename to models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jsquad-1.2.sh index bc3d00877f..6971a7f55b 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jsquad-1.1-0.41.sh +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft-v2,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jsquad-1.1-0.41" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.1-0.41.json" +TASK="jsquad-1.2-0.4" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.2.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.1-0.41.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.2.json similarity index 89% rename from models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.1-0.41.json rename to 
models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.2.json index b7824df68b..92a360030a 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.1-0.41.json +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.2.json @@ -1,12 +1,12 @@ { "results": { - "jsquad-1.1-0.41": { + "jsquad-1.2-0.4": { "exact_match": 47.90634849167042, "f1": 62.1059309037734 } }, "versions": { - "jsquad-1.1-0.41": 1.1 + "jsquad-1.2-0.4": 1.2 }, "config": { "model": "hf-causal", @@ -19,4 +19,4 @@ "bootstrap_iters": 100000, "description_dict": {} } -} \ No newline at end of file +} diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jsquad-1.1-0.41.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jsquad-1.2.sh similarity index 88% rename from models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jsquad-1.1-0.41.sh rename to models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jsquad-1.2.sh index 02f556896b..9f5ac41609 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jsquad-1.1-0.41.sh +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jsquad-1.1-0.41" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.1-0.41.json" +TASK="jsquad-1.2-0.4" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.2.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.2.json 
b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.2.json new file mode 100644 index 0000000000..77948c9677 --- /dev/null +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.4": { + "exact_match": 49.392165691130124, + "f1": 63.56485708061489 + } + }, + "versions": { + "jsquad-1.2-0.4": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": 2, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +} diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jsquad-1.1-0.21.sh b/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jsquad-1.2.sh similarity index 72% rename from models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jsquad-1.1-0.21.sh rename to models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jsquad-1.2.sh index 7767abc2fa..899442be2e 100644 --- a/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jsquad-1.1-0.21.sh +++ b/models/rinna/rinna-japanese-gpt-neox-3.6b/harness.jsquad-1.2.sh @@ -1,3 +1,3 @@ MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False,device_map=auto,torch_dtype=auto" -TASK="jsquad-1.1-0.21" -python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.1-0.21.json" +TASK="jsquad-1.2-0.2" +python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.2.json" diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.2.json b/models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.2.json new file mode 100644 index 0000000000..d708afc85f --- /dev/null +++ 
b/models/rinna/rinna-japanese-gpt-neox-3.6b/result.jsquad-1.2.json @@ -0,0 +1,22 @@ +{ + "results": { + "jsquad-1.2-0.2": { + "exact_match": 49.0094552003602, + "f1": 59.80363888369063 + } + }, + "versions": { + "jsquad-1.2-0.2": 1.2 + }, + "config": { + "model": "hf-causal", + "model_args": "pretrained=rinna/japanese-gpt-neox-3.6b,use_fast=False,device_map=auto,torch_dtype=auto", + "num_fewshot": 2, + "batch_size": null, + "device": "cuda", + "no_cache": false, + "limit": null, + "bootstrap_iters": 100000, + "description_dict": {} + } +}