Compare results between JSQuAD prompts with and without title #84

Merged
merged 8 commits, Sep 30, 2023
87 changes: 85 additions & 2 deletions lm_eval/tasks/ja/jsquad.py
@@ -178,6 +178,7 @@ def _squad_agg(self, key, item):
predictions, references = zip(*item)
return self._squad_metric(predictions=predictions, references=references)[key]


class JSQuADWithFintanPrompt(JSQuAD):
"""
prompt template is taken from [ChatGPT vs BERT: どちらが日本語をより理解できるのか?](https://fintan.jp/page/9126/)
@@ -195,7 +196,28 @@ def doc_to_text(self, doc):
+ f"{self.SEP}"
+ "回答:"
)


class JSQuADWithFintanPromptV12(JSQuADWithFintanPrompt):
"""
prompt template is taken from [ChatGPT vs BERT: どちらが日本語をより理解できるのか?](https://fintan.jp/page/9126/)
"""
VERSION = 1.2
DESCRIPTION = "質問に対する回答を題名と文章から一言で抽出してください。回答は名詞で答えてください。\n\n"
def doc_to_text(self, doc):
return (
"題名:"
+ doc["title"]
+ f"{self.SEP}"
+ "文章:"
+ doc["context"].split("[SEP]")[-1].strip()
+ f"{self.SEP}"
+ "質問:"
+ doc["question"]
+ f"{self.SEP}"
+ "回答:"
)

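For illustration, here is a minimal sketch (not part of the diff) of what this v1.2 template renders, assuming SEP is a newline and using a made-up JSQuAD-style doc; the v1.2 change is the addition of the title to the prompt:

```
# Hypothetical example; field values are invented for illustration only.
doc = {
    "title": "大阪府",
    "context": "大阪府 [SEP] 大阪府は、日本の近畿地方に位置する都道府県。",
    "question": "大阪府はどの地方に位置するか。",
}

# JSQuADWithFintanPromptV12.doc_to_text(doc) would then yield (with SEP == "\n"):
#
#   題名:大阪府
#   文章:大阪府は、日本の近畿地方に位置する都道府県。
#   質問:大阪府はどの地方に位置するか。
#   回答:
#
# whereas the v1.1 prompt (JSQuADWithFintanPrompt) omits the 題名 line.
```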

class JSQuADWithJAAlpacaPrompt(JSQuAD):
"""
@@ -231,6 +253,39 @@ def doc_to_text(self, doc):
return f"### 指示:\n{self.INSTRUCTION}\n\n### 入力:\n{input_text}\n\n### 応答:\n"


class JSQuADWithJAAlpacaPromptV12(JSQuADWithJAAlpacaPrompt):
"""
This prompt format was inspired by the following example from fujiki/japanese_alpaca_data.
```
{
'instruction': '与えられた文脈に最も適した文を選択してください。',
'input': '文脈:あなたは親友と現在の仕事の状況について話しています。\nA)私にはあまり選択肢がありません。\nB)他に選択肢がありません。\nC)私には本当に決断する必要がありません。',
'output': 'A) 私には多くの選択肢がありません。'
}
```
Reference:
- data: https://huggingface.co/datasets/fujiki/japanese_alpaca_data
- code: https://github.com/Stability-AI/gpt-neox/blob/c130a4edc1120dccec8f02a34eb60d3e8f484cd3/finetune/finetune_base_ja.py#LL118C23-L127C11
"""
VERSION = 1.2
def doc_to_text(self, doc):
"""
以下は、タスクを説明する指示と、文脈のある入力の組み合わせです。要求を適切に満たす応答を書きなさい。

### 指示:
{instruction}

### 入力:
{input}

### 応答:
{response}
"""
input_text = f"文脈:{doc['title']}\n{doc['context'].split('[SEP]')[-1].strip()}\n質問:{doc['question']}"
return f"### 指示:\n{self.INSTRUCTION}\n\n### 入力:\n{input_text}\n\n### 応答:\n"


class JSQuADWithRinnaInstructionSFT(JSQuAD):
"""
Reference:
@@ -243,10 +298,22 @@ class JSQuADWithRinnaInstructionSFT(JSQuAD):

def doc_to_text(self, doc):
input_text = f"文脈:{doc['context'].split('[SEP]')[-1].strip()}{self.SEP}質問:{doc['question']}"
# input_text = f"質問:{doc['question']}<NL>文脈:{doc['context'].split('[SEP]')[-1].strip()}"
return f"ユーザー: {input_text}{self.SEP}システム: "


class JSQuADWithRinnaInstructionSFTV12(JSQuADWithRinnaInstructionSFT):
"""
Reference:
- HF Hub: https://huggingface.co/rinna/japanese-gpt-neox-3.6b-instruction-sft
"""
VERSION = 1.2

def doc_to_text(self, doc):
input_text = f"文脈:{doc['title']}{self.SEP}{doc['context'].split('[SEP]')[-1].strip()}{self.SEP}質問:{doc['question']}"
return f"ユーザー: {input_text}{self.SEP}システム: "

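For comparison, a sketch of the rendered v1.2 Rinna prompt for the same hypothetical doc, assuming SEP is the "<NL>" token described on the rinna/japanese-gpt-neox-3.6b-instruction-sft model card (the bilingual subclass below overrides SEP to a plain newline):

```
# Rendered by JSQuADWithRinnaInstructionSFTV12.doc_to_text(doc) as one line:
#
#   ユーザー: 文脈:大阪府<NL>大阪府は、日本の近畿地方に位置する都道府県。<NL>質問:大阪府はどの地方に位置するか。<NL>システム: 
#
# Here the title is prepended to the context rather than given its own 題名 label.
```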

class JSQuADWithRinnaBilingualInstructionSFT(JSQuADWithRinnaInstructionSFT):
"""
Reference:
@@ -257,13 +324,29 @@ class JSQuADWithRinnaBilingualInstructionSFT(JSQuADWithRinnaInstructionSFT):
SEP = "\n"
FEWSHOT_SEP = "\n"


class JSQuADWithRinnaBilingualInstructionSFTV12(JSQuADWithRinnaBilingualInstructionSFT):
"""
Reference:
- HF Hub: https://huggingface.co/rinna/bilingual-gpt-neox-4b-instruction-sft
"""
VERSION = 1.2

def doc_to_text(self, doc):
input_text = f"文脈:{doc['title']}{self.SEP}{doc['context'].split('[SEP]')[-1].strip()}{self.SEP}質問:{doc['question']}"
return f"ユーザー: {input_text}{self.SEP}システム: "


VERSIONS = [
JSQuAD,
JSQuADWithFintanPrompt,
JSQuADWithFintanPromptV12,
JSQuADWithJAAlpacaPrompt,
JSQuADWithJAAlpacaPromptV12,
JSQuADWithRinnaInstructionSFT,
JSQuADWithRinnaInstructionSFTV12,
JSQuADWithRinnaBilingualInstructionSFT,
JSQuADWithRinnaBilingualInstructionSFTV12
]
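The evaluation scripts below reference task names such as jsquad-1.2-0.2 and jsquad-1.2-0.3. A minimal sketch of how such names could be derived from this list, assuming each class also carries a PROMPT_VERSION attribute (e.g. 0.2 for the JA Alpaca prompt, 0.3 for the Rinna SFT prompt); the helper shown is illustrative, not this file's actual registration code:

```
# Illustrative only: build a mapping like "jsquad-1.2-0.2" -> task class,
# where the first number is the dataset VERSION and the second the
# PROMPT_VERSION of the template (an assumption about this harness).
def construct_tasks():
    tasks = {}
    for version_class in VERSIONS:
        name = f"jsquad-{version_class.VERSION}-{version_class.PROMPT_VERSION}"
        tasks[name] = version_class
    return tasks
```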


3 changes: 3 additions & 0 deletions models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 15.803692030616839,
"f1": 25.18326978234071
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent-open-calm-1b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-1b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent-open-calm-1b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 39.53174245835209,
"f1": 49.49399460234075
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-1b",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent/cyberagent-open-calm-3b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-3b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent/cyberagent-open-calm-3b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 44.529491220171096,
"f1": 56.02141036867636
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-3b,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent/cyberagent-open-calm-7b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 48.10895992796038,
"f1": 60.90961937230767
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto",
"num_fewshot": 2,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent-open-calm-large/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-large/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent-open-calm-large/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 40.4997748761819,
"f1": 51.32160467436942
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/cyberagent-open-calm-medium/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto"
TASK="jsquad-1.2-0.2"
python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent-open-calm-medium/result.jsquad-1.2.json"
22 changes: 22 additions & 0 deletions models/cyberagent-open-calm-medium/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.2": {
"exact_match": 29.85141828005403,
"f1": 40.49655778214922
}
},
"versions": {
"jsquad-1.2-0.2": 1.2
},
"config": {
"model": "hf-causal",
"model_args": "pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto",
"num_fewshot": 3,
"batch_size": null,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
4 changes: 4 additions & 0 deletions models/llama/llama-7b/harness.jsquad-1.2.sh
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=huggyllama/llama-7b,use_accelerate=True,load_in_8bit=True"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama/llama-7b/result.jsquad-1.2.json" --batch_size 2

22 changes: 22 additions & 0 deletions models/llama/llama-7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 36.24493471409275,
"f1": 50.91625240527312
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=huggyllama/llama-7b,use_accelerate=True,load_in_8bit=True",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
4 changes: 4 additions & 0 deletions models/llama2/llama2-2.7b/harness.jsquad-1.2.sh
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.2.json" --batch_size 2

22 changes: 22 additions & 0 deletions models/llama2/llama2-2.7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 59.92796037820801,
"f1": 70.8236875084182
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
3 changes: 3 additions & 0 deletions models/llama2/llama2-7b-chat/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.jsquad-1.2.json" --batch_size 2
22 changes: 22 additions & 0 deletions models/llama2/llama2-7b-chat/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
{
"results": {
"jsquad-1.2-0.3": {
"exact_match": 62.17919855920756,
"f1": 74.84345935966519
}
},
"versions": {
"jsquad-1.2-0.3": 1.2
},
"config": {
"model": "hf-causal-experimental",
"model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto",
"num_fewshot": 2,
"batch_size": 2,
"device": "cuda",
"no_cache": false,
"limit": null,
"bootstrap_iters": 100000,
"description_dict": {}
}
}
4 changes: 4 additions & 0 deletions models/llama2/llama2-7b/harness.jsquad-1.2.sh
@@ -0,0 +1,4 @@
MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
TASK="jsquad-1.2-0.3"
python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b/result.jsquad-1.2.json" --batch_size 2
