re-evaluate models with jsquad prompt with title

Stability-AI · Sep 12, 2023 · a73f7a1 · a73f7a1
1 parent 6fefd43
commit a73f7a1
Show file tree

Hide file tree

Showing 33 changed files with 315 additions and 29 deletions.
diff --git a/models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh b/models/abeja-gpt-neox-japanese-2.7b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
+MODEL_ARGS="pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto"
+TASK="jsquad-1.2-0.2"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json"
diff --git a/models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json b/models/abeja-gpt-neox-japanese-2.7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.2": {
+      "exact_match": 15.803692030616839,
+      "f1": 25.18326978234071
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.2": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=abeja/gpt-neox-japanese-2.7b,device_map=auto,torch_dtype=auto",
+    "num_fewshot": 3,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/models/cyberagent/cyberagent-open-calm-1b/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-1b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
+MODEL_ARGS="pretrained=cyberagent/open-calm-1b,device_map=auto,torch_dtype=auto"
+TASK="jsquad-1.2-0.2"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json"
diff --git a/models/cyberagent/cyberagent-open-calm-1b/harness.sh b/models/cyberagent/cyberagent-open-calm-1b/harness.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=cyberagent/open-calm-1b"
-TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.1-0.2,xlsum_ja"
+TASK="jcommonsenseqa-1.1-0.2,jnli-1.1-0.2,marc_ja-1.1-0.2,jsquad-1.2-0.2,xlsum_ja"
 python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2,3,3,3,1" --device "cuda" --output_path "models/cyberagent-open-calm-1b/result.json"
diff --git a/models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json b/models/cyberagent/cyberagent-open-calm-1b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.2": {
+      "exact_match": 39.53174245835209,
+      "f1": 49.49399460234075
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.2": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=cyberagent/open-calm-1b",
+    "num_fewshot": 3,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/models/cyberagent/cyberagent-open-calm-3b/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-3b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
+MODEL_ARGS="pretrained=cyberagent/open-calm-3b,device_map=auto,torch_dtype=auto"
+TASK="jsquad-1.2-0.2"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-3b/result.jsquad-1.2.json"
diff --git a/models/cyberagent/cyberagent-open-calm-7b/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-7b/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
+MODEL_ARGS="pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto"
+TASK="jsquad-1.2-0.2"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json"
diff --git a/models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json b/models/cyberagent/cyberagent-open-calm-7b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.2": {
+      "exact_match": 48.10895992796038,
+      "f1": 60.90961937230767
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.2": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=cyberagent/open-calm-7b,device_map=auto,torch_dtype=auto",
+    "num_fewshot": 2,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/models/cyberagent/cyberagent-open-calm-large/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-large/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
+MODEL_ARGS="pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto"
+TASK="jsquad-1.2-0.2"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-large/result.jsquad-1.2.json"
diff --git a/models/cyberagent/cyberagent-open-calm-large/result.jsquad-1.2.json b/models/cyberagent/cyberagent-open-calm-large/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.2": {
+      "exact_match": 40.4997748761819,
+      "f1": 51.32160467436942
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.2": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=cyberagent/open-calm-large,use_fast=True,device_map=auto,torch_dtype=auto",
+    "num_fewshot": 3,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/models/cyberagent/cyberagent-open-calm-medium/harness.jsquad-1.2.sh b/models/cyberagent/cyberagent-open-calm-medium/harness.jsquad-1.2.sh
@@ -0,0 +1,3 @@
+MODEL_ARGS="pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto"
+TASK="jsquad-1.2-0.2"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "3" --device "cuda" --output_path "models/cyberagent/cyberagent-open-calm-medium/result.jsquad-1.2.json"
diff --git a/models/cyberagent/cyberagent-open-calm-medium/result.jsquad-1.2.json b/models/cyberagent/cyberagent-open-calm-medium/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.2": {
+      "exact_match": 29.85141828005403,
+      "f1": 40.49655778214922
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.2": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=cyberagent/open-calm-medium,use_fast=True,device_map=auto,torch_dtype=auto",
+    "num_fewshot": 3,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/...a2/llama2-2.7b/harness.jsquad-1.1-0.31.sh → .../llama2/llama2-2.7b/harness.jsquad-1.2.sh b/...a2/llama2-2.7b/harness.jsquad-1.1-0.31.sh → .../llama2/llama2-2.7b/harness.jsquad-1.2.sh
@@ -1,4 +1,4 @@
 MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
-TASK="jsquad-1.1-0.31"
-python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.1-0.31.json" --batch_size 2
+TASK="jsquad-1.2-0.3"
+python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-2.7b/result.jsquad-1.2.json" --batch_size 2
 
diff --git a/...2/llama2-2.7b/result.jsquad-1.1-0.31.json → ...llama2/llama2-2.7b/result.jsquad-1.2.json b/...2/llama2-2.7b/result.jsquad-1.1-0.31.json → ...llama2/llama2-2.7b/result.jsquad-1.2.json
@@ -1,12 +1,12 @@
 {
   "results": {
-    "jsquad-1.1-0.31": {
+    "jsquad-1.2-0.3": {
       "exact_match": 59.92796037820801,
       "f1": 70.8236875084182
     }
   },
   "versions": {
-    "jsquad-1.1-0.31": 1.1
+    "jsquad-1.2-0.3": 1.2
   },
   "config": {
     "model": "hf-causal-experimental",
@@ -19,4 +19,4 @@
     "bootstrap_iters": 100000,
     "description_dict": {}
   }
-}
+}
diff --git a/...llama2-7b-chat/harness.jsquad-1.1-0.31.sh → ...ama2/llama2-7b-chat/harness.jsquad-1.2.sh b/...llama2-7b-chat/harness.jsquad-1.1-0.31.sh → ...ama2/llama2-7b-chat/harness.jsquad-1.2.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto"
-TASK="jsquad-1.1-0.31"
-python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.json" --batch_size 2
+TASK="jsquad-1.2-0.3"
+python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b-chat/result.jsquad-1.2.json" --batch_size 2
diff --git a/models/llama2/llama2-7b-chat/result.jsquad-1.2.json b/models/llama2/llama2-7b-chat/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.3": {
+      "exact_match": 62.17919855920756,
+      "f1": 74.84345935966519
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.3": 1.2
+  },
+  "config": {
+    "model": "hf-causal-experimental",
+    "model_args": "pretrained=meta-llama/Llama-2-7b-chat-hf,use_accelerate=True,dtype=auto",
+    "num_fewshot": 2,
+    "batch_size": 2,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/models/llama2/llama2-7b/harness.jsquad-1.2.sh b/models/llama2/llama2-7b/harness.jsquad-1.2.sh
@@ -0,0 +1,4 @@
+MODEL_ARGS="pretrained=meta-llama/Llama-2-7b-hf,use_accelerate=True,dtype=auto"
+TASK="jsquad-1.2-0.3"
+python main.py --model hf-causal-experimental --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/llama2/llama2-7b/result.jsquad-1.2.json" --batch_size 2
+
diff --git a/...nstruction-ppo/harness.jsquad-1.1-0.51.sh → ...-4b-instruction-ppo/harness.jsquad-1.2.sh b/...nstruction-ppo/harness.jsquad-1.1-0.51.sh → ...-4b-instruction-ppo/harness.jsquad-1.2.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto"
-TASK="jsquad-1.1-0.51"
-python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.1-0.51.json"
+TASK="jsquad-1.2-0.5"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2.json"
diff --git a/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2.json b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-ppo/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.5": {
+      "exact_match": 55.94326879783881,
+      "f1": 70.64052956733126
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.5": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=rinna/bilingual-gpt-neox-4b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto",
+    "num_fewshot": 2,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/...nstruction-sft/harness.jsquad-1.1-0.51.sh → ...-4b-instruction-sft/harness.jsquad-1.2.sh b/...nstruction-sft/harness.jsquad-1.1-0.51.sh → ...-4b-instruction-sft/harness.jsquad-1.2.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto"
-TASK="jsquad-1.1-0.51"
-python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.1-0.51.json"
+TASK="jsquad-1.2-0.5"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json"
diff --git a/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json b/models/rinna/rinna-bilingual-gpt-neox-4b-instruction-sft/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.5": {
+      "exact_match": 58.66726699684827,
+      "f1": 72.38803519363597
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.5": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=rinna/bilingual-gpt-neox-4b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto",
+    "num_fewshot": 2,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/...al-gpt-neox-4b/harness.jsquad-1.1-0.21.sh → ...lingual-gpt-neox-4b/harness.jsquad-1.2.sh b/...al-gpt-neox-4b/harness.jsquad-1.1-0.21.sh → ...lingual-gpt-neox-4b/harness.jsquad-1.2.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto"
-TASK="jsquad-1.1-0.21"
-python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.1-0.21.json"
+TASK="jsquad-1.2-0.2"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json"
diff --git a/models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json b/models/rinna/rinna-bilingual-gpt-neox-4b/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.2": {
+      "exact_match": 51.32823052678973,
+      "f1": 61.9390389728309
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.2": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=rinna/bilingual-gpt-neox-4b,use_fast=False,device_map=auto,torch_dtype=auto",
+    "num_fewshot": 2,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/...apanese-gpt-1b/harness.jsquad-1.1-0.21.sh → ...nna-japanese-gpt-1b/harness.jsquad-1.2.sh b/...apanese-gpt-1b/harness.jsquad-1.1-0.21.sh → ...nna-japanese-gpt-1b/harness.jsquad-1.2.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=rinna/japanese-gpt-1b,use_fast=False"
-TASK="jsquad-1.1-0.21"
-python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.1-0.21.json"
+TASK="jsquad-1.2-0.2"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-1b/result.jsquad-1.2.json"
diff --git a/...panese-gpt-1b/result.jsquad-1.1-0.21.json → ...na-japanese-gpt-1b/result.jsquad-1.2.json b/...panese-gpt-1b/result.jsquad-1.1-0.21.json → ...na-japanese-gpt-1b/result.jsquad-1.2.json
@@ -1,12 +1,12 @@
 {
   "results": {
-    "jsquad-1.1-0.21": {
+    "jsquad-1.2-0.2": {
       "exact_match": 30.189104007203962,
       "f1": 47.12467642283419
     }
   },
   "versions": {
-    "jsquad-1.1-0.21": 1.1
+    "jsquad-1.2-0.2": 1.2
   },
   "config": {
     "model": "hf-causal",

diff --git a/...nstruction-ppo/harness.jsquad-1.1-0.41.sh → ....6b-instruction-ppo/harness.jsquad-1.2.sh b/...nstruction-ppo/harness.jsquad-1.1-0.41.sh → ....6b-instruction-ppo/harness.jsquad-1.2.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto"
-TASK="jsquad-1.1-0.41"
-python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.1-0.41.json"
+TASK="jsquad-1.2-0.4"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json"
diff --git a/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json b/models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-ppo/result.jsquad-1.2.json
@@ -0,0 +1,22 @@
+{
+  "results": {
+    "jsquad-1.2-0.4": {
+      "exact_match": 53.89464205312922,
+      "f1": 65.73194869643035
+    }
+  },
+  "versions": {
+    "jsquad-1.2-0.4": 1.2
+  },
+  "config": {
+    "model": "hf-causal",
+    "model_args": "pretrained=rinna/japanese-gpt-neox-3.6b-instruction-ppo,use_fast=False,device_map=auto,torch_dtype=auto",
+    "num_fewshot": 2,
+    "batch_size": null,
+    "device": "cuda",
+    "no_cache": false,
+    "limit": null,
+    "bootstrap_iters": 100000,
+    "description_dict": {}
+  }
+}
diff --git a/...ruction-sft-v2/harness.jsquad-1.1-0.41.sh → ...-instruction-sft-v2/harness.jsquad-1.2.sh b/...ruction-sft-v2/harness.jsquad-1.1-0.41.sh → ...-instruction-sft-v2/harness.jsquad-1.2.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft-v2,use_fast=False,device_map=auto,torch_dtype=auto"
-TASK="jsquad-1.1-0.41"
-python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.1-0.41.json"
+TASK="jsquad-1.2-0.4"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft-v2/result.jsquad-1.2.json"
diff --git a/...uction-sft-v2/result.jsquad-1.1-0.41.json → ...instruction-sft-v2/result.jsquad-1.2.json b/...uction-sft-v2/result.jsquad-1.1-0.41.json → ...instruction-sft-v2/result.jsquad-1.2.json
@@ -1,12 +1,12 @@
 {
   "results": {
-    "jsquad-1.1-0.41": {
+    "jsquad-1.2-0.4": {
       "exact_match": 47.90634849167042,
       "f1": 62.1059309037734
     }
   },
   "versions": {
-    "jsquad-1.1-0.41": 1.1
+    "jsquad-1.2-0.4": 1.2
   },
   "config": {
     "model": "hf-causal",
@@ -19,4 +19,4 @@
     "bootstrap_iters": 100000,
     "description_dict": {}
   }
-}
+}
diff --git a/...nstruction-sft/harness.jsquad-1.1-0.41.sh → ....6b-instruction-sft/harness.jsquad-1.2.sh b/...nstruction-sft/harness.jsquad-1.1-0.41.sh → ....6b-instruction-sft/harness.jsquad-1.2.sh
@@ -1,3 +1,3 @@
 MODEL_ARGS="pretrained=rinna/japanese-gpt-neox-3.6b-instruction-sft,use_fast=False,device_map=auto,torch_dtype=auto"
-TASK="jsquad-1.1-0.41"
-python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.1-0.41.json"
+TASK="jsquad-1.2-0.4"
+python main.py --model hf-causal --model_args $MODEL_ARGS --tasks $TASK --num_fewshot "2" --device "cuda" --output_path "models/rinna/rinna-japanese-gpt-neox-3.6b-instruction-sft/result.jsquad-1.2.json"