Commit

Add non-programmatic BIG-bench-hard tasks (#406)
* Support bigbench-hard json tasks using multiple_choice_grade

* Add support for greedy decoding in bigbench tasks

* move bigbench_resources to datasets

* rectify changes to rf.greedy_until with upstream

* make path to resource import reflect new location

---------
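The multiple_choice_grade metric named in the first bullet follows BIG-bench's convention: an example scores 1.0 when the model's highest-likelihood answer choice is marked correct, else 0.0. A minimal sketch (illustrative helper, not part of this diff):

```python
def multiple_choice_grade(log_likelihoods, is_correct):
    # BIG-bench-style scoring over answer choices: 1.0 when the single most
    # likely choice is a correct answer, 0.0 otherwise.
    best = max(range(len(log_likelihoods)), key=lambda i: log_likelihoods[i])
    return 1.0 if is_correct[best] else 0.0
```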

Co-authored-by: haileyschoelkopf <hailey.schoelkopf@yale.edu>
yurodiviy and haileyschoelkopf authored Apr 28, 2023
1 parent e47e01b commit 602abce
Showing 37 changed files with 699,688 additions and 19 deletions.
13 changes: 10 additions & 3 deletions lm_eval/base.py
@@ -342,18 +342,25 @@ def _collate(x):

         re_ord = utils.Reorderer(requests, _collate)

-        for context, until in tqdm(re_ord.get_reordered()):
+        for context, request_args in tqdm(re_ord.get_reordered()):
+            until = request_args['until']
             if isinstance(until, str):
                 until = [until]

-            (primary_until,) = self.tok_encode(until[0])
+            if until:
+                (primary_until,) = self.tok_encode(until[0])
+            else:
+                primary_until = None

             context_enc = torch.tensor(
                 [self.tok_encode(context)[self.max_gen_toks - self.max_length :]]
             ).to(self.device)

+            max_gen_tokens = min(
+                self.max_gen_toks, request_args.get('max_length', self.max_gen_toks)
+            )
             cont = self._model_generate(
-                context_enc, context_enc.shape[1] + self.max_gen_toks, primary_until
+                context_enc, context_enc.shape[1] + max_gen_tokens, primary_until
             )

             s = self.tok_decode(cont[0].tolist()[context_enc.shape[1] :])
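The new request-argument handling in this hunk can be exercised in isolation; a simplified sketch (hypothetical helper name, logic condensed from the diff):

```python
def resolve_generation_args(request_args, max_gen_toks=256):
    # Mirrors the logic this commit adds to greedy generation: normalize
    # `until` to a list, pick the primary stop sequence (or None when no
    # stop sequences are given), and cap generation length by an optional
    # per-request `max_length`.
    until = request_args['until']
    if isinstance(until, str):
        until = [until]
    primary_until = until[0] if until else None
    max_gen_tokens = min(max_gen_toks, request_args.get('max_length', max_gen_toks))
    return until, primary_until, max_gen_tokens
```

For example, a request of `{'until': '\n'}` is normalized to the list `['\n']` with the default token cap, while `{'until': ['.'], 'max_length': 64}` caps generation at 64 tokens.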
1,541 changes: 1,541 additions & 0 deletions lm_eval/datasets/bigbench_resources/causal_judgement.json

Large diffs are not rendered by default.
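The JSON resources added by this commit (such as causal_judgement.json) follow the BIG-bench task schema: a list of examples, each pairing an input with per-choice target scores that multiple_choice_grade consumes. An abridged, illustrative entry (invented text, not the file's actual contents):

```python
import json

# Illustrative BIG-bench-style task resource (schema shape only; the example
# text and scores here are made up for demonstration).
task = json.loads("""
{
  "name": "causal_judgement",
  "examples": [
    {"input": "How would a typical person answer this question about causation?",
     "target_scores": {"Yes": 1, "No": 0}}
  ]
}
""")
```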

