diff --git a/configs/datasets/devops_eval/__init__.py b/configs/datasets/devops_eval/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/configs/datasets/devops_eval/devops_eval_gen.py b/configs/datasets/devops_eval/devops_eval_gen.py
new file mode 100644
index 000000000..9c5d449ad
--- /dev/null
+++ b/configs/datasets/devops_eval/devops_eval_gen.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .devops_eval_gen_8df36d import devops_eval_datasets  # noqa: F401, F403
diff --git a/configs/datasets/devops_eval/devops_eval_ppl.py b/configs/datasets/devops_eval/devops_eval_ppl.py
new file mode 100644
index 000000000..06900f103
--- /dev/null
+++ b/configs/datasets/devops_eval/devops_eval_ppl.py
@@ -0,0 +1,4 @@
+from mmengine.config import read_base
+
+with read_base():
+    from .devops_eval_ppl_8b3a0d import devops_eval_datasets  # noqa: F401, F403
diff --git a/opencompass/datasets/devops_eval.py b/opencompass/datasets/devops_eval.py
new file mode 100644
index 000000000..f644d38b8
--- /dev/null
+++ b/opencompass/datasets/devops_eval.py
@@ -0,0 +1,56 @@
+import csv
+import os.path as osp
+
+from datasets import Dataset, DatasetDict
+
+from opencompass.registry import LOAD_DATASET
+
+from .base import BaseDataset
+
+
+@LOAD_DATASET.register_module()
+class DevOpsEvalDataset(BaseDataset):
+    """Load DevOps-Eval CSV files, one file per split.
+
+    Files are read from ``<path>/<language>/<split>/<name>.csv`` for the
+    ``dev`` and ``test`` splits.
+    """
+
+    @staticmethod
+    def load(path: str, name: str, language: str):
+        """Read the ``dev`` and ``test`` CSV files into a ``DatasetDict``.
+
+        Args:
+            path (str): Root directory of the dataset.
+            name (str): CSV file stem, i.e. ``<name>.csv``.
+            language (str): Sub-directory of ``path`` holding the splits.
+
+        Returns:
+            DatasetDict: One ``Dataset`` per split. Each row is a dict keyed
+            by the CSV header, with ``explanation`` and ``answer``
+            defaulting to ``''`` when the row does not provide them.
+        """
+        splits = {}
+        for split in ['dev', 'test']:
+            # Register the split up front so that a file without data rows
+            # still yields an (empty) split rather than a missing key.
+            rows = splits.setdefault(split, [])
+            filename = osp.join(path, language, split, f'{name}.csv')
+            # newline='' lets the csv module handle line endings itself,
+            # which keeps newlines embedded in quoted fields intact.
+            with open(filename, encoding='utf-8', newline='') as f:
+                reader = csv.reader(f)
+                # A completely empty file has no header; treat it as no rows
+                # instead of letting StopIteration escape.
+                header = next(reader, None)
+                if header is None:
+                    continue
+                for row in reader:
+                    item = dict(zip(header, row))
+                    item.setdefault('explanation', '')
+                    item.setdefault('answer', '')
+                    rows.append(item)
+        return DatasetDict({
+            key: Dataset.from_list(items)
+            for key, items in splits.items()
+        })