huggingface · lhoestq · Nov 24, 2021 · Nov 3, 2021 · Nov 3, 2021 · Nov 3, 2021
diff --git a/datasets/multidoc2dial/README.md b/datasets/multidoc2dial/README.md
diff --git a/datasets/multidoc2dial/dataset_infos.json b/datasets/multidoc2dial/dataset_infos.json
@@ -0,0 +1 @@
+{"dialogue_domain": {"description": "MultiDoc2Dial is a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. Most previous works treat document-grounded dialogue modeling as a machine reading comprehension task based on a single given document or passage. We aim to address more realistic scenarios where a goal-oriented information-seeking conversation involves multiple topics, and hence is grounded on different documents. \n", "citation": "@inproceedings{feng2021multidoc2dial,\n    title={MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents},\n    author={Feng, Song and Patel, Siva Sankalp and Wan, Hui and Joshi, Sachindra},\n    booktitle={EMNLP},\n    year={2021}\n}\n", "homepage": "https://doc2dial.github.io/multidoc2dial/", "license": "", "features": {"dial_id": {"dtype": "string", "id": null, "_type": "Value"}, "domain": {"dtype": "string", "id": null, "_type": "Value"}, "turns": [{"turn_id": {"dtype": "int32", "id": null, "_type": "Value"}, "role": {"dtype": "string", "id": null, "_type": "Value"}, "da": {"dtype": "string", "id": null, "_type": "Value"}, "references": [{"id_sp": {"dtype": "string", "id": null, "_type": "Value"}, "label": {"dtype": "string", "id": null, "_type": "Value"}, "doc_id": {"dtype": "string", "id": null, "_type": "Value"}}], "utterance": {"dtype": "string", "id": null, "_type": "Value"}}]}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "multi_doc2dial", "config_name": "dialogue_domain", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 11700598, "num_examples": 3474, "dataset_name": "multi_doc2dial"}, "validation": {"name": "validation", "num_bytes": 2210378, "num_examples": 661, "dataset_name": "multi_doc2dial"}}, "download_checksums": {"https://doc2dial.github.io/multidoc2dial/file/multidoc2dial.zip": {"num_bytes": 6451144, "checksum": "a8051237dd3be50d81c06aca82ed5171716922e35f44bfa5b9c024f090903419"}}, "download_size": 6451144, "post_processing_size": null, "dataset_size": 13910976, "size_in_bytes": 20362120}, "document_domain": {"description": "MultiDoc2Dial is a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. Most previous works treat document-grounded dialogue modeling as a machine reading comprehension task based on a single given document or passage. We aim to address more realistic scenarios where a goal-oriented information-seeking conversation involves multiple topics, and hence is grounded on different documents. \n", "citation": "@inproceedings{feng2021multidoc2dial,\n    title={MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents},\n    author={Feng, Song and Patel, Siva Sankalp and Wan, Hui and Joshi, Sachindra},\n    booktitle={EMNLP},\n    year={2021}\n}\n", "homepage": "https://doc2dial.github.io/multidoc2dial/", "license": "", "features": {"domain": {"dtype": "string", "id": null, "_type": "Value"}, "doc_id": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}, "doc_text": {"dtype": "string", "id": null, "_type": "Value"}, "spans": [{"id_sp": {"dtype": "string", "id": null, "_type": "Value"}, "tag": {"dtype": "string", "id": null, "_type": "Value"}, "start_sp": {"dtype": "int32", "id": null, "_type": "Value"}, "end_sp": {"dtype": "int32", "id": null, "_type": "Value"}, "text_sp": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}, "parent_titles": {"feature": {"id_sp": {"dtype": "string", "id": null, "_type": "Value"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "level": {"dtype": "string", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "id_sec": {"dtype": "string", "id": null, "_type": "Value"}, "start_sec": {"dtype": "int32", "id": null, "_type": "Value"}, "text_sec": {"dtype": "string", "id": null, "_type": "Value"}, "end_sec": {"dtype": "int32", "id": null, "_type": "Value"}}], "doc_html_ts": {"dtype": "string", "id": null, "_type": "Value"}, "doc_html_raw": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "multi_doc2dial", "config_name": "document_domain", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29378955, "num_examples": 488, "dataset_name": "multi_doc2dial"}}, "download_checksums": {"https://doc2dial.github.io/multidoc2dial/file/multidoc2dial.zip": {"num_bytes": 6451144, "checksum": "a8051237dd3be50d81c06aca82ed5171716922e35f44bfa5b9c024f090903419"}}, "download_size": 6451144, "post_processing_size": null, "dataset_size": 29378955, "size_in_bytes": 35830099}, "multidoc2dial": {"description": "MultiDoc2Dial is a new task and dataset on modeling goal-oriented dialogues grounded in multiple documents. Most previous works treat document-grounded dialogue modeling as a machine reading comprehension task based on a single given document or passage. We aim to address more realistic scenarios where a goal-oriented information-seeking conversation involves multiple topics, and hence is grounded on different documents. \n", "citation": "@inproceedings{feng2021multidoc2dial,\n    title={MultiDoc2Dial: Modeling Dialogues Grounded in Multiple Documents},\n    author={Feng, Song and Patel, Siva Sankalp and Wan, Hui and Joshi, Sachindra},\n    booktitle={EMNLP},\n    year={2021}\n}\n", "homepage": "https://doc2dial.github.io/multidoc2dial/", "license": "", "features": {"id": {"dtype": "string", "id": null, "_type": "Value"}, "title": {"dtype": "string", "id": null, "_type": "Value"}, "context": {"dtype": "string", "id": null, "_type": "Value"}, "question": {"dtype": "string", "id": null, "_type": "Value"}, "da": {"dtype": "string", "id": null, "_type": "Value"}, "answers": {"feature": {"text": {"dtype": "string", "id": null, "_type": "Value"}, "answer_start": {"dtype": "int32", "id": null, "_type": "Value"}}, "length": -1, "id": null, "_type": "Sequence"}, "utterance": {"dtype": "string", "id": null, "_type": "Value"}, "domain": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "multi_doc2dial", "config_name": "multidoc2dial", "version": {"version_str": "1.0.0", "description": null, "major": 1, "minor": 0, "patch": 0}, "splits": {"validation": {"name": "validation", "num_bytes": 24331976, "num_examples": 4201, "dataset_name": "multi_doc2dial"}, "train": {"name": "train", "num_bytes": 126589982, "num_examples": 21451, "dataset_name": "multi_doc2dial"}, "test": {"name": "test", "num_bytes": 33032, "num_examples": 5, "dataset_name": "multi_doc2dial"}}, "download_checksums": {"https://doc2dial.github.io/multidoc2dial/file/multidoc2dial.zip": {"num_bytes": 6451144, "checksum": "a8051237dd3be50d81c06aca82ed5171716922e35f44bfa5b9c024f090903419"}}, "download_size": 6451144, "post_processing_size": null, "dataset_size": 150954990, "size_in_bytes": 157406134}}
diff --git a/datasets/multidoc2dial/dummy/dialogue_domain/1.0.0/dummy_data.zip b/datasets/multidoc2dial/dummy/dialogue_domain/1.0.0/dummy_data.zip
diff --git a/datasets/multidoc2dial/dummy/document_domain/1.0.0/dummy_data.zip b/datasets/multidoc2dial/dummy/document_domain/1.0.0/dummy_data.zip
diff --git a/datasets/multidoc2dial/dummy/multidoc2dial/1.0.0/dummy_data.zip b/datasets/multidoc2dial/dummy/multidoc2dial/1.0.0/dummy_data.zip