Skip to content
This repository has been archived by the owner on Dec 16, 2022. It is now read-only.

Commit

Permalink
Signed-off-by: Abhishek P (VMware) <pab@vmware.com>
Browse files Browse the repository at this point in the history
Converted HFDatasetSplitReader to HFDatasetReader
Now all splits can be used in the same reader
Supports either pre-loading all splits or loading a split on demand
Reduced tests to the glue/cola dataset/config pair, which is a ~0.36 MB download
Updated the datasets dependency to the range >=1.5.0 and <1.6.0
  • Loading branch information
Abhishek-P committed Apr 7, 2021
1 parent 6e613b9 commit 977c0b2
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from datasets.features import Value

# TODO pab complete the documentation comments
class HuggingfaceDatasetSplitReader(DatasetReader):
class HuggingfaceDatasetReader(DatasetReader):
"""
This reader implementation wraps the huggingface datasets package
to utilize its dataset management functionality and load the information in AllenNLP-friendly formats
Expand Down
4 changes: 2 additions & 2 deletions tests/data/dataset_readers/huggingface_datasets_test.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from allennlp.data.dataset_readers.huggingface_datasets_reader import HuggingfaceDatasetSplitReader
from allennlp.data.dataset_readers.huggingface_datasets_reader import HuggingfaceDatasetReader
import logging

logger = logging.getLogger(__name__)
Expand All @@ -15,7 +15,7 @@ class HuggingfaceDatasetSplitReaderTest:
"""
@pytest.mark.parametrize("dataset, config, split", (("glue", "cola", "train"), ("glue", "cola", "test")))
def test_read_for_datasets_requiring_config(self, dataset, config, split):
huggingface_reader = HuggingfaceDatasetSplitReader(dataset_name=dataset, config_name=config)
huggingface_reader = HuggingfaceDatasetReader(dataset_name=dataset, config_name=config)
instances = list(huggingface_reader.read(split))
assert len(instances) == len(huggingface_reader.datasets[split])
print(instances[0], print(huggingface_reader.datasets[split][0]))

0 comments on commit 977c0b2

Please sign in to comment.