diff --git a/src/datasets/data_files.py b/src/datasets/data_files.py index 63cf8c1000c..9eb1c499299 100644 --- a/src/datasets/data_files.py +++ b/src/datasets/data_files.py @@ -227,9 +227,7 @@ def _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(matched_rel_ return len(hidden_directories_in_path) != len(hidden_directories_in_pattern) -def _get_data_files_patterns( - pattern_resolver: Callable[[str], List[str]], base_path: str = "" -) -> Dict[str, List[str]]: +def _get_data_files_patterns(pattern_resolver: Callable[[str], List[str]]) -> Dict[str, List[str]]: """ Get the default pattern from a directory or repository by testing all the supported patterns. The first patterns to return a non-empty list of data files is returned. @@ -467,7 +465,7 @@ def get_data_patterns(base_path: str, download_config: Optional[DownloadConfig] """ resolver = partial(resolve_pattern, base_path=base_path, download_config=download_config) try: - return _get_data_files_patterns(resolver, base_path=base_path) + return _get_data_files_patterns(resolver) except FileNotFoundError: raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None diff --git a/tests/test_data_files.py b/tests/test_data_files.py index de8f6340cb5..01b5e4dd15e 100644 --- a/tests/test_data_files.py +++ b/tests/test_data_files.py @@ -622,7 +622,7 @@ def resolver(pattern): if fs.isfile(file_path) ] - patterns_per_split = _get_data_files_patterns(resolver, base_path=base_path) + patterns_per_split = _get_data_files_patterns(resolver) assert list(patterns_per_split.keys()) == list(data_file_per_split.keys()) # Test split order with list() for split, patterns in patterns_per_split.items(): matched = [file_path for pattern in patterns for file_path in resolver(pattern)]