Deprecate num_proc parameter in DownloadManager.extract (#5142)

* deprecate num_proc
* fix the reviewed changes
* fix the reviewed changes -2
* fix the reviewed changes -3
* format the code
huggingface · Oct 25, 2022 · d12fbc1 · d12fbc1 · github-actions · Oct 25, 2022
1 parent f09f781
commit d12fbc1
Showing 1 changed file with 16 additions and 2 deletions.
diff --git a/src/datasets/download/download_manager.py b/src/datasets/download/download_manager.py
@@ -20,6 +20,7 @@
 import os
 import posixpath
 import tarfile
+import warnings
 from datetime import datetime
 from functools import partial
 from typing import Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union
@@ -377,7 +378,7 @@ def iter_files(self, paths: Union[str, List[str]]):
         """
         return FilesIterable.from_paths(paths)
 
-    def extract(self, path_or_paths, num_proc=None):
+    def extract(self, path_or_paths, num_proc="deprecated"):
         """Extract given path(s).
 
         Args:
@@ -386,6 +387,12 @@ def extract(self, path_or_paths, num_proc=None):
             num_proc: Use multi-processing if `num_proc` > 1 and the length of
                 `path_or_paths` is larger than `num_proc`
 
+                <Deprecated version="2.6.2">
+
+                Pass `DownloadConfig(num_proc=<num_proc>)` to the initializer instead.
+
+                </Deprecated>
+
         Returns:
             extracted_path(s): `str`, The extracted paths matching the given input
                 path_or_paths.
@@ -397,15 +404,22 @@ def extract(self, path_or_paths, num_proc=None):
         >>> extracted_files = dl_manager.extract(downloaded_files)
         ```
         """
+        if num_proc != "deprecated":
+            warnings.warn(
+                "'num_proc' was deprecated in version 2.6.2 and will be removed in 3.0.0. Pass `DownloadConfig(num_proc=<num_proc>)` to the initializer instead.",
+                FutureWarning,
+            )
         download_config = self.download_config.copy()
         download_config.extract_compressed_file = True
         # Extract downloads the file first if it is not already downloaded
         if download_config.download_desc is None:
             download_config.download_desc = "Downloading data"
+        if download_config.num_proc is None:
+            download_config.num_proc = 16
         extracted_paths = map_nested(
             partial(cached_path, download_config=download_config),
             path_or_paths,
-            num_proc=num_proc,
+            num_proc=download_config.num_proc,
             disable_tqdm=not is_progress_bar_enabled(),
             desc="Extracting data files",
         )