From 7ef5f6de02038dd200276a0ad63be7a6019d152f Mon Sep 17 00:00:00 2001
From: Quentin Lhoest <42851186+lhoestq@users.noreply.github.com>
Date: Fri, 18 Nov 2022 18:25:26 +0100
Subject: [PATCH] Fix `max_shard_size` docs (#5267)

fix max_shard_size docs
---
 docs/source/filesystems.mdx |  2 +-
 src/datasets/builder.py     | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/docs/source/filesystems.mdx b/docs/source/filesystems.mdx
index 962966bd65f..b92b8e6f243 100644
--- a/docs/source/filesystems.mdx
+++ b/docs/source/filesystems.mdx
@@ -140,7 +140,7 @@ Use your own data files (see [how to load local and remote files](./loading#loca
 It is highly recommended to save the files as compressed Parquet files to optimize I/O by specifying `file_format="parquet"`.
 Otherwise the dataset is saved as an uncompressed Arrow file.
 
-You can also specify the size of the Parquet shard using `max_shard_size` (default is 500MB):
+You can also specify the maximum size of the shards using `max_shard_size` (default is 500MB):
 
 ```py
 >>> builder.download_and_prepare(output_dir, storage_options=storage_options, file_format="parquet", max_shard_size="1GB")
diff --git a/src/datasets/builder.py b/src/datasets/builder.py
index 961a2b6e6ff..829acd91e59 100644
--- a/src/datasets/builder.py
+++ b/src/datasets/builder.py
@@ -638,9 +638,9 @@ def download_and_prepare(
                 If the format is "parquet", then image and audio data are embedded into the Parquet files instead of pointing to local files.
 
                 <Added version="2.5.0"/>
-            max_shard_size (:obj:`Union[str, int]`, optional): Maximum number of bytes written per shard.
-                Only available for the "parquet" format with a default of "500MB". The size is based on uncompressed data size,
-                so in practice your shard files may be smaller than `max_shard_size` thanks to Parquet compression.
+            max_shard_size (:obj:`Union[str, int]`, optional): Maximum number of bytes written per shard (default: "500MB").
+                The size is based on uncompressed data size, so in practice your shard files may be smaller than
+                `max_shard_size`, for example thanks to Parquet compression.
 
                 <Added version="2.5.0"/>
             num_proc (:obj:`int`, optional, default `None`): Number of processes when downloading and generating the dataset locally.
@@ -1262,9 +1262,9 @@ def _prepare_split(
             split_generator: `SplitGenerator`, Split generator to process
             file_format (:obj:`str`, optional): format of the data files in which the dataset will be written.
                 Supported formats: "arrow", "parquet". Default to "arrow" format.
-            max_shard_size (:obj:`Union[str, int]`, optional): Approximate maximum number of bytes written per shard.
-                Only available for the "parquet" format with a default of "500MB". The size is based on uncompressed data size,
-                so in practice your shard files may be smaller than `max_shard_size` thanks to Parquet compression.
+            max_shard_size (:obj:`Union[str, int]`, optional): Maximum number of bytes written per shard (default: "500MB").
+                The size is based on uncompressed data size, so in practice your shard files may be smaller than
+                `max_shard_size`, for example thanks to Parquet compression.
             num_proc (:obj:`int`, optional, default `None`): Number of processes when downloading and generating the dataset locally.
                 Multiprocessing is disabled by default.