Skip to content

Commit

Permalink
Update docstrings/documentations of all the datasets (#931)
Browse files Browse the repository at this point in the history
  • Loading branch information
mthrok authored Oct 2, 2020
1 parent 963224f commit e3d1d74
Show file tree
Hide file tree
Showing 11 changed files with 257 additions and 102 deletions.
43 changes: 23 additions & 20 deletions docs/source/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,82 +29,85 @@ CMUARCTIC
~~~~~~~~~

.. autoclass:: CMUARCTIC
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


COMMONVOICE
~~~~~~~~~~~

.. autoclass:: COMMONVOICE
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


GTZAN
~~~~~

.. autoclass:: GTZAN
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


LIBRISPEECH
~~~~~~~~~~~

.. autoclass:: LIBRISPEECH
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


LIBRITTS
~~~~~~~~

.. autoclass:: LIBRITTS
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


LJSPEECH
~~~~~~~~

.. autoclass:: LJSPEECH
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


SPEECHCOMMANDS
~~~~~~~~~~~~~~

.. autoclass:: SPEECHCOMMANDS
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


TEDLIUM
~~~~~~~~~~~~~~

.. autoclass:: TEDLIUM
:members: __getitem__
:special-members: get_phoneme_dict
:members:
:special-members: __getitem__


VCTK
~~~~

.. autoclass:: VCTK
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


VCTK_092
~~~~~~~~

.. autoclass:: VCTK_092
:members:
:special-members: __getitem__


YESNO
~~~~~

.. autoclass:: YESNO
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
25 changes: 22 additions & 3 deletions torchaudio/datasets/cmuarctic.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,20 @@ def load_cmuarctic_item(line: str,


class CMUARCTIC(Dataset):
"""
Create a Dataset for CMU_arctic. Each item is a tuple of the form:
waveform, sample_rate, utterance, utterance_id
"""Create a Dataset for CMU_ARCTIC.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional):
The URL to download the dataset from or the type of the dataset to download.

This comment has been minimized.

Copy link
@csukuangfj

csukuangfj Mar 16, 2022

Collaborator

Typo:

the type of the dataset to dowload.

->

the type of the dataset to download.

This comment has been minimized.

Copy link
@mthrok

mthrok Mar 16, 2022

Author Collaborator

Thanks for the report. Will be addressed in #2281

(default: ``"aew"``)
Allowed type values are ``"aew"``, ``"ahw"``, ``"aup"``, ``"awb"``, ``"axb"``, ``"bdl"``,
``"clb"``, ``"eey"``, ``"fem"``, ``"gka"``, ``"jmk"``, ``"ksp"``, ``"ljm"``, ``"lnh"``,
``"rms"``, ``"rxr"``, ``"slp"`` or ``"slt"``.
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"ARCTIC"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_file_text = "txt.done.data"
Expand Down Expand Up @@ -143,6 +154,14 @@ def __init__(self,
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, utterance_id)``
"""
line = self._walker[n]
return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)

Expand Down
37 changes: 32 additions & 5 deletions torchaudio/datasets/commonvoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,28 @@ def load_commonvoice_item(line: List[str],


class COMMONVOICE(Dataset):
"""
Create a Dataset for CommonVoice. Each item is a tuple of the form:
(waveform, sample_rate, dictionary)
where dictionary is a dictionary built from the tsv file with the following keys:
client_id, path, sentence, up_votes, down_votes, age, gender, accent.
"""Create a Dataset for CommonVoice.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
tsv (str, optional): The name of the tsv file used to construct the metadata.
(default: ``"train.tsv"``)
url (str, optional): The URL to download the dataset from, or the language of
the dataset to download. (default: ``"english"``).
Allowed language values are ``"tatar"``, ``"english"``, ``"german"``,
``"french"``, ``"welsh"``, ``"breton"``, ``"chuvash"``, ``"turkish"``, ``"kyrgyz"``,
``"irish"``, ``"kabyle"``, ``"catalan"``, ``"taiwanese"``, ``"slovenian"``,
``"italian"``, ``"dutch"``, ``"hakha chin"``, ``"esperanto"``, ``"estonian"``,
``"persian"``, ``"portuguese"``, ``"basque"``, ``"spanish"``, ``"chinese"``,
``"mongolian"``, ``"sakha"``, ``"dhivehi"``, ``"kinyarwanda"``, ``"swedish"``,
``"russian"``, ``"indonesian"``, ``"arabic"``, ``"tamil"``, ``"interlingua"``,
``"latvian"``, ``"japanese"``, ``"votic"``, ``"abkhaz"``, ``"cantonese"`` and
``"romansh sursilvan"``.
folder_in_archive (str, optional): The top-level directory of the dataset.
version (str): Version string. (default: ``"cv-corpus-4-2019-12-10"``)
For the other allowed values, please check out https://commonvoice.mozilla.org/en/datasets.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_ext_txt = ".txt"
Expand Down Expand Up @@ -192,6 +209,16 @@ def __init__(self,
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, dictionary)``, where dictionary is built
from the TSV file with the following keys: ``client_id``, ``path``, ``sentence``,
``up_votes``, ``down_votes``, ``age``, ``gender`` and ``accent``.
"""
line = self._walker[n]
return load_commonvoice_item(line, self._header, self._path, self._folder_audio)

Expand Down
32 changes: 25 additions & 7 deletions torchaudio/datasets/gtzan.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import warnings
from typing import Any, Tuple
from typing import Any, Tuple, Optional

import torchaudio
from torch import Tensor
Expand Down Expand Up @@ -998,12 +998,22 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str


class GTZAN(Dataset):
"""
Create a Dataset for GTZAN. Each item is a tuple of the form:
waveform, sample_rate, label.
"""Create a Dataset for GTZAN.
Note:
Please see http://marsyas.info/downloads/datasets.html if you are planning to use
this dataset to publish results.
Please see http://marsyas.info/downloads/datasets.html
if you are planning to use this dataset to publish results.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://opihi.cs.uvic.ca/sound/genres.tar.gz"``)
folder_in_archive (str, optional): The top-level directory of the dataset.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
subset (str, optional): Which subset of the dataset to use.
One of ``"training"``, ``"validation"``, ``"testing"`` or ``None``.
If ``None``, the entire dataset is used. (default: ``None``).
"""

_ext_audio = ".wav"
Expand All @@ -1014,7 +1024,7 @@ def __init__(
url: str = URL,
folder_in_archive: str = FOLDER_IN_ARCHIVE,
download: bool = False,
subset: Any = None,
subset: Optional[str] = None,
) -> None:

# super(GTZAN, self).__init__()
Expand Down Expand Up @@ -1082,6 +1092,14 @@ def __init__(
self._walker = filtered_test

def __getitem__(self, n: int) -> Tuple[Tensor, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label)``
"""
fileid = self._walker[n]
item = load_gtzan_item(fileid, self._path, self._ext_audio)
waveform, sample_rate, label = item
Expand Down
24 changes: 21 additions & 3 deletions torchaudio/datasets/librispeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,19 @@ def load_librispeech_item(fileid: str,


class LIBRISPEECH(Dataset):
"""
Create a Dataset for LibriSpeech. Each item is a tuple of the form:
waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id
"""Create a Dataset for LibriSpeech.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriSpeech"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_ext_txt = ".trans.txt"
Expand Down Expand Up @@ -117,6 +127,14 @@ def __init__(self,
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt)

Expand Down
25 changes: 22 additions & 3 deletions torchaudio/datasets/libritts.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,19 @@ def load_libritts_item(


class LIBRITTS(Dataset):
"""
Create a Dataset for LibriTTS. Each item is a tuple of the form:
waveform, sample_rate, original_text, normalized_text, speaker_id, chapter_id, utterance_id
"""Create a Dataset for LibriTTS.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriTTS"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_ext_original_txt = ".original.txt"
Expand Down Expand Up @@ -118,6 +128,15 @@ def __init__(
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, original_text, normalized_text, speaker_id,
chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_libritts_item(
fileid,
Expand Down
21 changes: 18 additions & 3 deletions torchaudio/datasets/ljspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,16 @@ def load_ljspeech_item(line: List[str], path: str, ext_audio: str) -> Tuple[Tens


class LJSPEECH(Dataset):
"""
Create a Dataset for LJSpeech-1.1. Each item is a tuple of the form:
waveform, sample_rate, transcript, normalized_transcript
"""Create a Dataset for LJSpeech-1.1.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"wavs"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_ext_audio = ".wav"
Expand Down Expand Up @@ -68,6 +75,14 @@ def __init__(self,
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, transcript, normalized_transcript)``
"""
line = self._walker[n]
return load_ljspeech_item(line, self._path, self._ext_audio)

Expand Down
Loading

0 comments on commit e3d1d74

Please sign in to comment.