Skip to content

Commit

Permalink
Update docstrings/documentations of all the datasets (#931)
Browse files Browse the repository at this point in the history
  • Loading branch information
mthrok authored Oct 2, 2020
1 parent 963224f commit e3d1d74
Show file tree
Hide file tree
Showing 11 changed files with 257 additions and 102 deletions.
43 changes: 23 additions & 20 deletions docs/source/datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,82 +29,85 @@ CMUARCTIC
~~~~~~~~~

.. autoclass:: CMUARCTIC
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


COMMONVOICE
~~~~~~~~~~~

.. autoclass:: COMMONVOICE
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


GTZAN
~~~~~

.. autoclass:: GTZAN
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


LIBRISPEECH
~~~~~~~~~~~

.. autoclass:: LIBRISPEECH
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


LIBRITTS
~~~~~~~~

.. autoclass:: LIBRITTS
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


LJSPEECH
~~~~~~~~

.. autoclass:: LJSPEECH
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


SPEECHCOMMANDS
~~~~~~~~~~~~~~

.. autoclass:: SPEECHCOMMANDS
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


TEDLIUM
~~~~~~~~~~~~~~

.. autoclass:: TEDLIUM
:members: __getitem__
:special-members: get_phoneme_dict
:members:
:special-members: __getitem__


VCTK
~~~~

.. autoclass:: VCTK
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__


VCTK_092
~~~~~~~~

.. autoclass:: VCTK_092
:members:
:special-members: __getitem__


YESNO
~~~~~

.. autoclass:: YESNO
:members: __getitem__
:special-members:
:members:
:special-members: __getitem__
25 changes: 22 additions & 3 deletions torchaudio/datasets/cmuarctic.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,20 @@ def load_cmuarctic_item(line: str,


class CMUARCTIC(Dataset):
"""
Create a Dataset for CMU_arctic. Each item is a tuple of the form:
waveform, sample_rate, utterance, utterance_id
"""Create a Dataset for CMU_ARCTIC.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional):
The URL to download the dataset from or the type of the dataset to download.

This comment has been minimized.

Copy link
@csukuangfj

csukuangfj Mar 16, 2022

Collaborator

Typo:

the type of the dataset to dowload.

->

the type of the dataset to download.

This comment has been minimized.

Copy link
@mthrok

mthrok Mar 16, 2022

Author Collaborator

Thanks for the report. Will be addressed in #2281

(default: ``"aew"``)
Allowed type values are ``"aew"``, ``"ahw"``, ``"aup"``, ``"awb"``, ``"axb"``, ``"bdl"``,
``"clb"``, ``"eey"``, ``"fem"``, ``"gka"``, ``"jmk"``, ``"ksp"``, ``"ljm"``, ``"lnh"``,
``"rms"``, ``"rxr"``, ``"slp"`` or ``"slt"``.
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"ARCTIC"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_file_text = "txt.done.data"
Expand Down Expand Up @@ -143,6 +154,14 @@ def __init__(self,
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, utterance_id)``
"""
line = self._walker[n]
return load_cmuarctic_item(line, self._path, self._folder_audio, self._ext_audio)

Expand Down
37 changes: 32 additions & 5 deletions torchaudio/datasets/commonvoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,11 +100,28 @@ def load_commonvoice_item(line: List[str],


class COMMONVOICE(Dataset):
"""
Create a Dataset for CommonVoice. Each item is a tuple of the form:
(waveform, sample_rate, dictionary)
where dictionary is a dictionary built from the tsv file with the following keys:
client_id, path, sentence, up_votes, down_votes, age, gender, accent.
"""Create a Dataset for CommonVoice.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
tsv (str, optional): The name of the tsv file used to construct the metadata.
(default: ``"train.tsv"``)
url (str, optional): The URL to download the dataset from, or the language of
the dataset to download. (default: ``"english"``).
Allowed language values are ``"tatar"``, ``"english"``, ``"german"``,
``"french"``, ``"welsh"``, ``"breton"``, ``"chuvash"``, ``"turkish"``, ``"kyrgyz"``,
``"irish"``, ``"kabyle"``, ``"catalan"``, ``"taiwanese"``, ``"slovenian"``,
``"italian"``, ``"dutch"``, ``"hakha chin"``, ``"esperanto"``, ``"estonian"``,
``"persian"``, ``"portuguese"``, ``"basque"``, ``"spanish"``, ``"chinese"``,
``"mongolian"``, ``"sakha"``, ``"dhivehi"``, ``"kinyarwanda"``, ``"swedish"``,
``"russian"``, ``"indonesian"``, ``"arabic"``, ``"tamil"``, ``"interlingua"``,
``"latvian"``, ``"japanese"``, ``"votic"``, ``"abkhaz"``, ``"cantonese"`` and
``"romansh sursilvan"``.
folder_in_archive (str, optional): The top-level directory of the dataset.
version (str): Version string. (default: ``"cv-corpus-4-2019-12-10"``)
For the other allowed values, please check out https://commonvoice.mozilla.org/en/datasets.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_ext_txt = ".txt"
Expand Down Expand Up @@ -192,6 +209,16 @@ def __init__(self,
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, Dict[str, str]]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, dictionary)``, where dictionary is built
from the TSV file with the following keys: ``client_id``, ``path``, ``sentence``,
``up_votes``, ``down_votes``, ``age``, ``gender`` and ``accent``.
"""
line = self._walker[n]
return load_commonvoice_item(line, self._header, self._path, self._folder_audio)

Expand Down
32 changes: 25 additions & 7 deletions torchaudio/datasets/gtzan.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import warnings
from typing import Any, Tuple
from typing import Any, Tuple, Optional

import torchaudio
from torch import Tensor
Expand Down Expand Up @@ -998,12 +998,22 @@ def load_gtzan_item(fileid: str, path: str, ext_audio: str) -> Tuple[Tensor, str


class GTZAN(Dataset):
"""
Create a Dataset for GTZAN. Each item is a tuple of the form:
waveform, sample_rate, label.
"""Create a Dataset for GTZAN.
Note:
Please see http://marsyas.info/downloads/datasets.html if you are planning to use
this dataset to publish results.
Please see http://marsyas.info/downloads/datasets.html
if you are planning to use this dataset to publish results.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"http://opihi.cs.uvic.ca/sound/genres.tar.gz"``)
folder_in_archive (str, optional): The top-level directory of the dataset.
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
subset (str, optional): Which subset of the dataset to use.
One of ``"training"``, ``"validation"``, ``"testing"`` or ``None``.
If ``None``, the entire dataset is used. (default: ``None``).
"""

_ext_audio = ".wav"
Expand All @@ -1014,7 +1024,7 @@ def __init__(
url: str = URL,
folder_in_archive: str = FOLDER_IN_ARCHIVE,
download: bool = False,
subset: Any = None,
subset: Optional[str] = None,
) -> None:

# super(GTZAN, self).__init__()
Expand Down Expand Up @@ -1082,6 +1092,14 @@ def __init__(
self._walker = filtered_test

def __getitem__(self, n: int) -> Tuple[Tensor, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, label)``
"""
fileid = self._walker[n]
item = load_gtzan_item(fileid, self._path, self._ext_audio)
waveform, sample_rate, label = item
Expand Down
24 changes: 21 additions & 3 deletions torchaudio/datasets/librispeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,19 @@ def load_librispeech_item(fileid: str,


class LIBRISPEECH(Dataset):
"""
Create a Dataset for LibriSpeech. Each item is a tuple of the form:
waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id
"""Create a Dataset for LibriSpeech.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriSpeech"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_ext_txt = ".trans.txt"
Expand Down Expand Up @@ -117,6 +127,14 @@ def __init__(self,
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, int, int, int]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, utterance, speaker_id, chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_librispeech_item(fileid, self._path, self._ext_audio, self._ext_txt)

Expand Down
25 changes: 22 additions & 3 deletions torchaudio/datasets/libritts.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,19 @@ def load_libritts_item(


class LIBRITTS(Dataset):
"""
Create a Dataset for LibriTTS. Each item is a tuple of the form:
waveform, sample_rate, original_text, normalized_text, speaker_id, chapter_id, utterance_id
"""Create a Dataset for LibriTTS.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from,
or the type of the dataset to download.
Allowed type values are ``"dev-clean"``, ``"dev-other"``, ``"test-clean"``,
``"test-other"``, ``"train-clean-100"``, ``"train-clean-360"`` and
``"train-other-500"``. (default: ``"train-clean-100"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"LibriTTS"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_ext_original_txt = ".original.txt"
Expand Down Expand Up @@ -118,6 +128,15 @@ def __init__(
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str, int, int, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, original_text, normalized_text, speaker_id,
chapter_id, utterance_id)``
"""
fileid = self._walker[n]
return load_libritts_item(
fileid,
Expand Down
21 changes: 18 additions & 3 deletions torchaudio/datasets/ljspeech.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,16 @@ def load_ljspeech_item(line: List[str], path: str, ext_audio: str) -> Tuple[Tens


class LJSPEECH(Dataset):
"""
Create a Dataset for LJSpeech-1.1. Each item is a tuple of the form:
waveform, sample_rate, transcript, normalized_transcript
"""Create a Dataset for LJSpeech-1.1.
Args:
root (str): Path to the directory where the dataset is found or downloaded.
url (str, optional): The URL to download the dataset from.
(default: ``"https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2"``)
folder_in_archive (str, optional):
The top-level directory of the dataset. (default: ``"wavs"``)
download (bool, optional):
Whether to download the dataset if it is not found at root path. (default: ``False``).
"""

_ext_audio = ".wav"
Expand Down Expand Up @@ -68,6 +75,14 @@ def __init__(self,
self._walker = list(walker)

def __getitem__(self, n: int) -> Tuple[Tensor, int, str, str]:
"""Load the n-th sample from the dataset.
Args:
n (int): The index of the sample to be loaded
Returns:
tuple: ``(waveform, sample_rate, transcript, normalized_transcript)``
"""
line = self._walker[n]
return load_ljspeech_item(line, self._path, self._ext_audio)

Expand Down
Loading

0 comments on commit e3d1d74

Please sign in to comment.