diff --git a/datasets/common_voice/common_voice.py b/datasets/common_voice/common_voice.py index 0178da98105..b11a09a759e 100644 --- a/datasets/common_voice/common_voice.py +++ b/datasets/common_voice/common_voice.py @@ -15,8 +15,6 @@ """ Common Voice Dataset""" -import os - import datasets from datasets.tasks import AutomaticSpeechRecognition @@ -613,6 +611,7 @@ def __init__(self, name, sub_version, **kwargs): class CommonVoice(datasets.GeneratorBasedBuilder): + DEFAULT_WRITER_BATCH_SIZE = 1000 BUILDER_CONFIGS = [ CommonVoiceConfig( name=lang_id, @@ -658,49 +657,54 @@ def _info(self): def _split_generators(self, dl_manager): """Returns SplitGenerators.""" - dl_path = dl_manager.download_and_extract(_DATA_URL.format(self.config.name)) - abs_path_to_data = os.path.join(dl_path, "cv-corpus-6.1-2020-12-11", self.config.name) - abs_path_to_clips = os.path.join(abs_path_to_data, "clips") + archive = dl_manager.download(_DATA_URL.format(self.config.name)) + path_to_data = "/".join(["cv-corpus-6.1-2020-12-11", self.config.name]) + path_to_clips = "/".join([path_to_data, "clips"]) return [ datasets.SplitGenerator( name=datasets.Split.TRAIN, gen_kwargs={ - "filepath": os.path.join(abs_path_to_data, "train.tsv"), - "path_to_clips": abs_path_to_clips, + "files": dl_manager.iter_archive(archive), + "filepath": "/".join([path_to_data, "train.tsv"]), + "path_to_clips": path_to_clips, }, ), datasets.SplitGenerator( name=datasets.Split.TEST, gen_kwargs={ - "filepath": os.path.join(abs_path_to_data, "test.tsv"), - "path_to_clips": abs_path_to_clips, + "files": dl_manager.iter_archive(archive), + "filepath": "/".join([path_to_data, "test.tsv"]), + "path_to_clips": path_to_clips, }, ), datasets.SplitGenerator( name=datasets.Split.VALIDATION, gen_kwargs={ - "filepath": os.path.join(abs_path_to_data, "dev.tsv"), - "path_to_clips": abs_path_to_clips, + "files": dl_manager.iter_archive(archive), + "filepath": "/".join([path_to_data, "dev.tsv"]), + "path_to_clips": path_to_clips, }, ), datasets.SplitGenerator( name="other", gen_kwargs={ - "filepath": os.path.join(abs_path_to_data, "other.tsv"), - "path_to_clips": abs_path_to_clips, + "files": dl_manager.iter_archive(archive), + "filepath": "/".join([path_to_data, "other.tsv"]), + "path_to_clips": path_to_clips, }, ), datasets.SplitGenerator( name="invalidated", gen_kwargs={ - "filepath": os.path.join(abs_path_to_data, "invalidated.tsv"), - "path_to_clips": abs_path_to_clips, + "files": dl_manager.iter_archive(archive), + "filepath": "/".join([path_to_data, "invalidated.tsv"]), + "path_to_clips": path_to_clips, }, ), ] - def _generate_examples(self, filepath, path_to_clips): + def _generate_examples(self, files, filepath, path_to_clips): """Yields examples.""" data_fields = list(self._info().features.keys()) @@ -708,28 +712,37 @@ def _generate_examples(self, filepath, path_to_clips): data_fields.remove("audio") path_idx = data_fields.index("path") - with open(filepath, encoding="utf-8") as f: - lines = f.readlines() - headline = lines[0] - - column_names = headline.strip().split("\t") - assert ( - column_names == data_fields - ), f"The file should have {data_fields} as column names, but has {column_names}" - - for id_, line in enumerate(lines[1:]): - field_values = line.strip().split("\t") + all_field_values = {} + metadata_found = False + for path, f in files: + if path == filepath: + metadata_found = True + lines = f.readlines() + headline = lines[0].decode("utf-8") - # set absolute path for mp3 audio file - field_values[path_idx] = os.path.join(path_to_clips, field_values[path_idx]) + column_names = headline.strip().split("\t") + assert ( + column_names == data_fields + ), f"The file should have {data_fields} as column names, but has {column_names}" + for line in lines[1:]: + field_values = line.decode("utf-8").strip().split("\t") + # set full path for mp3 audio file + audio_path = "/".join([path_to_clips, field_values[path_idx]]) + all_field_values[audio_path] = field_values + elif path.startswith(path_to_clips): + assert metadata_found, "Found audio clips before the metadata TSV file." + if not all_field_values: + break + if path in all_field_values: + field_values = all_field_values[path] - # if data is incomplete, fill with empty values - if len(field_values) < len(data_fields): - field_values += (len(data_fields) - len(field_values)) * ["''"] + # if data is incomplete, fill with empty values + if len(field_values) < len(data_fields): + field_values += (len(data_fields) - len(field_values)) * ["''"] - result = {key: value for key, value in zip(data_fields, field_values)} + result = {key: value for key, value in zip(data_fields, field_values)} - # set audio feature - result["audio"] = field_values[path_idx] + # set audio feature + result["audio"] = {"path": path, "bytes": f.read()} - yield id_, result + yield path, result diff --git a/datasets/common_voice/dataset_infos.json b/datasets/common_voice/dataset_infos.json index f9266ece149..1e79e044ac3 100644 --- a/datasets/common_voice/dataset_infos.json +++ b/datasets/common_voice/dataset_infos.json @@ -1 +1 @@ -{"ab": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ab", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 10802, "num_examples": 22, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 4442, "num_examples": 9, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 376182, "num_examples": 752, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 3906, "num_examples": 8, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ab.tar.gz": {"num_bytes": 41038412, "checksum": "801de9c63f740c4d2c821709586921bed216c736e593051306579cf478a54388"}}, "download_size": 41038412, "post_processing_size": null, "dataset_size": 395332, "size_in_bytes": 41433744}, "ar": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ar", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 6330858, "num_examples": 14227, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 3306715, "num_examples": 7622, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 3330810, "num_examples": 7517, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 7881421, "num_examples": 18283, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2822099, "num_examples": 6333, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ar.tar.gz": {"num_bytes": 1756264615, "checksum": "516b369da8a000c1b98d8f5ee3b90fa12bcc5d5438391fcf01f3d5e78ccdd6fa"}}, "download_size": 1756264615, "post_processing_size": null, "dataset_size": 23671903, "size_in_bytes": 1779936518}, "as": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "as", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 135331, "num_examples": 270, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 54717, "num_examples": 110, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 63580, "num_examples": 124, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 15547, "num_examples": 31, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/as.tar.gz": {"num_bytes": 22226465, "checksum": "d9afd6d28e9c837ff0943a94452fb12ce8a7885b38fdeb25fc2912bbe4977f40"}}, "download_size": 22226465, "post_processing_size": null, "dataset_size": 269175, "size_in_bytes": 22495640}, "br": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "br", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1114817, "num_examples": 2780, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 838823, "num_examples": 2087, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 807978, "num_examples": 1997, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 4446871, "num_examples": 10912, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 260104, "num_examples": 623, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/br.tar.gz": {"num_bytes": 465276982, "checksum": "d323d71337055b794c8fe3dcdf5a0dc03d6bf8f7c8c19f96369884410aef4606"}}, "download_size": 465276982, "post_processing_size": null, "dataset_size": 7468593, "size_in_bytes": 472745575}, "ca": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ca", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 128917601, "num_examples": 285584, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 6886168, "num_examples": 15724, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 6959066, "num_examples": 15724, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 28903919, "num_examples": 64446, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 8504933, "num_examples": 18846, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ca.tar.gz": {"num_bytes": 20743110341, "checksum": "a27bec66c151ddb21c1736781b3bca972047cc20c02488bad94d2311c40bc6da"}}, "download_size": 20743110341, "post_processing_size": null, "dataset_size": 180171687, "size_in_bytes": 20923282028}, "cnh": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "cnh", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 330832, "num_examples": 807, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 307840, "num_examples": 752, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 310074, "num_examples": 756, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1208870, "num_examples": 2934, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 177752, "num_examples": 433, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/cnh.tar.gz": {"num_bytes": 161331331, "checksum": "9c27ce17ea8db73e7a2c8715bdb3a45a40792d6d64238cfbb467a81c6b71d71f"}}, "download_size": 161331331, "post_processing_size": null, "dataset_size": 2335368, "size_in_bytes": 163666699}, "cs": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "cs", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2459092, "num_examples": 5655, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1748420, "num_examples": 4144, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 1756122, "num_examples": 4118, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 3247839, "num_examples": 7475, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 292158, "num_examples": 685, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/cs.tar.gz": {"num_bytes": 1271909933, "checksum": "68a1d6f27eb7161fdf28da889e7d37e8c86b7aff73b0b6df52edc8359e30ac56"}}, "download_size": 1271909933, "post_processing_size": null, "dataset_size": 9503631, "size_in_bytes": 1281413564}, "cv": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "cv", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 436012, "num_examples": 931, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 365363, "num_examples": 788, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 388030, "num_examples": 818, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 3263709, "num_examples": 6927, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 607952, "num_examples": 1282, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/cv.tar.gz": {"num_bytes": 439329081, "checksum": "c3fb84c28a5718f01b91cf1026985b1dcd83bb312d32620f16b5ed4f12fb8c73"}}, "download_size": 439329081, "post_processing_size": null, "dataset_size": 5061066, "size_in_bytes": 444390147}, "cy": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "cy", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3029147, "num_examples": 6839, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 2060863, "num_examples": 4820, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 2102719, "num_examples": 4776, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 7778447, "num_examples": 17919, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1569654, "num_examples": 3648, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/cy.tar.gz": {"num_bytes": 3434474658, "checksum": "269da0cbbb2887d1903c0e17bbb71ea9bcd83506ba928fe75c660cb3e52f9a67"}}, "download_size": 3434474658, "post_processing_size": null, "dataset_size": 16540830, "size_in_bytes": 3451015488}, "de": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "de", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 111735161, "num_examples": 246525, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 6785721, "num_examples": 15588, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 6850065, "num_examples": 15588, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 4563457, "num_examples": 10095, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 14542398, "num_examples": 32789, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/de.tar.gz": {"num_bytes": 23283812097, "checksum": "733e6e367da4b9588b4bb175ac45c6c0ec545e41df5494a7ee4a7e4ff3141ef7"}}, "download_size": 23283812097, "post_processing_size": null, "dataset_size": 144476802, "size_in_bytes": 23428288899}, "dv": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "dv", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1312675, "num_examples": 2680, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1075889, "num_examples": 2202, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 1032265, "num_examples": 2077, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 421053, "num_examples": 840, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/dv.tar.gz": {"num_bytes": 540488041, "checksum": "b2c8617df5e7aebd74d88491913ecc6b94066198e875853b0b3847d13e70f419"}}, "download_size": 540488041, "post_processing_size": null, "dataset_size": 3841882, "size_in_bytes": 544329923}, "el": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "el", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1043636, "num_examples": 2316, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 677742, "num_examples": 1522, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 631379, "num_examples": 1401, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 2539987, "num_examples": 5659, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 83583, "num_examples": 185, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/el.tar.gz": {"num_bytes": 381570611, "checksum": "86c67e7bda7658a7087b5a1997d140d57957a05bb413a188610db61807c53ee4"}}, "download_size": 381570611, "post_processing_size": null, "dataset_size": 4976327, "size_in_bytes": 386546938}, "en": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "en", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 250691604, "num_examples": 564337, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 6850452, "num_examples": 16164, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 6976081, "num_examples": 16164, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 72156747, "num_examples": 169895, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 82557632, "num_examples": 189562, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/en.tar.gz": {"num_bytes": 60613063630, "checksum": "0f8fdfc4fe715738be94ee49c4fb63d5f1608d2e6a43a2bed80f6cb871171c36"}}, "download_size": 60613063630, "post_processing_size": null, "dataset_size": 419232516, "size_in_bytes": 61032296146}, "eo": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "eo", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 8663844, "num_examples": 19587, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 3843190, "num_examples": 8969, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 3879354, "num_examples": 8987, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1296351, "num_examples": 2946, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2081223, "num_examples": 4736, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/eo.tar.gz": {"num_bytes": 2883560869, "checksum": "c19900010aee0f9eb39416406598509b1cdba136a16318e746b1a64f97d7809c"}}, "download_size": 2883560869, "post_processing_size": null, "dataset_size": 19763962, "size_in_bytes": 2903324831}, "es": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "es", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 72689623, "num_examples": 161813, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 6544041, "num_examples": 15089, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 6567785, "num_examples": 15089, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 62421588, "num_examples": 144791, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 17672664, "num_examples": 40640, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/es.tar.gz": {"num_bytes": 16188844718, "checksum": "276ca393783cd8b208d56b5032b87c13a40fcadde5b3925596e67c15578d0235"}}, "download_size": 16188844718, "post_processing_size": null, "dataset_size": 165895701, "size_in_bytes": 16354740419}, "et": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "et", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1426348, "num_examples": 2966, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1173073, "num_examples": 2509, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 1212463, "num_examples": 2507, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 266991, "num_examples": 569, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1766673, "num_examples": 3557, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/et.tar.gz": {"num_bytes": 767174465, "checksum": "50a861393e4e7013ab71f1b63bca8c42c26dca1519c15a3b9cdb3cb5b6c561a2"}}, "download_size": 767174465, "post_processing_size": null, "dataset_size": 5845548, "size_in_bytes": 773020013}, "eu": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "eu", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3389176, "num_examples": 7505, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 2247330, "num_examples": 5172, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 2281644, "num_examples": 5172, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 10454269, "num_examples": 23570, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2389658, "num_examples": 5387, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/eu.tar.gz": {"num_bytes": 3664586106, "checksum": "55b6eaf7ca7c120faa0b60d71c87189b610412334e6b710fe12c2a79489ab06f"}}, "download_size": 3664586106, "post_processing_size": null, "dataset_size": 20762077, "size_in_bytes": 3685348183}, "fa": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "fa", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3246710, "num_examples": 7593, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 2271812, "num_examples": 5213, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 2263134, "num_examples": 5213, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 9773876, "num_examples": 22510, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 5329900, "num_examples": 11698, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/fa.tar.gz": {"num_bytes": 8884585819, "checksum": "5454efe3b2f6d06d51e7177469b7bef9a962adbf7611e3cd21771451112abe6d"}}, "download_size": 8884585819, "post_processing_size": null, "dataset_size": 22885432, "size_in_bytes": 8907471251}, "fi": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "fi", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 199505, "num_examples": 460, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 183540, "num_examples": 428, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 179607, "num_examples": 415, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 64358, "num_examples": 149, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 25781, "num_examples": 59, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/fi.tar.gz": {"num_bytes": 49882909, "checksum": "eb26d0904beef5ec08cf53267be7e78b8ba5056fd162057d5b085a7cba51f035"}}, "download_size": 49882909, "post_processing_size": null, "dataset_size": 652791, "size_in_bytes": 50535700}, "fr": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "fr", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 133605567, "num_examples": 298982, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 6854610, "num_examples": 15763, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 6868568, "num_examples": 15763, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1435580, "num_examples": 3222, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 17776024, "num_examples": 40351, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/fr.tar.gz": {"num_bytes": 19130141984, "checksum": "719ef964b55d830a095a602aff311db39b77239e9d600b6af646ec2ed57e5e45"}}, "download_size": 19130141984, "post_processing_size": null, "dataset_size": 166540349, "size_in_bytes": 19296682333}, "fy-NL": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "fy-NL", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1695909, "num_examples": 3927, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1311327, "num_examples": 3020, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 1215844, "num_examples": 2790, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 9389087, "num_examples": 21569, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 451010, "num_examples": 1031, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/fy-NL.tar.gz": {"num_bytes": 1237743070, "checksum": "ddee4fc3ce52df2379fa4069090d8f5c853155dc0462eb645f6111e2da627297"}}, "download_size": 1237743070, "post_processing_size": null, "dataset_size": 14063177, "size_in_bytes": 1251806247}, "ga-IE": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ga-IE", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 236396, "num_examples": 541, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 215599, "num_examples": 506, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 212002, "num_examples": 497, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 917017, "num_examples": 2130, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 176661, "num_examples": 409, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ga-IE.tar.gz": {"num_bytes": 156553447, "checksum": "27223fc99af6a45f81190ecb90034806991ff3b9e3aa38a7e97caaabbb0a4ddc"}}, "download_size": 156553447, "post_processing_size": null, "dataset_size": 1757675, "size_in_bytes": 158311122}, "hi": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "hi", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 73903, "num_examples": 157, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 58773, "num_examples": 127, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 64002, "num_examples": 135, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 67240, "num_examples": 139, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 29139, "num_examples": 60, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/hi.tar.gz": {"num_bytes": 21424045, "checksum": "5492393b04dd1307a52d93525a7db08fc392c8ba0df553668945152e434f58c9"}}, "download_size": 21424045, "post_processing_size": null, "dataset_size": 293057, "size_in_bytes": 21717102}, "hsb": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "hsb", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 367798, "num_examples": 808, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 173155, "num_examples": 387, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 77478, "num_examples": 172, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 28207, "num_examples": 62, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 103211, "num_examples": 227, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/hsb.tar.gz": {"num_bytes": 79362060, "checksum": "3dd3d79aaa078ad7955552ebc596e0a8894ffd7a4a88a51b2c8ee80c0e088152"}}, "download_size": 79362060, "post_processing_size": null, "dataset_size": 749849, "size_in_bytes": 80111909}, "hu": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "hu", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1428176, "num_examples": 3348, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 699721, "num_examples": 1649, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 612969, "num_examples": 1434, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 127337, "num_examples": 295, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 72559, "num_examples": 169, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/hu.tar.gz": {"num_bytes": 242758708, "checksum": "61f933155cba6c54c0b76d0ddd2caebd62d69228b7c935382112abe172660953"}}, "download_size": 242758708, "post_processing_size": null, "dataset_size": 2940762, "size_in_bytes": 245699470}, "ia": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ia", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1446791, "num_examples": 3477, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 372192, "num_examples": 899, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 664744, "num_examples": 1601, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 452330, "num_examples": 1095, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 79695, "num_examples": 192, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ia.tar.gz": {"num_bytes": 226499645, "checksum": "47a137a805ea8ce01f2cf9277739919a824a9fd13468345dfbd84eddb52c02f1"}}, "download_size": 226499645, "post_processing_size": null, "dataset_size": 3015752, "size_in_bytes": 229515397}, "id": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "id", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 889083, "num_examples": 2130, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 766675, "num_examples": 1844, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 766720, "num_examples": 1835, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 2831110, "num_examples": 6782, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 196795, "num_examples": 470, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/id.tar.gz": {"num_bytes": 475918233, "checksum": "71177fa9d2fac29f48db5feabc294f1d6bbcaa0c326b0d1099be66c0b804b245"}}, "download_size": 475918233, "post_processing_size": null, "dataset_size": 5450383, "size_in_bytes": 481368616}, "it": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "it", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 25748596, "num_examples": 58015, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 5629778, "num_examples": 12928, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 5651445, "num_examples": 12928, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 6438506, "num_examples": 14549, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 5425867, "num_examples": 12189, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/it.tar.gz": {"num_bytes": 5585781573, "checksum": "3a75b1631958af1487ee49b13cd27efc951183737ed515832cf714ed20c97808"}}, "download_size": 5585781573, "post_processing_size": null, "dataset_size": 48894192, "size_in_bytes": 5634675765}, "ja": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ja", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 317820, "num_examples": 722, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 278459, "num_examples": 632, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 255038, "num_examples": 586, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 389563, "num_examples": 885, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 222566, "num_examples": 504, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ja.tar.gz": {"num_bytes": 152879796, "checksum": "3614cd0d0abac80794351c78183967c83179fab390d7e19cad97758eb85ae558"}}, "download_size": 152879796, "post_processing_size": null, "dataset_size": 1463446, "size_in_bytes": 154343242}, "ka": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ka", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 581587, "num_examples": 1058, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 358380, "num_examples": 656, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 294673, "num_examples": 527, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 24443, "num_examples": 44, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 78770, "num_examples": 139, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ka.tar.gz": {"num_bytes": 104280554, "checksum": "7677df9d650234306a11bf8518be5807e72e7d5fc440d391304d1b99dd5517f5"}}, "download_size": 104280554, "post_processing_size": null, "dataset_size": 1337853, "size_in_bytes": 105618407}, "kab": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "kab", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 49343008, "num_examples": 120530, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 5936276, "num_examples": 14622, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 5928674, "num_examples": 14622, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 36104123, "num_examples": 88021, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 7518840, "num_examples": 18134, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/kab.tar.gz": {"num_bytes": 17171606918, "checksum": "d2089107d4f3a84856c457a436a47a883b872022f2085cfad0501469be91fd95"}}, "download_size": 17171606918, "post_processing_size": null, "dataset_size": 104830921, "size_in_bytes": 17276437839}, "ky": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ky", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 927074, "num_examples": 1955, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 700081, "num_examples": 1503, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 711620, "num_examples": 1511, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 3410831, "num_examples": 7223, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 437848, "num_examples": 926, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ky.tar.gz": {"num_bytes": 579440853, "checksum": "6efe0ca5384d0419fcf5fda0e0229a1b5eb80d8eeba2d7528a4c3c9f2593206f"}}, "download_size": 579440853, "post_processing_size": null, "dataset_size": 6187454, "size_in_bytes": 585628307}, "lg": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "lg", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 549563, "num_examples": 1250, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 253625, "num_examples": 584, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 168943, "num_examples": 384, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1365647, "num_examples": 3110, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 127043, "num_examples": 290, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/lg.tar.gz": {"num_bytes": 208197149, "checksum": "71243c65f638cd7f392fabe22e37cbafbdca4eb5a199210000ae957a88768040"}}, "download_size": 208197149, "post_processing_size": null, "dataset_size": 2464821, "size_in_bytes": 210661970}, "lt": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "lt", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 402862, "num_examples": 931, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 203781, "num_examples": 466, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 106451, "num_examples": 244, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 710428, "num_examples": 1629, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 44360, "num_examples": 102, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/lt.tar.gz": {"num_bytes": 135299706, "checksum": "5ad3d93bc308f58a70e6685f71ae035237ef9caa0922232ac76846f7587bb8aa"}}, "download_size": 135299706, "post_processing_size": null, "dataset_size": 1467882, "size_in_bytes": 136767588}, "lv": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "lv", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1051326, "num_examples": 2552, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 767926, "num_examples": 1882, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 819846, "num_examples": 2002, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 641669, "num_examples": 1560, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 58933, "num_examples": 143, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/lv.tar.gz": {"num_bytes": 208307691, "checksum": "8a4350ccf24884ee1012032bfd5a87e0de50d780b1f8450d1cb52afe3f69c671"}}, "download_size": 208307691, "post_processing_size": null, "dataset_size": 3339700, "size_in_bytes": 211647391}, "mn": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "mn", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1088733, "num_examples": 2183, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 912144, "num_examples": 1862, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 912414, "num_examples": 1837, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1628610, "num_examples": 3272, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 332643, "num_examples": 667, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/mn.tar.gz": {"num_bytes": 486369317, "checksum": "3aebc40d40eb19263576664a981f4bb8b221abeab78c8154adc3d16875c75ec7"}}, "download_size": 486369317, "post_processing_size": null, "dataset_size": 4874544, "size_in_bytes": 491243861}, "mt": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "mt", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 884543, "num_examples": 2036, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 690486, "num_examples": 1617, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 651610, "num_examples": 1516, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 2464327, "num_examples": 5714, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 136773, "num_examples": 314, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/mt.tar.gz": {"num_bytes": 425114242, "checksum": "9d53000d7832d130c4d35fb412bfc092ab8de8e763a5d2a528aebf37f052af03"}}, "download_size": 425114242, "post_processing_size": null, "dataset_size": 4827739, "size_in_bytes": 429941981}, "nl": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "nl", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 4219972, "num_examples": 9460, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 2457725, "num_examples": 5708, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 2200827, "num_examples": 4938, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 11420, "num_examples": 27, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1442237, "num_examples": 3308, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/nl.tar.gz": {"num_bytes": 1741827548, "checksum": "048f823408e3bbd16e63111d1b4caecb0102606c440bbdf3e5b6a6bae1e1e3f1"}}, "download_size": 1741827548, "post_processing_size": null, "dataset_size": 10332181, "size_in_bytes": 1752159729}, "or": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "or", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 196790, "num_examples": 388, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 49231, "num_examples": 98, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 65559, "num_examples": 129, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 2191159, "num_examples": 4302, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 30974, "num_examples": 62, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/or.tar.gz": {"num_bytes": 199077358, "checksum": "f3edad30166fe454f4d2b14adeece1434dc4b8eb7b0ece37aac8389b7122218a"}}, "download_size": 199077358, "post_processing_size": null, "dataset_size": 2533713, "size_in_bytes": 201611071}, "pa-IN": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "pa-IN", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 100668, "num_examples": 211, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 54307, "num_examples": 116, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 20728, "num_examples": 44, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 670272, "num_examples": 1411, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 20354, "num_examples": 43, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/pa-IN.tar.gz": {"num_bytes": 69748265, "checksum": "d2e30f28a227ecb8209340c4133edf6489f35f8e3d1eb55ff22b96b12f36952c"}}, "download_size": 69748265, "post_processing_size": null, "dataset_size": 866329, "size_in_bytes": 70614594}, "pl": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "pl", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3259050, "num_examples": 7468, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 2156262, "num_examples": 5153, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 2203857, "num_examples": 5153, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 5566818, "num_examples": 12848, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1983448, "num_examples": 4601, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/pl.tar.gz": {"num_bytes": 3537012341, "checksum": "acbf77d36e083e2bcb7152ffb52ab7d1e3e64d33a3f51f106cdff7feff6279aa"}}, "download_size": 3537012341, "post_processing_size": null, "dataset_size": 15169435, "size_in_bytes": 3552181776}, "pt": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "pt", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2763497, "num_examples": 6514, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1948500, "num_examples": 4641, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 1936082, "num_examples": 4592, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 3492648, "num_examples": 8390, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 738577, "num_examples": 1740, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/pt.tar.gz": {"num_bytes": 1704252567, "checksum": "6700de499f728e0e3f3ed4d7005e5b7db27ba2ddc872b21b0b404c3b4859d84b"}}, "download_size": 1704252567, "post_processing_size": null, "dataset_size": 10879304, "size_in_bytes": 1715131871}, "rm-sursilv": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "rm-sursilv", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 627518, "num_examples": 1384, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 535630, "num_examples": 1194, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 539772, "num_examples": 1205, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 946574, "num_examples": 2102, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 290484, "num_examples": 639, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/rm-sursilv.tar.gz": {"num_bytes": 275950479, "checksum": "3cfc4971b6ab8958d7c3d784977690fcc04ebd7570ecf788d5948df84a5481a1"}}, "download_size": 275950479, "post_processing_size": null, "dataset_size": 2939978, "size_in_bytes": 278890457}, "rm-vallader": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "rm-vallader", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 267837, "num_examples": 574, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 173761, "num_examples": 378, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 163725, "num_examples": 357, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 339277, "num_examples": 727, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 175312, "num_examples": 374, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/rm-vallader.tar.gz": {"num_bytes": 108113989, "checksum": "4fdb7dc5e20862a636ee7975831b39db29012d615f9139edf2d266b878ce43ae"}}, "download_size": 108113989, "post_processing_size": null, "dataset_size": 1119912, "size_in_bytes": 109233901}, "ro": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ro", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1457000, "num_examples": 3399, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 756861, "num_examples": 1778, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 368157, "num_examples": 858, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 827971, "num_examples": 1945, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 207526, "num_examples": 485, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ro.tar.gz": {"num_bytes": 261978702, "checksum": "450b159e936ef6ff136fcdfad193675caec5b2230d1b6ca24c5cde491ff002cd"}}, "download_size": 261978702, "post_processing_size": null, "dataset_size": 3617515, "size_in_bytes": 265596217}, "ru": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ru", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 7918252, "num_examples": 15481, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 4035778, "num_examples": 8007, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 4017986, "num_examples": 7963, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 5123246, "num_examples": 10247, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1567391, "num_examples": 3056, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ru.tar.gz": {"num_bytes": 3655676916, "checksum": "dcbb460e58d4afc78047c3801c9eb56d940b388eb350ee3da3de5bfe5a74a025"}}, "download_size": 3655676916, "post_processing_size": null, "dataset_size": 22662653, "size_in_bytes": 3678339569}, "rw": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "rw", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 222435182, "num_examples": 515197, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 6836125, "num_examples": 15724, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 6685632, "num_examples": 15032, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 9774022, "num_examples": 22923, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 93086051, "num_examples": 206790, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/rw.tar.gz": {"num_bytes": 42545189583, "checksum": "cf8a07059b3713022d487f9a6b8f465271f3457c525a8b350f829f87b0132b41"}}, "download_size": 42545189583, "post_processing_size": null, "dataset_size": 338817012, "size_in_bytes": 42884006595}, "sah": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "sah", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 733267, "num_examples": 1442, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 379003, "num_examples": 757, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 204118, "num_examples": 405, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 636097, "num_examples": 1275, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 33499, "num_examples": 66, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/sah.tar.gz": {"num_bytes": 181245626, "checksum": "dea1a454813c8f90abcbdf427fa922e1b7a116753deeb410af096ce5f0ae2405"}}, "download_size": 181245626, "post_processing_size": null, "dataset_size": 1985984, "size_in_bytes": 183231610}, "sl": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "sl", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 845619, "num_examples": 2038, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 363066, "num_examples": 881, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 231081, "num_examples": 556, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1033232, "num_examples": 2502, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 37929, "num_examples": 92, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/sl.tar.gz": {"num_bytes": 222751292, "checksum": "184cfbfe876a1f1c6317e4e34680c82a940db833afca78203c2929db1768a353"}}, "download_size": 222751292, "post_processing_size": null, "dataset_size": 2510927, "size_in_bytes": 225262219}, "sv-SE": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "sv-SE", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 983262, "num_examples": 2331, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 840358, "num_examples": 2027, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 844026, "num_examples": 2019, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1329608, "num_examples": 3043, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 193364, "num_examples": 462, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/sv-SE.tar.gz": {"num_bytes": 421434184, "checksum": "dc8634dafacb33be00f06e376f6c479d53f84f4834952593c8903f1080535213"}}, "download_size": 421434184, "post_processing_size": null, "dataset_size": 4190618, "size_in_bytes": 425624802}, "ta": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ta", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 957720, "num_examples": 2009, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 846103, "num_examples": 1781, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 858400, "num_examples": 1779, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 3584809, "num_examples": 7428, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 284039, "num_examples": 594, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ta.tar.gz": {"num_bytes": 679766097, "checksum": "78560d9d608a63ee75c3fdeb7f96f33cf0d85855ba6294b13e945de066eb46d8"}}, "download_size": 679766097, "post_processing_size": null, "dataset_size": 6531071, "size_in_bytes": 686297168}, "th": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "th", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1389723, "num_examples": 2917, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1029454, "num_examples": 2188, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 909292, "num_examples": 1922, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1268833, "num_examples": 2671, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 222666, "num_examples": 467, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/th.tar.gz": {"num_bytes": 341305736, "checksum": "a3d11043c49d3ea8ffb58dfab117cd831dd62a641e0a26ac60eb43e483534f7a"}}, "download_size": 341305736, "post_processing_size": null, "dataset_size": 4819968, "size_in_bytes": 346125704}, "tr": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "tr", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 778858, "num_examples": 1831, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 689987, "num_examples": 1647, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 694938, "num_examples": 1647, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 137465, "num_examples": 325, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 730583, "num_examples": 1726, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/tr.tar.gz": {"num_bytes": 620848700, "checksum": "b3f266c868b1fe9f76270ba76226b1cdc17f33b3e387e6b44a64d5419f8b9768"}}, "download_size": 620848700, "post_processing_size": null, "dataset_size": 3031831, "size_in_bytes": 623880531}, "tt": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "tt", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 5048627, "num_examples": 11211, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1974398, "num_examples": 4485, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 939118, "num_examples": 2127, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 793843, "num_examples": 1798, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 129728, "num_examples": 287, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/tt.tar.gz": {"num_bytes": 777153207, "checksum": "89c8d7a49584de720f1790df39e6f07996e2eecb07f6273f4ba2668e9fe4ad46"}}, "download_size": 777153207, "post_processing_size": null, "dataset_size": 8885714, "size_in_bytes": 786038921}, "uk": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "uk", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1888179, "num_examples": 4035, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1511544, "num_examples": 3235, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 1521216, "num_examples": 3236, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 3830066, "num_examples": 8161, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 598922, "num_examples": 1255, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/uk.tar.gz": {"num_bytes": 1218559031, "checksum": "f3ca0143cd84f5eacb583187052e69efec21c571a426efee91a765a2284519c2"}}, "download_size": 1218559031, "post_processing_size": null, "dataset_size": 9349927, "size_in_bytes": 1227908958}, "vi": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "vi", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 92564, "num_examples": 221, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 82035, "num_examples": 198, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 84472, "num_examples": 200, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 366671, "num_examples": 870, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 32664, "num_examples": 78, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/vi.tar.gz": {"num_bytes": 51929480, "checksum": "704bce8031932377cc21c017923ff1e96ebd2be9bd520adcf839f7a0f5f03b6e"}}, "download_size": 51929480, "post_processing_size": null, "dataset_size": 658406, "size_in_bytes": 52587886}, "vot": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "vot", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1250, "num_examples": 3, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 163377, "num_examples": 411, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2419, "num_examples": 6, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/vot.tar.gz": {"num_bytes": 7792602, "checksum": "7fb07dd25b0575e8cd811bb8d1e5aebd17fdbca079a4ee50d81e0aaaff50f8b0"}}, "download_size": 7792602, "post_processing_size": null, "dataset_size": 167046, "size_in_bytes": 7959648}, "zh-CN": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "zh-CN", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 8279157, "num_examples": 18541, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 3757047, "num_examples": 8760, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 3823707, "num_examples": 8743, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 3908115, "num_examples": 8948, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2328784, "num_examples": 5305, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/zh-CN.tar.gz": {"num_bytes": 2184602350, "checksum": "cd8589cac28541f9f996d1954f14c307954f1146ac44a8eadad8e31ebaf1f15e"}}, "download_size": 2184602350, "post_processing_size": null, "dataset_size": 22096810, "size_in_bytes": 2206699160}, "zh-HK": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "zh-HK", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3142432, "num_examples": 7506, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 2144145, "num_examples": 5172, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 2163111, "num_examples": 5172, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 16142369, "num_examples": 38830, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1272392, "num_examples": 2999, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/zh-HK.tar.gz": {"num_bytes": 2774145806, "checksum": "8a525ce4664d6647701449d5e72f7d8658cc3a5fabc72e05c6883994fd3c0134"}}, "download_size": 2774145806, "post_processing_size": null, "dataset_size": 24864449, "size_in_bytes": 2799010255}, "zh-TW": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "zh-TW", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1478055, "num_examples": 3507, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 1184204, "num_examples": 2895, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 1204526, "num_examples": 2895, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 9437896, "num_examples": 22477, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1493820, "num_examples": 3584, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/zh-TW.tar.gz": {"num_bytes": 2182836295, "checksum": "67fadf561f8237690d4a4a1d63a9b3ac271b5d05438dc745b7e04282d909460f"}}, "download_size": 2182836295, "post_processing_size": null, "dataset_size": 14798501, "size_in_bytes": 2197634796}} \ No newline at end of file +{"ab": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ab", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1295622, "num_examples": 22, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 411844, "num_examples": 9, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 40023390, "num_examples": 752, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 361626, "num_examples": 8, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ab.tar.gz": {"num_bytes": 41038412, "checksum": "801de9c63f740c4d2c821709586921bed216c736e593051306579cf478a54388"}}, "download_size": 41038412, "post_processing_size": null, "dataset_size": 42092482, "size_in_bytes": 83130894}, "ar": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ar", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 359335168, "num_examples": 14227, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 237546641, "num_examples": 7622, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 209606861, "num_examples": 7517, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 515822404, "num_examples": 18283, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 194805036, "num_examples": 6333, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ar.tar.gz": {"num_bytes": 1756264615, "checksum": "516b369da8a000c1b98d8f5ee3b90fa12bcc5d5438391fcf01f3d5e78ccdd6fa"}}, "download_size": 1756264615, "post_processing_size": null, "dataset_size": 1517116110, "size_in_bytes": 3273380725}, "as": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "as", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 11442279, "num_examples": 270, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 5071343, "num_examples": 110, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 5480156, "num_examples": 124, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 886145, "num_examples": 31, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/as.tar.gz": {"num_bytes": 22226465, "checksum": "d9afd6d28e9c837ff0943a94452fb12ce8a7885b38fdeb25fc2912bbe4977f40"}}, "download_size": 22226465, "post_processing_size": null, "dataset_size": 22879923, "size_in_bytes": 45106388}, "br": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "br", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 62238289, "num_examples": 2780, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 54461339, "num_examples": 2087, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 46995570, "num_examples": 1997, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 269858143, "num_examples": 10912, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 20861017, "num_examples": 623, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/br.tar.gz": {"num_bytes": 465276982, "checksum": "d323d71337055b794c8fe3dcdf5a0dc03d6bf8f7c8c19f96369884410aef4606"}}, "download_size": 465276982, "post_processing_size": null, "dataset_size": 454414358, "size_in_bytes": 919691340}, "ca": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ca", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 12966939466, "num_examples": 285584, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 745761890, "num_examples": 15724, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 716442038, "num_examples": 15724, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 2693542910, "num_examples": 64446, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 850402888, "num_examples": 18846, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ca.tar.gz": {"num_bytes": 20743110341, "checksum": "a27bec66c151ddb21c1736781b3bca972047cc20c02488bad94d2311c40bc6da"}}, "download_size": 20743110341, "post_processing_size": null, "dataset_size": 17973089192, "size_in_bytes": 38716199533}, "cnh": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "cnh", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 18866674, "num_examples": 807, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 24675321, "num_examples": 752, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 22162315, "num_examples": 756, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 84878963, "num_examples": 2934, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 13642724, "num_examples": 433, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/cnh.tar.gz": {"num_bytes": 161331331, "checksum": "9c27ce17ea8db73e7a2c8715bdb3a45a40792d6d64238cfbb467a81c6b71d71f"}}, "download_size": 161331331, "post_processing_size": null, "dataset_size": 164225997, "size_in_bytes": 325557328}, "cs": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "cs", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 215205282, "num_examples": 5655, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 148499476, "num_examples": 4144, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 148312130, "num_examples": 4118, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 282225475, "num_examples": 7475, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 24717823, "num_examples": 685, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/cs.tar.gz": {"num_bytes": 1271909933, "checksum": "68a1d6f27eb7161fdf28da889e7d37e8c86b7aff73b0b6df52edc8359e30ac56"}}, "download_size": 1271909933, "post_processing_size": null, "dataset_size": 818960186, "size_in_bytes": 2090870119}, "cv": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "cv", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 31649510, "num_examples": 931, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 32513061, "num_examples": 788, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 28429779, "num_examples": 818, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 288294623, "num_examples": 6927, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 57923138, "num_examples": 1282, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/cv.tar.gz": {"num_bytes": 439329081, "checksum": "c3fb84c28a5718f01b91cf1026985b1dcd83bb312d32620f16b5ed4f12fb8c73"}}, "download_size": 439329081, "post_processing_size": null, "dataset_size": 438810111, "size_in_bytes": 878139192}, "cy": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "cy", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 271642649, "num_examples": 6839, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 206865596, "num_examples": 4820, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 201813388, "num_examples": 4776, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 688469886, "num_examples": 17919, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 146874576, "num_examples": 3648, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/cy.tar.gz": {"num_bytes": 3434474658, "checksum": "269da0cbbb2887d1903c0e17bbb71ea9bcd83506ba928fe75c660cb3e52f9a67"}}, "download_size": 3434474658, "post_processing_size": null, "dataset_size": 1515666095, "size_in_bytes": 4950140753}, "de": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "de", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 11463160619, "num_examples": 246525, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 744617681, "num_examples": 15588, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 729559862, "num_examples": 15588, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 464513461, "num_examples": 10095, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1440604803, "num_examples": 32789, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/de.tar.gz": {"num_bytes": 23283812097, "checksum": "733e6e367da4b9588b4bb175ac45c6c0ec545e41df5494a7ee4a7e4ff3141ef7"}}, "download_size": 23283812097, "post_processing_size": null, "dataset_size": 14842456426, "size_in_bytes": 38126268523}, "dv": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "dv", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 118576140, "num_examples": 2680, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 94281409, "num_examples": 2202, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 94117088, "num_examples": 2077, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 37694847, "num_examples": 840, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/dv.tar.gz": {"num_bytes": 540488041, "checksum": "b2c8617df5e7aebd74d88491913ecc6b94066198e875853b0b3847d13e70f419"}}, "download_size": 540488041, "post_processing_size": null, "dataset_size": 344669484, "size_in_bytes": 885157525}, "el": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "el", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 80759076, "num_examples": 2316, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 53820491, "num_examples": 1522, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 44818565, "num_examples": 1401, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 186861175, "num_examples": 5659, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 6023769, "num_examples": 185, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/el.tar.gz": {"num_bytes": 381570611, "checksum": "86c67e7bda7658a7087b5a1997d140d57957a05bb413a188610db61807c53ee4"}}, "download_size": 381570611, "post_processing_size": null, "dataset_size": 372283076, "size_in_bytes": 753853687}, "en": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "en", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 26088826658, "num_examples": 564337, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 758718688, "num_examples": 16164, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 795638801, "num_examples": 16164, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 5796244022, "num_examples": 169895, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 9122973965, "num_examples": 189562, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/en.tar.gz": {"num_bytes": 60613063630, "checksum": "0f8fdfc4fe715738be94ee49c4fb63d5f1608d2e6a43a2bed80f6cb871171c36"}}, "download_size": 60613063630, "post_processing_size": null, "dataset_size": 42562402134, "size_in_bytes": 103175465764}, "eo": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "eo", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 993655930, "num_examples": 19587, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 420153812, "num_examples": 8969, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 391427586, "num_examples": 8987, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 142476819, "num_examples": 2946, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 238105462, "num_examples": 4736, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/eo.tar.gz": {"num_bytes": 2883560869, "checksum": "c19900010aee0f9eb39416406598509b1cdba136a16318e746b1a64f97d7809c"}}, "download_size": 2883560869, "post_processing_size": null, "dataset_size": 2185819609, "size_in_bytes": 5069380478}, "es": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "es", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 6918333205, "num_examples": 161813, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 754049291, "num_examples": 15089, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 735558084, "num_examples": 15089, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 5528972205, "num_examples": 144791, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1664876264, "num_examples": 40640, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/es.tar.gz": {"num_bytes": 16188844718, "checksum": "276ca393783cd8b208d56b5032b87c13a40fcadde5b3925596e67c15578d0235"}}, "download_size": 16188844718, "post_processing_size": null, "dataset_size": 15601789049, "size_in_bytes": 31790633767}, "et": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "et", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 161124199, "num_examples": 2966, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 133183135, "num_examples": 2509, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 137604813, "num_examples": 2507, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 30339130, "num_examples": 569, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 193019544, "num_examples": 3557, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/et.tar.gz": {"num_bytes": 767174465, "checksum": "50a861393e4e7013ab71f1b63bca8c42c26dca1519c15a3b9cdb3cb5b6c561a2"}}, "download_size": 767174465, "post_processing_size": null, "dataset_size": 655270821, "size_in_bytes": 1422445286}, "eu": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "eu", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 317322801, "num_examples": 7505, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 238866501, "num_examples": 5172, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 228150083, "num_examples": 5172, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 988079897, "num_examples": 23570, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 208553909, "num_examples": 5387, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/eu.tar.gz": {"num_bytes": 3664586106, "checksum": "55b6eaf7ca7c120faa0b60d71c87189b610412334e6b710fe12c2a79489ab06f"}}, "download_size": 3664586106, "post_processing_size": null, "dataset_size": 1980973191, "size_in_bytes": 5645559297}, "fa": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "fa", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 239255087, "num_examples": 7593, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 217939210, "num_examples": 5213, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 196558067, "num_examples": 5213, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 737017546, "num_examples": 22510, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 499570226, "num_examples": 11698, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/fa.tar.gz": {"num_bytes": 8884585819, "checksum": "5454efe3b2f6d06d51e7177469b7bef9a962adbf7611e3cd21771451112abe6d"}}, "download_size": 8884585819, "post_processing_size": null, "dataset_size": 1890340136, "size_in_bytes": 10774925955}, "fi": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "fi", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 16017393, "num_examples": 460, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 16117529, "num_examples": 428, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 15471757, "num_examples": 415, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 5836400, "num_examples": 149, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2228215, "num_examples": 59, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/fi.tar.gz": {"num_bytes": 49882909, "checksum": "eb26d0904beef5ec08cf53267be7e78b8ba5056fd162057d5b085a7cba51f035"}}, "download_size": 49882909, "post_processing_size": null, "dataset_size": 55671294, "size_in_bytes": 105554203}, "fr": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "fr", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 12439892070, "num_examples": 298982, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 733943163, "num_examples": 15763, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 703801114, "num_examples": 15763, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 117998889, "num_examples": 3222, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1794149368, "num_examples": 40351, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/fr.tar.gz": {"num_bytes": 19130141984, "checksum": "719ef964b55d830a095a602aff311db39b77239e9d600b6af646ec2ed57e5e45"}}, "download_size": 19130141984, "post_processing_size": null, "dataset_size": 15789784604, "size_in_bytes": 34919926588}, "fy-NL": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "fy-NL", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 159116360, "num_examples": 3927, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 126913262, "num_examples": 3020, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 112288554, "num_examples": 2790, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 893887467, "num_examples": 21569, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 38985422, "num_examples": 1031, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/fy-NL.tar.gz": {"num_bytes": 1237743070, "checksum": "ddee4fc3ce52df2379fa4069090d8f5c853155dc0462eb645f6111e2da627297"}}, "download_size": 1237743070, "post_processing_size": null, "dataset_size": 1331191065, "size_in_bytes": 2568934135}, "ga-IE": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ga-IE", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 15396820, "num_examples": 541, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 16611739, "num_examples": 506, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 14897739, "num_examples": 497, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 61948768, "num_examples": 2130, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 10993268, "num_examples": 409, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ga-IE.tar.gz": {"num_bytes": 156553447, "checksum": "27223fc99af6a45f81190ecb90034806991ff3b9e3aa38a7e97caaabbb0a4ddc"}}, "download_size": 156553447, "post_processing_size": null, "dataset_size": 119848334, "size_in_bytes": 276401781}, "hi": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "hi", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 4860737, "num_examples": 157, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 4728043, "num_examples": 127, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 5569352, "num_examples": 135, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 4176110, "num_examples": 139, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2801051, "num_examples": 60, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/hi.tar.gz": {"num_bytes": 21424045, "checksum": "5492393b04dd1307a52d93525a7db08fc392c8ba0df553668945152e434f58c9"}}, "download_size": 21424045, "post_processing_size": null, "dataset_size": 22135293, "size_in_bytes": 43559338}, "hsb": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "hsb", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 43049910, "num_examples": 808, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 20929094, "num_examples": 387, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 8769458, "num_examples": 172, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 3173841, "num_examples": 62, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 5589972, "num_examples": 227, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/hsb.tar.gz": {"num_bytes": 79362060, "checksum": "3dd3d79aaa078ad7955552ebc596e0a8894ffd7a4a88a51b2c8ee80c0e088152"}}, "download_size": 79362060, "post_processing_size": null, "dataset_size": 81512275, "size_in_bytes": 160874335}, "hu": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "hu", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 126163153, "num_examples": 3348, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 57056435, "num_examples": 1649, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 50306925, "num_examples": 1434, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 12051094, "num_examples": 295, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 5881521, "num_examples": 169, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/hu.tar.gz": {"num_bytes": 242758708, "checksum": "61f933155cba6c54c0b76d0ddd2caebd62d69228b7c935382112abe172660953"}}, "download_size": 242758708, "post_processing_size": null, "dataset_size": 251459128, "size_in_bytes": 494217836}, "ia": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ia", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 96577153, "num_examples": 3477, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 33204678, "num_examples": 899, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 67436779, "num_examples": 1601, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 30937041, "num_examples": 1095, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 6769573, "num_examples": 192, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ia.tar.gz": {"num_bytes": 226499645, "checksum": "47a137a805ea8ce01f2cf9277739919a824a9fd13468345dfbd84eddb52c02f1"}}, "download_size": 226499645, "post_processing_size": null, "dataset_size": 234925224, "size_in_bytes": 461424869}, "id": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "id", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 63515863, "num_examples": 2130, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 60711104, "num_examples": 1844, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 56963520, "num_examples": 1835, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 206578628, "num_examples": 6782, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 16566129, "num_examples": 470, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/id.tar.gz": {"num_bytes": 475918233, "checksum": "71177fa9d2fac29f48db5feabc294f1d6bbcaa0c326b0d1099be66c0b804b245"}}, "download_size": 475918233, "post_processing_size": null, "dataset_size": 404335244, "size_in_bytes": 880253477}, "it": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "it", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2555546829, "num_examples": 58015, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 656285877, "num_examples": 12928, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 621955330, "num_examples": 12928, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 671213467, "num_examples": 14549, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 564610354, "num_examples": 12189, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/it.tar.gz": {"num_bytes": 5585781573, "checksum": "3a75b1631958af1487ee49b13cd27efc951183737ed515832cf714ed20c97808"}}, "download_size": 5585781573, "post_processing_size": null, "dataset_size": 5069611857, "size_in_bytes": 10655393430}, "ja": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ja", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 27600264, "num_examples": 722, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 26475556, "num_examples": 632, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 22098940, "num_examples": 586, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 34588931, "num_examples": 885, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 17819020, "num_examples": 504, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ja.tar.gz": {"num_bytes": 152879796, "checksum": "3614cd0d0abac80794351c78183967c83179fab390d7e19cad97758eb85ae558"}}, "download_size": 152879796, "post_processing_size": null, "dataset_size": 128582711, "size_in_bytes": 281462507}, "ka": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ka", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 47790695, "num_examples": 1058, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 30301524, "num_examples": 656, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 24951079, "num_examples": 527, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 2144603, "num_examples": 44, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 7004160, "num_examples": 139, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ka.tar.gz": {"num_bytes": 104280554, "checksum": "7677df9d650234306a11bf8518be5807e72e7d5fc440d391304d1b99dd5517f5"}}, "download_size": 104280554, "post_processing_size": null, "dataset_size": 112192061, "size_in_bytes": 216472615}, "kab": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "kab", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3219289101, "num_examples": 120530, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 446453041, "num_examples": 14622, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 414159937, "num_examples": 14622, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 2282481767, "num_examples": 88021, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 581587104, "num_examples": 18134, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/kab.tar.gz": {"num_bytes": 17171606918, "checksum": "d2089107d4f3a84856c457a436a47a883b872022f2085cfad0501469be91fd95"}}, "download_size": 17171606918, "post_processing_size": null, "dataset_size": 6943970950, "size_in_bytes": 24115577868}, "ky": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ky", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 75460488, "num_examples": 1955, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 57116561, "num_examples": 1503, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 61393867, "num_examples": 1511, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 258081579, "num_examples": 7223, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 41007711, "num_examples": 926, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ky.tar.gz": {"num_bytes": 579440853, "checksum": "6efe0ca5384d0419fcf5fda0e0229a1b5eb80d8eeba2d7528a4c3c9f2593206f"}}, "download_size": 579440853, "post_processing_size": null, "dataset_size": 493060206, "size_in_bytes": 1072501059}, "lg": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "lg", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 46910479, "num_examples": 1250, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 26951803, "num_examples": 584, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 16709367, "num_examples": 384, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 111180838, "num_examples": 3110, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 14069959, "num_examples": 290, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/lg.tar.gz": {"num_bytes": 208197149, "checksum": "71243c65f638cd7f392fabe22e37cbafbdca4eb5a199210000ae957a88768040"}}, "download_size": 208197149, "post_processing_size": null, "dataset_size": 215822446, "size_in_bytes": 424019595}, "lt": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "lt", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 34605356, "num_examples": 931, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 19940391, "num_examples": 466, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 10462851, "num_examples": 244, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 71150206, "num_examples": 1629, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 4414780, "num_examples": 102, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/lt.tar.gz": {"num_bytes": 135299706, "checksum": "5ad3d93bc308f58a70e6685f71ae035237ef9caa0922232ac76846f7587bb8aa"}}, "download_size": 135299706, "post_processing_size": null, "dataset_size": 140573584, "size_in_bytes": 275873290}, "lv": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "lv", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 67269173, "num_examples": 2552, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 56937435, "num_examples": 1882, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 55289058, "num_examples": 2002, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 40259801, "num_examples": 1560, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 4383319, "num_examples": 143, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/lv.tar.gz": {"num_bytes": 208307691, "checksum": "8a4350ccf24884ee1012032bfd5a87e0de50d780b1f8450d1cb52afe3f69c671"}}, "download_size": 208307691, "post_processing_size": null, "dataset_size": 224138786, "size_in_bytes": 432446477}, "mn": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "mn", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 89913910, "num_examples": 2183, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 86737041, "num_examples": 1862, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 82343275, "num_examples": 1837, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 146365394, "num_examples": 3272, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 31764232, "num_examples": 667, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/mn.tar.gz": {"num_bytes": 486369317, "checksum": "3aebc40d40eb19263576664a981f4bb8b221abeab78c8154adc3d16875c75ec7"}}, "download_size": 486369317, "post_processing_size": null, "dataset_size": 437123852, "size_in_bytes": 923493169}, "mt": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "mt", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 73850815, "num_examples": 2036, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 66520195, "num_examples": 1617, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 56412066, "num_examples": 1516, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 220666971, "num_examples": 5714, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 12328068, "num_examples": 314, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/mt.tar.gz": {"num_bytes": 425114242, "checksum": "9d53000d7832d130c4d35fb412bfc092ab8de8e763a5d2a528aebf37f052af03"}}, "download_size": 425114242, "post_processing_size": null, "dataset_size": 429778115, "size_in_bytes": 854892357}, "nl": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "nl", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 321946148, "num_examples": 9460, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 205287443, "num_examples": 5708, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 186095353, "num_examples": 4938, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 801418, "num_examples": 27, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 115133112, "num_examples": 3308, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/nl.tar.gz": {"num_bytes": 1741827548, "checksum": "048f823408e3bbd16e63111d1b4caecb0102606c440bbdf3e5b6a6bae1e1e3f1"}}, "download_size": 1741827548, "post_processing_size": null, "dataset_size": 829263474, "size_in_bytes": 2571091022}, "or": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "or", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 16067910, "num_examples": 388, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 4270651, "num_examples": 98, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 5485937, "num_examples": 129, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 177775963, "num_examples": 4302, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2701922, "num_examples": 62, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/or.tar.gz": {"num_bytes": 199077358, "checksum": "f3edad30166fe454f4d2b14adeece1434dc4b8eb7b0ece37aac8389b7122218a"}}, "download_size": 199077358, "post_processing_size": null, "dataset_size": 206302383, "size_in_bytes": 405379741}, "pa-IN": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "pa-IN", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 7572499, "num_examples": 211, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 4375532, "num_examples": 116, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 1702492, "num_examples": 44, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 56683312, "num_examples": 1411, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 1690766, "num_examples": 43, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/pa-IN.tar.gz": {"num_bytes": 69748265, "checksum": "d2e30f28a227ecb8209340c4133edf6489f35f8e3d1eb55ff22b96b12f36952c"}}, "download_size": 69748265, "post_processing_size": null, "dataset_size": 72024601, "size_in_bytes": 141772866}, "pl": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "pl", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 273394509, "num_examples": 7468, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 205047541, "num_examples": 5153, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 195917307, "num_examples": 5153, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 442144781, "num_examples": 12848, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 180801918, "num_examples": 4601, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/pl.tar.gz": {"num_bytes": 3537012341, "checksum": "acbf77d36e083e2bcb7152ffb52ab7d1e3e64d33a3f51f106cdff7feff6279aa"}}, "download_size": 3537012341, "post_processing_size": null, "dataset_size": 1297306056, "size_in_bytes": 4834318397}, "pt": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "pt", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 231451724, "num_examples": 6514, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 180108694, "num_examples": 4641, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 165966139, "num_examples": 4592, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 283497435, "num_examples": 8390, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 67948392, "num_examples": 1740, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/pt.tar.gz": {"num_bytes": 1704252567, "checksum": "6700de499f728e0e3f3ed4d7005e5b7db27ba2ddc872b21b0b404c3b4859d84b"}}, "download_size": 1704252567, "post_processing_size": null, "dataset_size": 928972384, "size_in_bytes": 2633224951}, "rm-sursilv": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "rm-sursilv", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 62396326, "num_examples": 1384, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 51707733, "num_examples": 1194, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 52114252, "num_examples": 1205, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 93351293, "num_examples": 2102, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 30593270, "num_examples": 639, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/rm-sursilv.tar.gz": {"num_bytes": 275950479, "checksum": "3cfc4971b6ab8958d7c3d784977690fcc04ebd7570ecf788d5948df84a5481a1"}}, "download_size": 275950479, "post_processing_size": null, "dataset_size": 290162874, "size_in_bytes": 566113353}, "rm-vallader": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "rm-vallader", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 29528457, "num_examples": 574, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 18805466, "num_examples": 378, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 17012341, "num_examples": 357, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 36890435, "num_examples": 727, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 9356204, "num_examples": 374, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/rm-vallader.tar.gz": {"num_bytes": 108113989, "checksum": "4fdb7dc5e20862a636ee7975831b39db29012d615f9139edf2d266b878ce43ae"}}, "download_size": 108113989, "post_processing_size": null, "dataset_size": 111592903, "size_in_bytes": 219706892}, "ro": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ro", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 107235430, "num_examples": 3399, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 60106568, "num_examples": 1778, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 30358457, "num_examples": 858, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 65805210, "num_examples": 1945, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 11108104, "num_examples": 485, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ro.tar.gz": {"num_bytes": 261978702, "checksum": "450b159e936ef6ff136fcdfad193675caec5b2230d1b6ca24c5cde491ff002cd"}}, "download_size": 261978702, "post_processing_size": null, "dataset_size": 274613769, "size_in_bytes": 536592471}, "ru": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ru", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 686168722, "num_examples": 15481, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 385349488, "num_examples": 8007, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 361164462, "num_examples": 7963, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 450644862, "num_examples": 10247, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 145739451, "num_examples": 3056, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ru.tar.gz": {"num_bytes": 3655676916, "checksum": "dcbb460e58d4afc78047c3801c9eb56d940b388eb350ee3da3de5bfe5a74a025"}}, "download_size": 3655676916, "post_processing_size": null, "dataset_size": 2029066985, "size_in_bytes": 5684743901}, "rw": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "rw", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 21645788973, "num_examples": 515197, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 707959382, "num_examples": 15724, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 698662384, "num_examples": 15032, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 923146896, "num_examples": 22923, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 7969286423, "num_examples": 206790, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/rw.tar.gz": {"num_bytes": 42545189583, "checksum": "cf8a07059b3713022d487f9a6b8f465271f3457c525a8b350f829f87b0132b41"}}, "download_size": 42545189583, "post_processing_size": null, "dataset_size": 31944844058, "size_in_bytes": 74490033641}, "sah": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "sah", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 68286985, "num_examples": 1442, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 38534020, "num_examples": 757, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 17900397, "num_examples": 405, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 62594222, "num_examples": 1275, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 3594160, "num_examples": 66, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/sah.tar.gz": {"num_bytes": 181245626, "checksum": "dea1a454813c8f90abcbdf427fa922e1b7a116753deeb410af096ce5f0ae2405"}}, "download_size": 181245626, "post_processing_size": null, "dataset_size": 190909784, "size_in_bytes": 372155410}, "sl": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "sl", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 66122967, "num_examples": 2038, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 26872195, "num_examples": 881, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 16353097, "num_examples": 556, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 79268518, "num_examples": 2502, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 3048301, "num_examples": 92, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/sl.tar.gz": {"num_bytes": 222751292, "checksum": "184cfbfe876a1f1c6317e4e34680c82a940db833afca78203c2929db1768a353"}}, "download_size": 222751292, "post_processing_size": null, "dataset_size": 191665078, "size_in_bytes": 414416370}, "sv-SE": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "sv-SE", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 62727263, "num_examples": 2331, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 59127381, "num_examples": 2027, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 53846355, "num_examples": 2019, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 109970049, "num_examples": 3043, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 13462567, "num_examples": 462, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/sv-SE.tar.gz": {"num_bytes": 421434184, "checksum": "dc8634dafacb33be00f06e376f6c479d53f84f4834952593c8903f1080535213"}}, "download_size": 421434184, "post_processing_size": null, "dataset_size": 299133615, "size_in_bytes": 720567799}, "ta": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "ta", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 69052658, "num_examples": 2009, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 67616865, "num_examples": 1781, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 63248009, "num_examples": 1779, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 246650792, "num_examples": 7428, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 23587453, "num_examples": 594, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/ta.tar.gz": {"num_bytes": 679766097, "checksum": "78560d9d608a63ee75c3fdeb7f96f33cf0d85855ba6294b13e945de066eb46d8"}}, "download_size": 679766097, "post_processing_size": null, "dataset_size": 470155777, "size_in_bytes": 1149921874}, "th": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "th", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 100435725, "num_examples": 2917, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 82030679, "num_examples": 2188, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 63237632, "num_examples": 1922, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 95235301, "num_examples": 2671, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 18247080, "num_examples": 467, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/th.tar.gz": {"num_bytes": 341305736, "checksum": "a3d11043c49d3ea8ffb58dfab117cd831dd62a641e0a26ac60eb43e483534f7a"}}, "download_size": 341305736, "post_processing_size": null, "dataset_size": 359186417, "size_in_bytes": 700492153}, "tr": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "tr", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 57879052, "num_examples": 1831, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 60268059, "num_examples": 1647, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 54914798, "num_examples": 1647, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 10954154, "num_examples": 325, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 59288266, "num_examples": 1726, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/tr.tar.gz": {"num_bytes": 620848700, "checksum": "b3f266c868b1fe9f76270ba76226b1cdc17f33b3e387e6b44a64d5419f8b9768"}}, "download_size": 620848700, "post_processing_size": null, "dataset_size": 243304329, "size_in_bytes": 864153029}, "tt": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "tt", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 348132697, "num_examples": 11211, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 135120057, "num_examples": 4485, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 61690964, "num_examples": 2127, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 62158038, "num_examples": 1798, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 10403128, "num_examples": 287, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/tt.tar.gz": {"num_bytes": 777153207, "checksum": "89c8d7a49584de720f1790df39e6f07996e2eecb07f6273f4ba2668e9fe4ad46"}}, "download_size": 777153207, "post_processing_size": null, "dataset_size": 617504884, "size_in_bytes": 1394658091}, "uk": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "uk", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 161925063, "num_examples": 4035, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 138422211, "num_examples": 3235, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 135483169, "num_examples": 3236, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 327979131, "num_examples": 8161, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 55745301, "num_examples": 1255, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/uk.tar.gz": {"num_bytes": 1218559031, "checksum": "f3ca0143cd84f5eacb583187052e69efec21c571a426efee91a765a2284519c2"}}, "download_size": 1218559031, "post_processing_size": null, "dataset_size": 819554875, "size_in_bytes": 2038113906}, "vi": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "vi", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 6244454, "num_examples": 221, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 6656365, "num_examples": 198, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 6531856, "num_examples": 200, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 31315434, "num_examples": 870, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 2981661, "num_examples": 78, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/vi.tar.gz": {"num_bytes": 51929480, "checksum": "704bce8031932377cc21c017923ff1e96ebd2be9bd520adcf839f7a0f5f03b6e"}}, "download_size": 51929480, "post_processing_size": null, "dataset_size": 53729770, "size_in_bytes": 105659250}, "vot": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "vot", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 146467, "num_examples": 3, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 0, "num_examples": 0, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 7963322, "num_examples": 411, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 107949, "num_examples": 6, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/vot.tar.gz": {"num_bytes": 7792602, "checksum": "7fb07dd25b0575e8cd811bb8d1e5aebd17fdbca079a4ee50d81e0aaaff50f8b0"}}, "download_size": 7792602, "post_processing_size": null, "dataset_size": 8217738, "size_in_bytes": 16010340}, "zh-CN": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "zh-CN", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 793667379, "num_examples": 18541, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 420202544, "num_examples": 8760, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 396096323, "num_examples": 8743, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 381264783, "num_examples": 8948, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 266234479, "num_examples": 5305, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/zh-CN.tar.gz": {"num_bytes": 2184602350, "checksum": "cd8589cac28541f9f996d1954f14c307954f1146ac44a8eadad8e31ebaf1f15e"}}, "download_size": 2184602350, "post_processing_size": null, "dataset_size": 2257465508, "size_in_bytes": 4442067858}, "zh-HK": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "zh-HK", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 221459521, "num_examples": 7506, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 217627041, "num_examples": 5172, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 196071110, "num_examples": 5172, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 1319233252, "num_examples": 38830, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 124170969, "num_examples": 2999, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/zh-HK.tar.gz": {"num_bytes": 2774145806, "checksum": "8a525ce4664d6647701449d5e72f7d8658cc3a5fabc72e05c6883994fd3c0134"}}, "download_size": 2774145806, "post_processing_size": null, "dataset_size": 2078561893, "size_in_bytes": 4852707699}, "zh-TW": {"description": "Common Voice is Mozilla's initiative to help teach machines how real people speak.\nThe dataset currently consists of 7,335 validated hours of speech in 60 languages, but we\u2019re always adding more voices and languages.\n", "citation": "@inproceedings{commonvoice:2020,\n author = {Ardila, R. and Branson, M. and Davis, K. and Henretty, M. and Kohler, M. and Meyer, J. and Morais, R. and Saunders, L. and Tyers, F. M. and Weber, G.},\n title = {Common Voice: A Massively-Multilingual Speech Corpus},\n booktitle = {Proceedings of the 12th Conference on Language Resources and Evaluation (LREC 2020)},\n pages = {4211--4215},\n year = 2020\n}\n", "homepage": "https://commonvoice.mozilla.org/en/datasets", "license": "https://github.com/common-voice/common-voice/blob/main/LICENSE", "features": {"client_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}, "up_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "down_votes": {"dtype": "int64", "id": null, "_type": "Value"}, "age": {"dtype": "string", "id": null, "_type": "Value"}, "gender": {"dtype": "string", "id": null, "_type": "Value"}, "accent": {"dtype": "string", "id": null, "_type": "Value"}, "locale": {"dtype": "string", "id": null, "_type": "Value"}, "segment": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "common_voice", "config_name": "zh-TW", "version": {"version_str": "6.1.0", "description": "", "major": 6, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 97323787, "num_examples": 3507, "dataset_name": "common_voice"}, "test": {"name": "test", "num_bytes": 85512325, "num_examples": 2895, "dataset_name": "common_voice"}, "validation": {"name": "validation", "num_bytes": 80402637, "num_examples": 2895, "dataset_name": "common_voice"}, "other": {"name": "other", "num_bytes": 623801957, "num_examples": 22477, "dataset_name": "common_voice"}, "invalidated": {"name": "invalidated", "num_bytes": 100241443, "num_examples": 3584, "dataset_name": "common_voice"}}, "download_checksums": {"https://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-6.1-2020-12-11/zh-TW.tar.gz": {"num_bytes": 2182836295, "checksum": "67fadf561f8237690d4a4a1d63a9b3ac271b5d05438dc745b7e04282d909460f"}}, "download_size": 2182836295, "post_processing_size": null, "dataset_size": 987282149, "size_in_bytes": 3170118444}} \ No newline at end of file diff --git a/datasets/common_voice/dummy/ab/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ab/6.1.0/dummy_data.zip index 26e2c823b65..282b285dd46 100644 Binary files a/datasets/common_voice/dummy/ab/6.1.0/dummy_data.zip and b/datasets/common_voice/dummy/ab/6.1.0/dummy_data.zip differ diff --git a/datasets/common_voice/dummy/ar/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ar/6.1.0/dummy_data.zip deleted file mode 100644 index f49335dba97..00000000000 Binary files a/datasets/common_voice/dummy/ar/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/as/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/as/6.1.0/dummy_data.zip deleted file mode 100644 index c47e2a63cb7..00000000000 Binary files a/datasets/common_voice/dummy/as/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/br/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/br/6.1.0/dummy_data.zip deleted file mode 100644 index 7aec9e67261..00000000000 Binary files a/datasets/common_voice/dummy/br/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ca/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ca/6.1.0/dummy_data.zip deleted file mode 100644 index e823fa105b5..00000000000 Binary files a/datasets/common_voice/dummy/ca/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/cnh/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/cnh/6.1.0/dummy_data.zip deleted file mode 100644 index 9c0004c00ee..00000000000 Binary files a/datasets/common_voice/dummy/cnh/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/cs/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/cs/6.1.0/dummy_data.zip deleted file mode 100644 index 555ddd9291d..00000000000 Binary files a/datasets/common_voice/dummy/cs/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/cv/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/cv/6.1.0/dummy_data.zip deleted file mode 100644 index 037451f0557..00000000000 Binary files a/datasets/common_voice/dummy/cv/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/cy/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/cy/6.1.0/dummy_data.zip deleted file mode 100644 index b9cf8832952..00000000000 Binary files a/datasets/common_voice/dummy/cy/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/de/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/de/6.1.0/dummy_data.zip deleted file mode 100644 index 1f7d059d183..00000000000 Binary files a/datasets/common_voice/dummy/de/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/dv/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/dv/6.1.0/dummy_data.zip deleted file mode 100644 index f9ed06c341c..00000000000 Binary files a/datasets/common_voice/dummy/dv/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/el/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/el/6.1.0/dummy_data.zip deleted file mode 100644 index 965ff2c7bdd..00000000000 Binary files a/datasets/common_voice/dummy/el/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/en/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/en/6.1.0/dummy_data.zip deleted file mode 100644 index 8c7b5942542..00000000000 Binary files a/datasets/common_voice/dummy/en/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/eo/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/eo/6.1.0/dummy_data.zip deleted file mode 100644 index 9a4a43aa636..00000000000 Binary files a/datasets/common_voice/dummy/eo/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/es/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/es/6.1.0/dummy_data.zip deleted file mode 100644 index 8a396c6648c..00000000000 Binary files a/datasets/common_voice/dummy/es/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/et/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/et/6.1.0/dummy_data.zip deleted file mode 100644 index 1e24b21b907..00000000000 Binary files a/datasets/common_voice/dummy/et/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/eu/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/eu/6.1.0/dummy_data.zip deleted file mode 100644 index f04c5f3ced9..00000000000 Binary files a/datasets/common_voice/dummy/eu/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/fa/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/fa/6.1.0/dummy_data.zip deleted file mode 100644 index e77110ad477..00000000000 Binary files a/datasets/common_voice/dummy/fa/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/fi/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/fi/6.1.0/dummy_data.zip deleted file mode 100644 index 3498a204ca6..00000000000 Binary files a/datasets/common_voice/dummy/fi/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/fr/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/fr/6.1.0/dummy_data.zip deleted file mode 100644 index c8f82000515..00000000000 Binary files a/datasets/common_voice/dummy/fr/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/fy-NL/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/fy-NL/6.1.0/dummy_data.zip deleted file mode 100644 index e74ef5b16c1..00000000000 Binary files a/datasets/common_voice/dummy/fy-NL/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ga-IE/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ga-IE/6.1.0/dummy_data.zip deleted file mode 100644 index 6c207721ef8..00000000000 Binary files a/datasets/common_voice/dummy/ga-IE/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/hi/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/hi/6.1.0/dummy_data.zip deleted file mode 100644 index 2570d76101e..00000000000 Binary files a/datasets/common_voice/dummy/hi/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/hsb/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/hsb/6.1.0/dummy_data.zip deleted file mode 100644 index 9794d0b658a..00000000000 Binary files a/datasets/common_voice/dummy/hsb/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/hu/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/hu/6.1.0/dummy_data.zip deleted file mode 100644 index 3f593b16956..00000000000 Binary files a/datasets/common_voice/dummy/hu/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ia/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ia/6.1.0/dummy_data.zip deleted file mode 100644 index ceb6639e908..00000000000 Binary files a/datasets/common_voice/dummy/ia/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/id/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/id/6.1.0/dummy_data.zip deleted file mode 100644 index 9b9006d32c6..00000000000 Binary files a/datasets/common_voice/dummy/id/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/it/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/it/6.1.0/dummy_data.zip deleted file mode 100644 index e50498ce0c4..00000000000 Binary files a/datasets/common_voice/dummy/it/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ja/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ja/6.1.0/dummy_data.zip deleted file mode 100644 index 95256af4cd4..00000000000 Binary files a/datasets/common_voice/dummy/ja/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ka/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ka/6.1.0/dummy_data.zip deleted file mode 100644 index cd88f904ac4..00000000000 Binary files a/datasets/common_voice/dummy/ka/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/kab/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/kab/6.1.0/dummy_data.zip deleted file mode 100644 index 3a8d0bf1c94..00000000000 Binary files a/datasets/common_voice/dummy/kab/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ky/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ky/6.1.0/dummy_data.zip deleted file mode 100644 index 8687be4bb45..00000000000 Binary files a/datasets/common_voice/dummy/ky/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/lg/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/lg/6.1.0/dummy_data.zip deleted file mode 100644 index 04f67d1a8c9..00000000000 Binary files a/datasets/common_voice/dummy/lg/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/lt/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/lt/6.1.0/dummy_data.zip deleted file mode 100644 index e12366b49dc..00000000000 Binary files a/datasets/common_voice/dummy/lt/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/lv/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/lv/6.1.0/dummy_data.zip deleted file mode 100644 index 2ba4a4df6f1..00000000000 Binary files a/datasets/common_voice/dummy/lv/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/mn/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/mn/6.1.0/dummy_data.zip deleted file mode 100644 index 0c27e7ab976..00000000000 Binary files a/datasets/common_voice/dummy/mn/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/mt/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/mt/6.1.0/dummy_data.zip deleted file mode 100644 index fe980849109..00000000000 Binary files a/datasets/common_voice/dummy/mt/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/nl/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/nl/6.1.0/dummy_data.zip deleted file mode 100644 index 38399d0736b..00000000000 Binary files a/datasets/common_voice/dummy/nl/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/or/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/or/6.1.0/dummy_data.zip deleted file mode 100644 index 276c1fad039..00000000000 Binary files a/datasets/common_voice/dummy/or/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/pa-IN/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/pa-IN/6.1.0/dummy_data.zip deleted file mode 100644 index de46a6bdfe7..00000000000 Binary files a/datasets/common_voice/dummy/pa-IN/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/pl/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/pl/6.1.0/dummy_data.zip deleted file mode 100644 index 44dc0a8e87e..00000000000 Binary files a/datasets/common_voice/dummy/pl/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/pt/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/pt/6.1.0/dummy_data.zip deleted file mode 100644 index ae45552c8dd..00000000000 Binary files a/datasets/common_voice/dummy/pt/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/rm-sursilv/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/rm-sursilv/6.1.0/dummy_data.zip deleted file mode 100644 index eccefac1af2..00000000000 Binary files a/datasets/common_voice/dummy/rm-sursilv/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/rm-vallader/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/rm-vallader/6.1.0/dummy_data.zip deleted file mode 100644 index 3d474f92bee..00000000000 Binary files a/datasets/common_voice/dummy/rm-vallader/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ro/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ro/6.1.0/dummy_data.zip deleted file mode 100644 index 292c905b122..00000000000 Binary files a/datasets/common_voice/dummy/ro/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ru/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ru/6.1.0/dummy_data.zip deleted file mode 100644 index 032973e097f..00000000000 Binary files a/datasets/common_voice/dummy/ru/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/rw/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/rw/6.1.0/dummy_data.zip deleted file mode 100644 index 4c837eecc19..00000000000 Binary files a/datasets/common_voice/dummy/rw/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/sah/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/sah/6.1.0/dummy_data.zip deleted file mode 100644 index 24059a67ada..00000000000 Binary files a/datasets/common_voice/dummy/sah/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/sl/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/sl/6.1.0/dummy_data.zip deleted file mode 100644 index f1a939b8c41..00000000000 Binary files a/datasets/common_voice/dummy/sl/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/sv-SE/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/sv-SE/6.1.0/dummy_data.zip deleted file mode 100644 index 9dd77cf834a..00000000000 Binary files a/datasets/common_voice/dummy/sv-SE/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/ta/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/ta/6.1.0/dummy_data.zip deleted file mode 100644 index ffa8dd08b0a..00000000000 Binary files a/datasets/common_voice/dummy/ta/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/th/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/th/6.1.0/dummy_data.zip deleted file mode 100644 index 335835de4b1..00000000000 Binary files a/datasets/common_voice/dummy/th/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/tr/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/tr/6.1.0/dummy_data.zip deleted file mode 100644 index 14c0dbad654..00000000000 Binary files a/datasets/common_voice/dummy/tr/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/tt/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/tt/6.1.0/dummy_data.zip deleted file mode 100644 index 12fd52efb3e..00000000000 Binary files a/datasets/common_voice/dummy/tt/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/uk/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/uk/6.1.0/dummy_data.zip deleted file mode 100644 index c9001c18749..00000000000 Binary files a/datasets/common_voice/dummy/uk/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/vi/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/vi/6.1.0/dummy_data.zip deleted file mode 100644 index 3b3be9d43db..00000000000 Binary files a/datasets/common_voice/dummy/vi/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/vot/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/vot/6.1.0/dummy_data.zip deleted file mode 100644 index 026a2917670..00000000000 Binary files a/datasets/common_voice/dummy/vot/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/zh-CN/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/zh-CN/6.1.0/dummy_data.zip deleted file mode 100644 index 9268b85991f..00000000000 Binary files a/datasets/common_voice/dummy/zh-CN/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/zh-HK/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/zh-HK/6.1.0/dummy_data.zip deleted file mode 100644 index bc6d96478de..00000000000 Binary files a/datasets/common_voice/dummy/zh-HK/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/common_voice/dummy/zh-TW/6.1.0/dummy_data.zip b/datasets/common_voice/dummy/zh-TW/6.1.0/dummy_data.zip deleted file mode 100644 index a825b9ac44e..00000000000 Binary files a/datasets/common_voice/dummy/zh-TW/6.1.0/dummy_data.zip and /dev/null differ diff --git a/datasets/librispeech_asr/dataset_infos.json b/datasets/librispeech_asr/dataset_infos.json index 737f150eba8..3ff72c90ec3 100644 --- a/datasets/librispeech_asr/dataset_infos.json +++ b/datasets/librispeech_asr/dataset_infos.json @@ -1 +1 @@ -{"clean": {"description": "LibriSpeech is a corpus of approximately 1000 hours of read English speech with sampling rate of 16 kHz,\nprepared by Vassil Panayotov with the assistance of Daniel Povey. The data is derived from read\naudiobooks from the LibriVox project, and has been carefully segmented and aligned.87\n\nNote that in order to limit the required storage for preparing this dataset, the audio\nis stored in the .flac format and is not converted to a float32 array. To convert, the audio\nfile to a float32 array, please make use of the `.map()` function as follows:\n\n\n```python\nimport soundfile as sf\n\ndef map_to_array(batch):\n speech_array, _ = sf.read(batch[\"file\"])\n batch[\"speech\"] = speech_array\n return batch\n\ndataset = dataset.map(map_to_array, remove_columns=[\"file\"])\n", "citation": "@inproceedings{panayotov2015librispeech,\n title={Librispeech: an ASR corpus based on public domain audio books},\n author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},\n booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on},\n pages={5206--5210},\n year={2015},\n organization={IEEE}\n}\n", "homepage": "http://www.openslr.org/12", "license": "", "features": {"file": {"dtype": "string", "id": null, "_type": "Value"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "speaker_id": {"dtype": "int64", "id": null, "_type": "Value"}, "chapter_id": {"dtype": "int64", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "speech", "output": "text"}, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "file", "transcription_column": "text"}], "builder_name": "librispeech_asr", "config_name": "clean", "version": {"version_str": "2.1.0", "description": "", "major": 2, "minor": 1, "patch": 0}, "splits": {"train.100": {"name": "train.100", "num_bytes": 11823891, "num_examples": 28539, "dataset_name": "librispeech_asr"}, "train.360": {"name": "train.360", "num_bytes": 43049490, "num_examples": 104014, "dataset_name": "librispeech_asr"}, "validation": {"name": "validation", "num_bytes": 894510, "num_examples": 2703, "dataset_name": "librispeech_asr"}, "test": {"name": "test", "num_bytes": 868614, "num_examples": 2620, "dataset_name": "librispeech_asr"}}, "download_checksums": {"http://www.openslr.org/resources/12/dev-clean.tar.gz": {"num_bytes": 337926286, "checksum": "76f87d090650617fca0cac8f88b9416e0ebf80350acb97b343a85fa903728ab3"}, "http://www.openslr.org/resources/12/test-clean.tar.gz": {"num_bytes": 346663984, "checksum": "39fde525e59672dc6d1551919b1478f724438a95aa55f874b576be21967e6c23"}, "http://www.openslr.org/resources/12/train-clean-100.tar.gz": {"num_bytes": 6387309499, "checksum": "d4ddd1d5a6ab303066f14971d768ee43278a5f2a0aa43dc716b0e64ecbbbf6e2"}, "http://www.openslr.org/resources/12/train-clean-360.tar.gz": {"num_bytes": 23049477885, "checksum": "146a56496217e96c14334a160df97fffedd6e0a04e66b9c5af0d40be3c792ecf"}}, "download_size": 30121377654, "post_processing_size": null, "dataset_size": 56636505, "size_in_bytes": 30178014159}, "other": {"description": "LibriSpeech is a corpus of approximately 1000 hours of read English speech with sampling rate of 16 kHz,\nprepared by Vassil Panayotov with the assistance of Daniel Povey. The data is derived from read\naudiobooks from the LibriVox project, and has been carefully segmented and aligned.87\n\nNote that in order to limit the required storage for preparing this dataset, the audio\nis stored in the .flac format and is not converted to a float32 array. To convert, the audio\nfile to a float32 array, please make use of the `.map()` function as follows:\n\n\n```python\nimport soundfile as sf\n\ndef map_to_array(batch):\n speech_array, _ = sf.read(batch[\"file\"])\n batch[\"speech\"] = speech_array\n return batch\n\ndataset = dataset.map(map_to_array, remove_columns=[\"file\"])\n", "citation": "@inproceedings{panayotov2015librispeech,\n title={Librispeech: an ASR corpus based on public domain audio books},\n author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},\n booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on},\n pages={5206--5210},\n year={2015},\n organization={IEEE}\n}\n", "homepage": "http://www.openslr.org/12", "license": "", "features": {"file": {"dtype": "string", "id": null, "_type": "Value"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "speaker_id": {"dtype": "int64", "id": null, "_type": "Value"}, "chapter_id": {"dtype": "int64", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "speech", "output": "text"}, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "file", "transcription_column": "text"}], "builder_name": "librispeech_asr", "config_name": "other", "version": {"version_str": "2.1.0", "description": "", "major": 2, "minor": 1, "patch": 0}, "splits": {"train.500": {"name": "train.500", "num_bytes": 59561081, "num_examples": 148688, "dataset_name": "librispeech_asr"}, "validation": {"name": "validation", "num_bytes": 907644, "num_examples": 2864, "dataset_name": "librispeech_asr"}, "test": {"name": "test", "num_bytes": 934838, "num_examples": 2939, "dataset_name": "librispeech_asr"}}, "download_checksums": {"http://www.openslr.org/resources/12/test-other.tar.gz": {"num_bytes": 328757843, "checksum": "d09c181bba5cf717b3dee7d4d592af11a3ee3a09e08ae025c5506f6ebe961c29"}, "http://www.openslr.org/resources/12/dev-other.tar.gz": {"num_bytes": 314305928, "checksum": "12661c48e8c3fe1de2c1caa4c3e135193bfb1811584f11f569dd12645aa84365"}, "http://www.openslr.org/resources/12/train-other-500.tar.gz": {"num_bytes": 30593501606, "checksum": "ddb22f27f96ec163645d53215559df6aa36515f26e01dd70798188350adcb6d2"}}, "download_size": 31236565377, "post_processing_size": null, "dataset_size": 61403563, "size_in_bytes": 31297968940}} \ No newline at end of file +{"clean": {"description": "LibriSpeech is a corpus of approximately 1000 hours of read English speech with sampling rate of 16 kHz,\nprepared by Vassil Panayotov with the assistance of Daniel Povey. The data is derived from read\naudiobooks from the LibriVox project, and has been carefully segmented and aligned.87\n\nNote that in order to limit the required storage for preparing this dataset, the audio\nis stored in the .flac format and is not converted to a float32 array. To convert, the audio\nfile to a float32 array, please make use of the `.map()` function as follows:\n\n\n```python\nimport soundfile as sf\n\ndef map_to_array(batch):\n speech_array, _ = sf.read(batch[\"file\"])\n batch[\"speech\"] = speech_array\n return batch\n\ndataset = dataset.map(map_to_array, remove_columns=[\"file\"])\n```\n", "citation": "@inproceedings{panayotov2015librispeech,\n title={Librispeech: an ASR corpus based on public domain audio books},\n author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},\n booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on},\n pages={5206--5210},\n year={2015},\n organization={IEEE}\n}\n", "homepage": "http://www.openslr.org/12", "license": "", "features": {"file": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 16000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "speaker_id": {"dtype": "int64", "id": null, "_type": "Value"}, "chapter_id": {"dtype": "int64", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "file", "output": "text"}, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "file", "transcription_column": "text"}], "builder_name": "librispeech_asr", "config_name": "clean", "version": {"version_str": "2.1.0", "description": "", "major": 2, "minor": 1, "patch": 0}, "splits": {"train.100": {"name": "train.100", "num_bytes": 6619683041, "num_examples": 28539, "dataset_name": "librispeech_asr"}, "train.360": {"name": "train.360", "num_bytes": 23898214592, "num_examples": 104014, "dataset_name": "librispeech_asr"}, "validation": {"name": "validation", "num_bytes": 359572231, "num_examples": 2703, "dataset_name": "librispeech_asr"}, "test": {"name": "test", "num_bytes": 367705423, "num_examples": 2620, "dataset_name": "librispeech_asr"}}, "download_checksums": {"http://www.openslr.org/resources/12/dev-clean.tar.gz": {"num_bytes": 337926286, "checksum": "76f87d090650617fca0cac8f88b9416e0ebf80350acb97b343a85fa903728ab3"}, "http://www.openslr.org/resources/12/test-clean.tar.gz": {"num_bytes": 346663984, "checksum": "39fde525e59672dc6d1551919b1478f724438a95aa55f874b576be21967e6c23"}, "http://www.openslr.org/resources/12/train-clean-100.tar.gz": {"num_bytes": 6387309499, "checksum": "d4ddd1d5a6ab303066f14971d768ee43278a5f2a0aa43dc716b0e64ecbbbf6e2"}, "http://www.openslr.org/resources/12/train-clean-360.tar.gz": {"num_bytes": 23049477885, "checksum": "146a56496217e96c14334a160df97fffedd6e0a04e66b9c5af0d40be3c792ecf"}}, "download_size": 30121377654, "post_processing_size": null, "dataset_size": 31245175287, "size_in_bytes": 61366552941}, "other": {"description": "LibriSpeech is a corpus of approximately 1000 hours of read English speech with sampling rate of 16 kHz,\nprepared by Vassil Panayotov with the assistance of Daniel Povey. The data is derived from read\naudiobooks from the LibriVox project, and has been carefully segmented and aligned.87\n\nNote that in order to limit the required storage for preparing this dataset, the audio\nis stored in the .flac format and is not converted to a float32 array. To convert, the audio\nfile to a float32 array, please make use of the `.map()` function as follows:\n\n\n```python\nimport soundfile as sf\n\ndef map_to_array(batch):\n speech_array, _ = sf.read(batch[\"file\"])\n batch[\"speech\"] = speech_array\n return batch\n\ndataset = dataset.map(map_to_array, remove_columns=[\"file\"])\n```\n", "citation": "@inproceedings{panayotov2015librispeech,\n title={Librispeech: an ASR corpus based on public domain audio books},\n author={Panayotov, Vassil and Chen, Guoguo and Povey, Daniel and Khudanpur, Sanjeev},\n booktitle={Acoustics, Speech and Signal Processing (ICASSP), 2015 IEEE International Conference on},\n pages={5206--5210},\n year={2015},\n organization={IEEE}\n}\n", "homepage": "http://www.openslr.org/12", "license": "", "features": {"file": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 16000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "text": {"dtype": "string", "id": null, "_type": "Value"}, "speaker_id": {"dtype": "int64", "id": null, "_type": "Value"}, "chapter_id": {"dtype": "int64", "id": null, "_type": "Value"}, "id": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": {"input": "file", "output": "text"}, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "file", "transcription_column": "text"}], "builder_name": "librispeech_asr", "config_name": "other", "version": {"version_str": "2.1.0", "description": "", "major": 2, "minor": 1, "patch": 0}, "splits": {"train.500": {"name": "train.500", "num_bytes": 31810256902, "num_examples": 148688, "dataset_name": "librispeech_asr"}, "validation": {"name": "validation", "num_bytes": 337283304, "num_examples": 2864, "dataset_name": "librispeech_asr"}, "test": {"name": "test", "num_bytes": 352396474, "num_examples": 2939, "dataset_name": "librispeech_asr"}}, "download_checksums": {"http://www.openslr.org/resources/12/test-other.tar.gz": {"num_bytes": 328757843, "checksum": "d09c181bba5cf717b3dee7d4d592af11a3ee3a09e08ae025c5506f6ebe961c29"}, "http://www.openslr.org/resources/12/dev-other.tar.gz": {"num_bytes": 314305928, "checksum": "12661c48e8c3fe1de2c1caa4c3e135193bfb1811584f11f569dd12645aa84365"}, "http://www.openslr.org/resources/12/train-other-500.tar.gz": {"num_bytes": 30593501606, "checksum": "ddb22f27f96ec163645d53215559df6aa36515f26e01dd70798188350adcb6d2"}}, "download_size": 31236565377, "post_processing_size": null, "dataset_size": 32499936680, "size_in_bytes": 63736502057}} \ No newline at end of file diff --git a/datasets/librispeech_asr/librispeech_asr.py b/datasets/librispeech_asr/librispeech_asr.py index acdec76ddf5..86bbee658c2 100644 --- a/datasets/librispeech_asr/librispeech_asr.py +++ b/datasets/librispeech_asr/librispeech_asr.py @@ -17,9 +17,6 @@ """Librispeech automatic speech recognition dataset.""" -import glob -import os - import datasets from datasets.tasks import AutomaticSpeechRecognition @@ -93,6 +90,7 @@ def __init__(self, **kwargs): class LibrispeechASR(datasets.GeneratorBasedBuilder): """Librispeech dataset.""" + DEFAULT_WRITER_BATCH_SIZE = 256 BUILDER_CONFIGS = [ LibrispeechASRConfig(name="clean", description="'Clean' speech."), LibrispeechASRConfig(name="other", description="'Other', more challenging, speech."), @@ -118,41 +116,62 @@ def _info(self): ) def _split_generators(self, dl_manager): - archive_path = dl_manager.download_and_extract(_DL_URLS[self.config.name]) + archive_path = dl_manager.download(_DL_URLS[self.config.name]) if self.config.name == "clean": train_splits = [ - datasets.SplitGenerator(name="train.100", gen_kwargs={"archive_path": archive_path["train.100"]}), - datasets.SplitGenerator(name="train.360", gen_kwargs={"archive_path": archive_path["train.360"]}), + datasets.SplitGenerator( + name="train.100", gen_kwargs={"files": dl_manager.iter_archive(archive_path["train.100"])} + ), + datasets.SplitGenerator( + name="train.360", gen_kwargs={"files": dl_manager.iter_archive(archive_path["train.360"])} + ), ] elif self.config.name == "other": train_splits = [ - datasets.SplitGenerator(name="train.500", gen_kwargs={"archive_path": archive_path["train.500"]}), + datasets.SplitGenerator( + name="train.500", gen_kwargs={"files": dl_manager.iter_archive(archive_path["train.500"])} + ), ] return train_splits + [ - datasets.SplitGenerator(name=datasets.Split.VALIDATION, gen_kwargs={"archive_path": archive_path["dev"]}), - datasets.SplitGenerator(name=datasets.Split.TEST, gen_kwargs={"archive_path": archive_path["test"]}), + datasets.SplitGenerator( + name=datasets.Split.VALIDATION, gen_kwargs={"files": dl_manager.iter_archive(archive_path["dev"])} + ), + datasets.SplitGenerator( + name=datasets.Split.TEST, gen_kwargs={"files": dl_manager.iter_archive(archive_path["test"])} + ), ] - def _generate_examples(self, archive_path): + def _generate_examples(self, files): """Generate examples from a LibriSpeech archive_path.""" - transcripts_glob = os.path.join(archive_path, "LibriSpeech", "*/*/*/*.txt") key = 0 - for transcript_path in sorted(glob.glob(transcripts_glob)): - transcript_dir_path = os.path.dirname(transcript_path) - with open(transcript_path, "r", encoding="utf-8") as f: + audio_data = {} + transcripts = [] + for path, f in files: + if path.endswith(".flac"): + id_ = path.split("/")[-1][: -len(".flac")] + audio_data[id_] = f.read() + elif path.endswith(".trans.txt"): for line in f: - line = line.strip() - id_, transcript = line.split(" ", 1) - audio_file = f"{id_}.flac" - speaker_id, chapter_id = [int(el) for el in id_.split("-")[:2]] - yield key, { - "id": id_, - "speaker_id": speaker_id, - "chapter_id": chapter_id, - "file": os.path.join(transcript_dir_path, audio_file), - "audio": os.path.join(transcript_dir_path, audio_file), - "text": transcript, - } + if line: + line = line.decode("utf-8").strip() + id_, transcript = line.split(" ", 1) + audio_file = f"{id_}.flac" + speaker_id, chapter_id = [int(el) for el in id_.split("-")[:2]] + transcripts.append( + { + "id": id_, + "speaker_id": speaker_id, + "chapter_id": chapter_id, + "file": audio_file, + "text": transcript, + } + ) + if audio_data and len(audio_data) == len(transcripts): + for transcript in transcripts: + audio = {"path": transcript["file"], "bytes": audio_data[transcript["id"]]} + yield key, {"audio": audio, **transcript} key += 1 + audio_data = {} + transcripts = [] diff --git a/datasets/openslr/dataset_infos.json b/datasets/openslr/dataset_infos.json index cd269515fb2..e3de4f34dbb 100644 --- a/datasets/openslr/dataset_infos.json +++ b/datasets/openslr/dataset_infos.json @@ -1 +1 @@ -{"SLR41": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR41", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2363510, "num_examples": 5822, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/41/jv_id_female.zip": {"num_bytes": 967179448, "checksum": "6fd795a441b3ddd62d6131d4bbd9231151af89f5d9ce5ac7d8ecb370a49576c7"}, "https://openslr.org/resources/41/jv_id_male.zip": {"num_bytes": 923612912, "checksum": "6ee23916b7489420a538e7032f58d7be088a615fb67ec3e7043414d436bb5c1a"}}, "download_size": 1890792360, "post_processing_size": null, "dataset_size": 2363510, "size_in_bytes": 1893155870}, "SLR42": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR42", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1397844, "num_examples": 2906, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/42/km_kh_male.zip": {"num_bytes": 866086951, "checksum": "c0ec9c0494c57f04cf1f2d8d2668d517598375f24e34de07272ecd637c332591"}}, "download_size": 866086951, "post_processing_size": null, "dataset_size": 1397844, "size_in_bytes": 867484795}, "SLR43": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR43", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1052597, "num_examples": 2064, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/43/ne_np_female.zip": {"num_bytes": 800375645, "checksum": "3f355b543e1fad7af5e63116db871fac8e0a2d2f1a2c8f6ebc742270819da101"}}, "download_size": 800375645, "post_processing_size": null, "dataset_size": 1052597, "size_in_bytes": 801428242}, "SLR44": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR44", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1733125, "num_examples": 4213, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/44/su_id_female.zip": {"num_bytes": 861425671, "checksum": "aa75bdef23b7bf0b980431d68df6bb32f695f3be365eb379d4c22516d2d11c5a"}, "https://openslr.org/resources/44/su_id_male.zip": {"num_bytes": 610827081, "checksum": "cabed03a45d4ce0f76e2de4d34b82d6876cd00d5ad6a5349629359028460652d"}}, "download_size": 1472252752, "post_processing_size": null, "dataset_size": 1733125, "size_in_bytes": 1473985877}, "SLR63": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR63", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1973791, "num_examples": 4126, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/63/ml_in_female.zip": {"num_bytes": 710218411, "checksum": "e82d70717d20304f20f635d248c8cb1fd0c9c888e35b9105c8306fc76498a67e"}, "https://openslr.org/resources/63/ml_in_male.zip": {"num_bytes": 635657888, "checksum": "d1a6de4f58f53b973596ff1c69a64afea70f899b044397ce37465c626eee2ab9"}}, "download_size": 1345876299, "post_processing_size": null, "dataset_size": 1973791, "size_in_bytes": 1347850090}, "SLR64": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR64", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 794097, "num_examples": 1569, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/64/mr_in_female.zip": {"num_bytes": 712155683, "checksum": "42b770ee87c95379b55e187b17dccb9fbacb05d0e8292430ffe16a7483948fe5"}}, "download_size": 712155683, "post_processing_size": null, "dataset_size": 794097, "size_in_bytes": 712949780}, "SLR65": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR65", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2092011, "num_examples": 4284, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/65/ta_in_female.zip": {"num_bytes": 769504014, "checksum": "fe00da10ae12ecd6dbe1afcc5abe365d44ad9036fb017cbd73bcfed71e0f8c81"}, "https://openslr.org/resources/65/ta_in_male.zip": {"num_bytes": 603800641, "checksum": "80e546e954939c92a0cd732446418b583b61da9f538f83b00cbd445cbebd4395"}}, "download_size": 1373304655, "post_processing_size": null, "dataset_size": 2092011, "size_in_bytes": 1375396666}, "SLR66": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR66", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1852199, "num_examples": 4448, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/66/te_in_female.zip": {"num_bytes": 505680804, "checksum": "3aa3c22d6fad33ed68951f4934ae47349ee76b77220d8261ec3bda8c24bf42b2"}, "https://openslr.org/resources/66/te_in_male.zip": {"num_bytes": 529447066, "checksum": "f8a0f239d39088b6702a2186681e2874328e9fcd9bfa6a0dd9e1dc5695be3185"}}, "download_size": 1035127870, "post_processing_size": null, "dataset_size": 1852199, "size_in_bytes": 1036980069}, "SLR69": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR69", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1603279, "num_examples": 4240, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/69/ca_es_female.zip": {"num_bytes": 1043934596, "checksum": "2ec39de70550a1cdb93aee960967125fb652b8d26b8de4f6e8658c62847c3f11"}, "https://openslr.org/resources/69/ca_es_male.zip": {"num_bytes": 804724947, "checksum": "8b412ffaa65cd85692c6eab038fc085a8ae5613c6eed38c097a65946c2ee9146"}}, "download_size": 1848659543, "post_processing_size": null, "dataset_size": 1603279, "size_in_bytes": 1850262822}, "SLR35": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR35", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 71645434, "num_examples": 185076, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/35/asr_javanese_0.zip": {"num_bytes": 1197540348, "checksum": "a871c8b71ff8fa9d95955447ca0c388e8c6f925aecfce92e1880bda2da113bcb"}, "https://openslr.org/resources/35/asr_javanese_1.zip": {"num_bytes": 1172552957, "checksum": "8024b18acc265bd502f2c36930ab41bd9a8a9cbc67d3db340698df1f6799eeef"}, "https://openslr.org/resources/35/asr_javanese_2.zip": {"num_bytes": 1187099390, "checksum": "c1605da9f74b0951533bcd9bb66a868dc4552929a6e3597d1f6b66c8436cd87e"}, "https://openslr.org/resources/35/asr_javanese_3.zip": {"num_bytes": 1178721705, "checksum": "f813cfa6ea5db1a2c7af65d62dd4d2edc932e67990570f0e5418675c0c9443d3"}, "https://openslr.org/resources/35/asr_javanese_4.zip": {"num_bytes": 1174850803, "checksum": "506af733d9c1f02372e83e997c924fac5a8141a7920d1ab345bd607e26438f0c"}, "https://openslr.org/resources/35/asr_javanese_5.zip": {"num_bytes": 1178642105, "checksum": "5300df2d2fd95033632fe7d3d77042804c92bf4f9983f11e707c20e358e45a91"}, "https://openslr.org/resources/35/asr_javanese_6.zip": {"num_bytes": 1197026293, "checksum": "a487e12f9d3fd1d3e6d8a8c2b58363813d6121e6a84937ec0d27601fea2654db"}, "https://openslr.org/resources/35/asr_javanese_7.zip": {"num_bytes": 1197789186, "checksum": "944ce7e3463f2e0d6024f8a1768e161a64dd4ab7cf8a96b7924fb8666ae2142e"}, "https://openslr.org/resources/35/asr_javanese_8.zip": {"num_bytes": 1185807385, "checksum": "cb598b81bd681dc51965c912bf4aabc4af6eb9b57d5a7cb0998ed121cec63dcd"}, "https://openslr.org/resources/35/asr_javanese_9.zip": {"num_bytes": 1160028499, "checksum": "7ee9de72360a59dc2a3cd3570627565a638d7a47f0f95ce4c14545bc9b6690b2"}, "https://openslr.org/resources/35/asr_javanese_a.zip": {"num_bytes": 1176016135, "checksum": "1fd1e4b06ed5d18614ef7ce414e7e0b6c105d6f5d87b3a6210fcedc4cc6f35cd"}, "https://openslr.org/resources/35/asr_javanese_b.zip": {"num_bytes": 1176960512, "checksum": "036bb70c60e8ba4b9be090dcd717e1da8744dd1cfdfab1eb4a4cd29d7755b938"}, "https://openslr.org/resources/35/asr_javanese_c.zip": {"num_bytes": 1178017086, "checksum": "a46d7b1ad184a4c2ac9099c8399f18fb8b14dd9ab4172a61f8abe3e464f7b2b9"}, "https://openslr.org/resources/35/asr_javanese_d.zip": {"num_bytes": 1199910382, "checksum": "9f3058916fe721f92a4d1a6c2794d82920b7c88ed780ef06fe69f8e448d0ddb6"}, "https://openslr.org/resources/35/asr_javanese_e.zip": {"num_bytes": 1175431904, "checksum": "d9234d3331fb11c082bc17f3b54c13dfa183c4cb13e35c030f7a1dbbe4c819cd"}, "https://openslr.org/resources/35/asr_javanese_f.zip": {"num_bytes": 1163711036, "checksum": "1bedbc295e4d1592e5730da8f0774fe360fe146d193b9c9815a8025072dd0b70"}}, "download_size": 18900105726, "post_processing_size": null, "dataset_size": 71645434, "size_in_bytes": 18971751160}, "SLR36": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR36", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 86668853, "num_examples": 219156, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/36/asr_sundanese_0.zip": {"num_bytes": 1433294860, "checksum": "947a0ac86008b88130f7c8f1b27d4a0f93886f653cf65b5948c0532cd0097c0d"}, "https://openslr.org/resources/36/asr_sundanese_1.zip": {"num_bytes": 1445470477, "checksum": "365f052dd9d977343002289ea1f29dea466f1243e5edf22dfb933e3fa93a6d87"}, "https://openslr.org/resources/36/asr_sundanese_2.zip": {"num_bytes": 1431289018, "checksum": "f9b9ee2a925d4fd934be3ebe09545ffb3f294f1e6d1380e837054fdf4ce8cff2"}, "https://openslr.org/resources/36/asr_sundanese_3.zip": {"num_bytes": 1446805642, "checksum": "ba3cc0e8e351a5456269c72edf7a3b50cf820941f93d7eed0e8f02a3b1b0a89f"}, "https://openslr.org/resources/36/asr_sundanese_4.zip": {"num_bytes": 1449187658, "checksum": "a6ca66e2537bd55dfaea4e716d847c70aead58c217184ab37afbd4065cca9262"}, "https://openslr.org/resources/36/asr_sundanese_5.zip": {"num_bytes": 1425741894, "checksum": "31bb8a9981b45855ab0b7c634c89040fe99b122455750a6ab956393dc9dec0d8"}, "https://openslr.org/resources/36/asr_sundanese_6.zip": {"num_bytes": 1415730042, "checksum": "3f23d6c4c67dc6f39a8ebb2af43e2efedb57028abb85eb519394f2d9ef8b3a21"}, "https://openslr.org/resources/36/asr_sundanese_7.zip": {"num_bytes": 1436967650, "checksum": "bce8f33b6ed62978915dfc601957162e9eece8bc3190cd2d548d7679409a3d77"}, "https://openslr.org/resources/36/asr_sundanese_8.zip": {"num_bytes": 1436421462, "checksum": "755e0af77d0bd6d4aa7895b2ab9fbf792c57efc49c8cec21d3d728fe3374b621"}, "https://openslr.org/resources/36/asr_sundanese_9.zip": {"num_bytes": 1434660332, "checksum": "5d426d2c99eb91ffd3db193d510e288133c426556430fe2e70e08f58815f5a31"}, "https://openslr.org/resources/36/asr_sundanese_a.zip": {"num_bytes": 1436753516, "checksum": "e032537b62aa8a8abe660bca418ac2e26a93bdc7a357b948a301bde286952fa5"}, "https://openslr.org/resources/36/asr_sundanese_b.zip": {"num_bytes": 1435014221, "checksum": "e999e83fde37ec973b1a1822aaa8769488c2a95058a3448661ac94c319881549"}, "https://openslr.org/resources/36/asr_sundanese_c.zip": {"num_bytes": 1429102490, "checksum": "275ac684fe7b8bf012dc251ddb91496e2d95c2c257ec87ab0847efa379e96787"}, "https://openslr.org/resources/36/asr_sundanese_d.zip": {"num_bytes": 1432973082, "checksum": "34ae64f8a29ddef2e05ca5ce8122b461a737d58d796dbe577a4e8a4a05c6b2ce"}, "https://openslr.org/resources/36/asr_sundanese_e.zip": {"num_bytes": 1443609656, "checksum": "25e36087063e0cc5e54cf04e5a4e065b19e0c1bc9cbc07a9f98635941b53bfea"}, "https://openslr.org/resources/36/asr_sundanese_f.zip": {"num_bytes": 1463531929, "checksum": "3d1410c31cc70994f82b9555967fa4c8d682aee288cc85b05b9c4e6352a49f14"}}, "download_size": 22996553929, "post_processing_size": null, "dataset_size": 86668853, "size_in_bytes": 23083222782}, "SLR70": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR70", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1304770, "num_examples": 3359, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/70/en_ng_female.zip": {"num_bytes": 759856787, "checksum": "e840afea824c9075db8c7d574e993837c6a4861fd0ff0275c4cc223aa00a785c"}, "https://openslr.org/resources/70/en_ng_male.zip": {"num_bytes": 454098409, "checksum": "f619d09d5ffdf0d4044ef1d57585eeaa50c0cbf08844782a9dd08f56ea9e567f"}}, "download_size": 1213955196, "post_processing_size": null, "dataset_size": 1304770, "size_in_bytes": 1215259966}, "SLR71": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR71", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1630901, "num_examples": 4374, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/71/es_cl_female.zip": {"num_bytes": 585615697, "checksum": "23593f3dac085d26f99df38159c1ab0ae2c23f5c97ad869292496abc6e171bc6"}, "https://openslr.org/resources/71/es_cl_male.zip": {"num_bytes": 859750206, "checksum": "ace2cbd6df28e94fdd636ba1263b72b557722b0d2abcf4c6e072011ac870cbee"}}, "download_size": 1445365903, "post_processing_size": null, "dataset_size": 1630901, "size_in_bytes": 1446996804}, "SLR72": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR72", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1825435, "num_examples": 4903, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/72/es_co_female.zip": {"num_bytes": 801960444, "checksum": "03721aa7b6b7fe1dd309a0c545cbef4898fac99ed811f4e1769b2fc16bb7eb70"}, "https://openslr.org/resources/72/es_co_male.zip": {"num_bytes": 810070088, "checksum": "2e72abf283adf3f52c28d9f4d59709d4a24fa57243dc696a99dfbc1b8e534c9a"}}, "download_size": 1612030532, "post_processing_size": null, "dataset_size": 1825435, "size_in_bytes": 1613855967}, "SLR73": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR73", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2027542, "num_examples": 5447, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/73/es_pe_female.zip": {"num_bytes": 913983951, "checksum": "0bcb138a6a4657fa52ec6ec129807dc2476d9a89184ea2ab4f588bbbddc12062"}, "https://openslr.org/resources/73/es_pe_male.zip": {"num_bytes": 1026322863, "checksum": "8baf41802bc59f7d170ee091d8676db725903efdcfeda12d699a31a746ae50bf"}}, "download_size": 1940306814, "post_processing_size": null, "dataset_size": 2027542, "size_in_bytes": 1942334356}, "SLR74": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR74", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 230997, "num_examples": 617, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/74/es_pr_female.zip": {"num_bytes": 214181314, "checksum": "0ff2f4ed63fbbc4305140bb88c71ca9a72b18c6686a755534b47ae28dce2861d"}}, "download_size": 214181314, "post_processing_size": null, "dataset_size": 230997, "size_in_bytes": 214412311}, "SLR75": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR75", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1252119, "num_examples": 3357, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/75/es_ve_female.zip": {"num_bytes": 517000277, "checksum": "4600baead7519afaa5f6b33cf3f4b2373e7f1902aa72841fc38582660b07fe31"}, "https://openslr.org/resources/75/es_ve_male.zip": {"num_bytes": 526316727, "checksum": "3cf8703b1b61de1bf964e26f0a2c7f0ec637b1a85eafd982e98de9301558b289"}}, "download_size": 1043317004, "post_processing_size": null, "dataset_size": 1252119, "size_in_bytes": 1044569123}, "SLR76": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR76", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2682483, "num_examples": 7136, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/76/eu_es_female.zip": {"num_bytes": 1622676657, "checksum": "b3eaa91f2be198c8455f46e802f671e33cba5d95909e58e0b59cb6638f5b4947"}, "https://openslr.org/resources/76/eu_es_male.zip": {"num_bytes": 1418448856, "checksum": "787bcb8369d3797a6b34b0e2d420f5255e12e6c6a385cd4e72ddde59c6018227"}}, "download_size": 3041125513, "post_processing_size": null, "dataset_size": 2682483, "size_in_bytes": 3043807996}, "SLR77": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR77", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2159694, "num_examples": 5587, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/77/gl_es_female.zip": {"num_bytes": 1656677564, "checksum": "e2cda7ef8d5f57b5f3086473d5297e6bb73757f0c446409245f407d7612c5060"}, "https://openslr.org/resources/77/gl_es_male.zip": {"num_bytes": 551314211, "checksum": "b768ed0b77fb4e88adf795dedcc872c53a4348ee8d11eb8efb4571fff94688be"}}, "download_size": 2207991775, "post_processing_size": null, "dataset_size": 2159694, "size_in_bytes": 2210151469}, "SLR78": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR78", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2077670, "num_examples": 4272, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/78/gu_in_female.zip": {"num_bytes": 917450036, "checksum": "bbda0815e0d2e01ad9310768e0e2be9efb612a9c56c66c4ab2f32b817da5c786"}, "https://openslr.org/resources/78/gu_in_male.zip": {"num_bytes": 825772066, "checksum": "ce474d1686104b3bd274a2d5192459cb4dee6e0c9bbcf3de1bb3b39c6ab89caf"}}, "download_size": 1743222102, "post_processing_size": null, "dataset_size": 2077670, "size_in_bytes": 1745299772}, "SLR79": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR79", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2130895, "num_examples": 4400, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/79/kn_in_female.zip": {"num_bytes": 980825420, "checksum": "182a147e5747ad4f4ac50a5e7e1ee3683e1c2c1d9105963365d151d664466b62"}, "https://openslr.org/resources/79/kn_in_male.zip": {"num_bytes": 840093695, "checksum": "38e3c0c51f792a3655cc8f4747b339df8ec4b1031a0fff590c1a1af6a8bbbcdf"}}, "download_size": 1820919115, "post_processing_size": null, "dataset_size": 2130895, "size_in_bytes": 1823050010}, "SLR80": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR80", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1282403, "num_examples": 2530, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/80/my_mm_female.zip": {"num_bytes": 948181015, "checksum": "a7cdcaa5e06864e02fa18fc0fe9595feadf332d6a63aadc01ce51a24969a2708"}}, "download_size": 948181015, "post_processing_size": null, "dataset_size": 1282403, "size_in_bytes": 949463418}, "SLR86": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR86", "version": "0.0.0", "splits": {"train": {"name": "train", "num_bytes": 1341639, "num_examples": 3583, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/86/yo_ng_female.zip": {"num_bytes": 462033045, "checksum": "8875ebc839e57a3318ba1ce37d98c35da46d4f99f9f777f83fcf074257804060"}, "https://openslr.org/resources/86/yo_ng_male.zip": {"num_bytes": 445032517, "checksum": "58519b27f6954c446d0e7221b227a6f342b9c5ea66bf02af40c1616e086afc4c"}}, "download_size": 907065562, "post_processing_size": null, "dataset_size": 1341639, "size_in_bytes": 908407201}, "SLR32": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR32", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3958024, "num_examples": 9821, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/32/af_za.tar.gz": {"num_bytes": 950827926, "checksum": "b702a68486bf16cbf302d6e0808ea2e966f3dfa720ea0d6ce36d881aa266978f"}, "https://openslr.org/resources/32/st_za.tar.gz": {"num_bytes": 724425648, "checksum": "509202bcf6fae3b24508cfdbc3a6c886b29b4c3d822adbf6c40b21d98ada3fcf"}, "https://openslr.org/resources/32/tn_za.tar.gz": {"num_bytes": 729406193, "checksum": "3e6a522d2fafa071ec1d484cb79336ff36008a5d5d34e1444984e5df8312eb6f"}, "https://openslr.org/resources/32/xh_za.tar.gz": {"num_bytes": 907498093, "checksum": "712336c82637cbfb4304766dd7c0889bac1664945aed08bafb49eac29ae756c3"}}, "download_size": 3312157860, "post_processing_size": null, "dataset_size": 3958024, "size_in_bytes": 3316115884}, "SLR52": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR52", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 75447705, "num_examples": 185293, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/52/asr_sinhala_0.zip": {"num_bytes": 915237858, "checksum": "41bcd4cf6edde39e49bf8ca6b54c32e1403609759ff9edea2a2696ef7aa8fff5"}, "https://openslr.org/resources/52/asr_sinhala_1.zip": {"num_bytes": 908852134, "checksum": "7a4dd3279254f06ba8d1e864d2aa68eec1e6740cfc2b718d2bc060b878871e74"}, "https://openslr.org/resources/52/asr_sinhala_2.zip": {"num_bytes": 913568157, "checksum": "746b5ee016e09868016851ff2148000570b6cb6b9acde5d16527f20053d1cd14"}, "https://openslr.org/resources/52/asr_sinhala_3.zip": {"num_bytes": 901325452, "checksum": "a167e6bd9c0b64e105cc57528a455a4653303336b85731273039487d9f94afda"}, "https://openslr.org/resources/52/asr_sinhala_4.zip": {"num_bytes": 922493671, "checksum": "f17fc798ea085e876500095e8dd357d1088303598d190642978c353d51d2b94b"}, "https://openslr.org/resources/52/asr_sinhala_5.zip": {"num_bytes": 922505332, "checksum": "8285340d15064caa1da0635d50471c8de24d33e3d1ae7af3c63e4a23d3ba25fe"}, "https://openslr.org/resources/52/asr_sinhala_6.zip": {"num_bytes": 914729823, "checksum": "a511dc329dfc493c9e25d1315ab95da93a8a4b751e032c1848eeeb8655608403"}, "https://openslr.org/resources/52/asr_sinhala_7.zip": {"num_bytes": 911992962, "checksum": "8180736327c3147bac912c329fe3a571a61ecb6d4da7d4584acb0d34ab204fa5"}, "https://openslr.org/resources/52/asr_sinhala_8.zip": {"num_bytes": 924344925, "checksum": "fdf333751c254f8dc7b649fd1a48cf47ae8e855e369a182d88bee3325ae8a99d"}, "https://openslr.org/resources/52/asr_sinhala_9.zip": {"num_bytes": 920427318, "checksum": "288f4a7ea055b3963ad7d6a6e6e6189672715a42d0a1b6e99a1a8ba0fe67a9c6"}, "https://openslr.org/resources/52/asr_sinhala_a.zip": {"num_bytes": 901532849, "checksum": "da36de6739ce5b8c835c3c232d5122b883a88442ec3f91a534154b2a9177d0ec"}, "https://openslr.org/resources/52/asr_sinhala_b.zip": {"num_bytes": 924132571, "checksum": "4b5dd26de34b27e9cc88842e992626694fd329f23493f40c748d556c61395d2a"}, "https://openslr.org/resources/52/asr_sinhala_c.zip": {"num_bytes": 938991415, "checksum": "f6db1cece623fafe866a56b9f7100976823b32f968036b72a9a634138e87e92d"}, "https://openslr.org/resources/52/asr_sinhala_d.zip": {"num_bytes": 911368918, "checksum": "8ecc58c745998b05b21c8af05fdc741d437a654a8babba16c4970ad981074e2c"}, "https://openslr.org/resources/52/asr_sinhala_e.zip": {"num_bytes": 927771260, "checksum": "f5cbfd3c8d1c5bf6fe7a1c1ee606101368512a852856fb2d01f4dde7869f605a"}, "https://openslr.org/resources/52/asr_sinhala_f.zip": {"num_bytes": 917209429, "checksum": "65782dee2ba4256bab123835ef2277a3fd1116f20f403a2c4ff5ace3ac45714c"}}, "download_size": 14676484074, "post_processing_size": null, "dataset_size": 75447705, "size_in_bytes": 14751931779}, "SLR53": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR53", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 85804462, "num_examples": 218703, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/53/asr_bengali_0.zip": {"num_bytes": 919838172, "checksum": "c1bbeadbcffae8a40d8e54f25c6c3dea922951a322cc7875a18f52dec127741a"}, "https://openslr.org/resources/53/asr_bengali_1.zip": {"num_bytes": 906161405, "checksum": "b6af5d30439d25a5df20efd85bfa2e900ee962e3afb91fe88a65cdbb0689cf84"}, "https://openslr.org/resources/53/asr_bengali_2.zip": {"num_bytes": 921562897, "checksum": "ac0b50d5ad38d5295c16b7eb62901b273bd6df55dea7b1a8495e69c1a50c0986"}, "https://openslr.org/resources/53/asr_bengali_3.zip": {"num_bytes": 918817316, "checksum": "444760953dc4e006cd6e38ea647b611c7be93a07a78b1a6b83974fe3ebba6b65"}, "https://openslr.org/resources/53/asr_bengali_4.zip": {"num_bytes": 908199672, "checksum": "975a1b690ccfe0609ba50738666758ad92c3683416d1cf7771972496adb4313f"}, "https://openslr.org/resources/53/asr_bengali_5.zip": {"num_bytes": 932042725, "checksum": "21dec790c4f96771a28347ed4430c74d3f3bff046684f4522c2301f7029f632d"}, "https://openslr.org/resources/53/asr_bengali_6.zip": {"num_bytes": 900826997, "checksum": "b0f93fb831bb36c75a6f4c0731bfb991f8b6529bc3b16ee0bede3e7108a7679e"}, "https://openslr.org/resources/53/asr_bengali_7.zip": {"num_bytes": 927750265, "checksum": "647cbcfb9c92930f4625dbc107f4218cdd37f8e3494df23d42917640da22938c"}, "https://openslr.org/resources/53/asr_bengali_8.zip": {"num_bytes": 927268934, "checksum": "73168b982a0665fb4f1104eaafeb3ddc01780b39978649e01ce6ab7850a86de1"}, "https://openslr.org/resources/53/asr_bengali_9.zip": {"num_bytes": 906382286, "checksum": "25f678604ffe93fc986cc402dc4a4329f36eb44ab627c645c4957dbf8e85917c"}, "https://openslr.org/resources/53/asr_bengali_a.zip": {"num_bytes": 900283300, "checksum": "daf0fc69dbd041fd254e96df1732359666ace7c9aea9d5c64c03ab8add3a00c4"}, "https://openslr.org/resources/53/asr_bengali_b.zip": {"num_bytes": 910050386, "checksum": "2d6fc0f464130bc3761546ac0e8b085921d5f1c9afbf886b9c1fa95f9755fd26"}, "https://openslr.org/resources/53/asr_bengali_c.zip": {"num_bytes": 897120616, "checksum": "116e8e63882f548410a3b835d2d3b6a11e6a05969374d173b9c01a8ba7112abd"}, "https://openslr.org/resources/53/asr_bengali_d.zip": {"num_bytes": 914366610, "checksum": "aa155d8e0688d032229ad7a5e4c713e696d1ea531feae83ae3230e526f1db7a6"}, "https://openslr.org/resources/53/asr_bengali_e.zip": {"num_bytes": 922936447, "checksum": "2f6f97591adde2b469f29b601ba33bfc3e8049681594fe31be8a55204c70ae15"}, "https://openslr.org/resources/53/asr_bengali_f.zip": {"num_bytes": 917202893, "checksum": "42542ec7d434bd6a34b30c01fa24de206fb2d2e56afea745a14867a8c0eaa32c"}}, "download_size": 14630810921, "post_processing_size": null, "dataset_size": 85804462, "size_in_bytes": 14716615383}, "SLR54": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR54", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 61097744, "num_examples": 157905, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/54/asr_nepali_0.zip": {"num_bytes": 589002210, "checksum": "6c783a5a731c7a9c2cac678823a2ee7866db1acbad7f9a199bce3bf7a64e22b6"}, "https://openslr.org/resources/54/asr_nepali_1.zip": {"num_bytes": 582088242, "checksum": "661865704f3d9adacd74f8c98cd0f6a6e869902c6441efb96c761573dd1d2f05"}, "https://openslr.org/resources/54/asr_nepali_2.zip": {"num_bytes": 589401540, "checksum": "a2b4c373d7ebe5f2d491bf73c2324e6f5645d724df58fd765c71a3a86e7ab6d4"}, "https://openslr.org/resources/54/asr_nepali_3.zip": {"num_bytes": 574596426, "checksum": "6a925d4448f98694185d50cacfa380fad128b47ebf9d5519526b83dd6586348d"}, "https://openslr.org/resources/54/asr_nepali_4.zip": {"num_bytes": 583746586, "checksum": "7315f69b392690c22db32b3c2f14b82b1f64215c5a21d697c421d6d220a55bf0"}, "https://openslr.org/resources/54/asr_nepali_5.zip": {"num_bytes": 572967016, "checksum": "3891b332a9fc55e4fb0579bf67431989e92ab05b9715c0e9673cf356e878e0df"}, "https://openslr.org/resources/54/asr_nepali_6.zip": {"num_bytes": 588104006, "checksum": "78c321a8f55a5aa0c56feb791826a2751087cc87a36b27bba56ac6b124eac73f"}, "https://openslr.org/resources/54/asr_nepali_7.zip": {"num_bytes": 588410232, "checksum": "8b05b8b4aedfc9829cf33cd65ab3c1474eb8f738078b414d40b61f08782064ec"}, "https://openslr.org/resources/54/asr_nepali_8.zip": {"num_bytes": 585192213, "checksum": "0125cfc7c54e44bd4ac01d5558130a752cad26aa7055df753c65b400ece2c9f8"}, "https://openslr.org/resources/54/asr_nepali_9.zip": {"num_bytes": 578834881, "checksum": "6c68e80fe7c58a33aeb91b5b9bc37a99f9374a8f629e2a109bddba51d1712b12"}, "https://openslr.org/resources/54/asr_nepali_a.zip": {"num_bytes": 587798317, "checksum": "03b7bf7b6ace01a677e2a0dd079053ea29abf45743f197761190f3f52678e6df"}, "https://openslr.org/resources/54/asr_nepali_b.zip": {"num_bytes": 584397714, "checksum": "9a98d93ae91e75c6928d9222b387105e99030b8b81df9ada57c87f6b317c0853"}, "https://openslr.org/resources/54/asr_nepali_c.zip": {"num_bytes": 579440365, "checksum": "8bac1a046a86fc3684bfec2e5af1b1e0916ec5c2f1be5ccb1fb4778ecd7bb357"}, "https://openslr.org/resources/54/asr_nepali_d.zip": {"num_bytes": 588470094, "checksum": "9aad327fd72efcc009d060a8299aa70ca1757f1ec32fe3280d53e449ef75e5c3"}, "https://openslr.org/resources/54/asr_nepali_e.zip": {"num_bytes": 578091869, "checksum": "4ba73ada7cf482611b3ad3e17a77685b1ac872e5840953c07a1c6c2b10a83e4a"}, "https://openslr.org/resources/54/asr_nepali_f.zip": {"num_bytes": 577705651, "checksum": "062f4908802ab0d57362da1dfea4898898f6d21ba09596c1e271c2cda47297c6"}}, "download_size": 9328247362, "post_processing_size": null, "dataset_size": 61097744, "size_in_bytes": 9389345106}, "SLR83": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR83", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 7098985, "num_examples": 17877, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/83/irish_english_male.zip": {"num_bytes": 164531638, "checksum": "2e5dbae4cc27e0e24e21f21c8e7464d219feb703f5fee3e567de6561a05024ed"}, "https://openslr.org/resources/83/midlands_english_female.zip": {"num_bytes": 103085118, "checksum": "aa1083a319e52d658b85c162905ec27cdf2ac6d5645b4caeab05a385a2c8a37f"}, "https://openslr.org/resources/83/midlands_english_male.zip": {"num_bytes": 166833961, "checksum": "8192c7a0626eb742f9999e63162289f8f9a86c9cb49ef68298dc7f624acaebcf"}, "https://openslr.org/resources/83/northern_english_female.zip": {"num_bytes": 314983063, "checksum": "22b6229d08481e7605b028185dc55dccd0611db428854f2d485d9ff34395a65c"}, "https://openslr.org/resources/83/northern_english_male.zip": {"num_bytes": 817772034, "checksum": "b627d500d1b2e3c4921fb6d91338ead7b972f67c1c2f0babb300e0ef844c7248"}, "https://openslr.org/resources/83/scottish_english_female.zip": {"num_bytes": 351443880, "checksum": "2dbe5545a7ab87112c7730086586f738ec4f42171f7738628ba084ed4ba15ccb"}, "https://openslr.org/resources/83/scottish_english_male.zip": {"num_bytes": 620254118, "checksum": "c7d2d9cd581c48a8323f6cc3886d879e2e7aca5931d98228e07d07b350d9f9a9"}, "https://openslr.org/resources/83/southern_english_female.zip": {"num_bytes": 1636701939, "checksum": "e0a2e8e64b9efdbd7bae5cdf33ac8b81db495b499c9d40da0a7d7842e42b1e76"}, "https://openslr.org/resources/83/southern_english_male.zip": {"num_bytes": 1700955740, "checksum": "788b1c59fb5713b0e1efebc02b7aa1b55182b21955493b299b9941c70a878cad"}, "https://openslr.org/resources/83/welsh_english_female.zip": {"num_bytes": 595683538, "checksum": "3c2465b9618e33f42c7d2ee753b54ae593714e758e236efcdd56c14c5bd89f1d"}, "https://openslr.org/resources/83/welsh_english_male.zip": {"num_bytes": 757645790, "checksum": "eaf8de0f8872bb647d5c159bb33713cfd58966bd59d733f5f399793778ea5058"}}, "download_size": 7229890819, "post_processing_size": null, "dataset_size": 7098985, "size_in_bytes": 7236989804}} \ No newline at end of file +{"SLR41": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR41", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2423902, "num_examples": 5822, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/41/jv_id_female.zip": {"num_bytes": 967179448, "checksum": "6fd795a441b3ddd62d6131d4bbd9231151af89f5d9ce5ac7d8ecb370a49576c7"}, "https://openslr.org/resources/41/jv_id_male.zip": {"num_bytes": 923612912, "checksum": "6ee23916b7489420a538e7032f58d7be088a615fb67ec3e7043414d436bb5c1a"}}, "download_size": 1890792360, "post_processing_size": null, "dataset_size": 2423902, "size_in_bytes": 1893216262}, "SLR42": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR42", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1427984, "num_examples": 2906, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/42/km_kh_male.zip": {"num_bytes": 866086951, "checksum": "c0ec9c0494c57f04cf1f2d8d2668d517598375f24e34de07272ecd637c332591"}}, "download_size": 866086951, "post_processing_size": null, "dataset_size": 1427984, "size_in_bytes": 867514935}, "SLR43": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR43", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1074005, "num_examples": 2064, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/43/ne_np_female.zip": {"num_bytes": 800375645, "checksum": "3f355b543e1fad7af5e63116db871fac8e0a2d2f1a2c8f6ebc742270819da101"}}, "download_size": 800375645, "post_processing_size": null, "dataset_size": 1074005, "size_in_bytes": 801449650}, "SLR44": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR44", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1776827, "num_examples": 4213, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/44/su_id_female.zip": {"num_bytes": 861425671, "checksum": "aa75bdef23b7bf0b980431d68df6bb32f695f3be365eb379d4c22516d2d11c5a"}, "https://openslr.org/resources/44/su_id_male.zip": {"num_bytes": 610827081, "checksum": "cabed03a45d4ce0f76e2de4d34b82d6876cd00d5ad6a5349629359028460652d"}}, "download_size": 1472252752, "post_processing_size": null, "dataset_size": 1776827, "size_in_bytes": 1474029579}, "SLR63": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR63", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2016587, "num_examples": 4126, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/63/ml_in_female.zip": {"num_bytes": 710218411, "checksum": "e82d70717d20304f20f635d248c8cb1fd0c9c888e35b9105c8306fc76498a67e"}, "https://openslr.org/resources/63/ml_in_male.zip": {"num_bytes": 635657888, "checksum": "d1a6de4f58f53b973596ff1c69a64afea70f899b044397ce37465c626eee2ab9"}}, "download_size": 1345876299, "post_processing_size": null, "dataset_size": 2016587, "size_in_bytes": 1347892886}, "SLR64": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR64", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 810375, "num_examples": 1569, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/64/mr_in_female.zip": {"num_bytes": 712155683, "checksum": "42b770ee87c95379b55e187b17dccb9fbacb05d0e8292430ffe16a7483948fe5"}}, "download_size": 712155683, "post_processing_size": null, "dataset_size": 810375, "size_in_bytes": 712966058}, "SLR65": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR65", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2136447, "num_examples": 4284, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/65/ta_in_female.zip": {"num_bytes": 769504014, "checksum": "fe00da10ae12ecd6dbe1afcc5abe365d44ad9036fb017cbd73bcfed71e0f8c81"}, "https://openslr.org/resources/65/ta_in_male.zip": {"num_bytes": 603800641, "checksum": "80e546e954939c92a0cd732446418b583b61da9f538f83b00cbd445cbebd4395"}}, "download_size": 1373304655, "post_processing_size": null, "dataset_size": 2136447, "size_in_bytes": 1375441102}, "SLR66": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR66", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1898335, "num_examples": 4448, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/66/te_in_female.zip": {"num_bytes": 505680804, "checksum": "3aa3c22d6fad33ed68951f4934ae47349ee76b77220d8261ec3bda8c24bf42b2"}, "https://openslr.org/resources/66/te_in_male.zip": {"num_bytes": 529447066, "checksum": "f8a0f239d39088b6702a2186681e2874328e9fcd9bfa6a0dd9e1dc5695be3185"}}, "download_size": 1035127870, "post_processing_size": null, "dataset_size": 1898335, "size_in_bytes": 1037026205}, "SLR69": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR69", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1647263, "num_examples": 4240, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/69/ca_es_female.zip": {"num_bytes": 1043934596, "checksum": "2ec39de70550a1cdb93aee960967125fb652b8d26b8de4f6e8658c62847c3f11"}, "https://openslr.org/resources/69/ca_es_male.zip": {"num_bytes": 804724947, "checksum": "8b412ffaa65cd85692c6eab038fc085a8ae5613c6eed38c097a65946c2ee9146"}}, "download_size": 1848659543, "post_processing_size": null, "dataset_size": 1647263, "size_in_bytes": 1850306806}, "SLR35": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR35", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 73565374, "num_examples": 185076, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/35/asr_javanese_0.zip": {"num_bytes": 1197540348, "checksum": "a871c8b71ff8fa9d95955447ca0c388e8c6f925aecfce92e1880bda2da113bcb"}, "https://openslr.org/resources/35/asr_javanese_1.zip": {"num_bytes": 1172552957, "checksum": "8024b18acc265bd502f2c36930ab41bd9a8a9cbc67d3db340698df1f6799eeef"}, "https://openslr.org/resources/35/asr_javanese_2.zip": {"num_bytes": 1187099390, "checksum": "c1605da9f74b0951533bcd9bb66a868dc4552929a6e3597d1f6b66c8436cd87e"}, "https://openslr.org/resources/35/asr_javanese_3.zip": {"num_bytes": 1178721705, "checksum": "f813cfa6ea5db1a2c7af65d62dd4d2edc932e67990570f0e5418675c0c9443d3"}, "https://openslr.org/resources/35/asr_javanese_4.zip": {"num_bytes": 1174850803, "checksum": "506af733d9c1f02372e83e997c924fac5a8141a7920d1ab345bd607e26438f0c"}, "https://openslr.org/resources/35/asr_javanese_5.zip": {"num_bytes": 1178642105, "checksum": "5300df2d2fd95033632fe7d3d77042804c92bf4f9983f11e707c20e358e45a91"}, "https://openslr.org/resources/35/asr_javanese_6.zip": {"num_bytes": 1197026293, "checksum": "a487e12f9d3fd1d3e6d8a8c2b58363813d6121e6a84937ec0d27601fea2654db"}, "https://openslr.org/resources/35/asr_javanese_7.zip": {"num_bytes": 1197789186, "checksum": "944ce7e3463f2e0d6024f8a1768e161a64dd4ab7cf8a96b7924fb8666ae2142e"}, "https://openslr.org/resources/35/asr_javanese_8.zip": {"num_bytes": 1185807385, "checksum": "cb598b81bd681dc51965c912bf4aabc4af6eb9b57d5a7cb0998ed121cec63dcd"}, "https://openslr.org/resources/35/asr_javanese_9.zip": {"num_bytes": 1160028499, "checksum": "7ee9de72360a59dc2a3cd3570627565a638d7a47f0f95ce4c14545bc9b6690b2"}, "https://openslr.org/resources/35/asr_javanese_a.zip": {"num_bytes": 1176016135, "checksum": "1fd1e4b06ed5d18614ef7ce414e7e0b6c105d6f5d87b3a6210fcedc4cc6f35cd"}, "https://openslr.org/resources/35/asr_javanese_b.zip": {"num_bytes": 1176960512, "checksum": "036bb70c60e8ba4b9be090dcd717e1da8744dd1cfdfab1eb4a4cd29d7755b938"}, "https://openslr.org/resources/35/asr_javanese_c.zip": {"num_bytes": 1178017086, "checksum": "a46d7b1ad184a4c2ac9099c8399f18fb8b14dd9ab4172a61f8abe3e464f7b2b9"}, "https://openslr.org/resources/35/asr_javanese_d.zip": {"num_bytes": 1199910382, "checksum": "9f3058916fe721f92a4d1a6c2794d82920b7c88ed780ef06fe69f8e448d0ddb6"}, "https://openslr.org/resources/35/asr_javanese_e.zip": {"num_bytes": 1175431904, "checksum": "d9234d3331fb11c082bc17f3b54c13dfa183c4cb13e35c030f7a1dbbe4c819cd"}, "https://openslr.org/resources/35/asr_javanese_f.zip": {"num_bytes": 1163711036, "checksum": "1bedbc295e4d1592e5730da8f0774fe360fe146d193b9c9815a8025072dd0b70"}}, "download_size": 18900105726, "post_processing_size": null, "dataset_size": 73565374, "size_in_bytes": 18973671100}, "SLR36": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR36", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 88942337, "num_examples": 219156, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/36/asr_sundanese_0.zip": {"num_bytes": 1433294860, "checksum": "947a0ac86008b88130f7c8f1b27d4a0f93886f653cf65b5948c0532cd0097c0d"}, "https://openslr.org/resources/36/asr_sundanese_1.zip": {"num_bytes": 1445470477, "checksum": "365f052dd9d977343002289ea1f29dea466f1243e5edf22dfb933e3fa93a6d87"}, "https://openslr.org/resources/36/asr_sundanese_2.zip": {"num_bytes": 1431289018, "checksum": "f9b9ee2a925d4fd934be3ebe09545ffb3f294f1e6d1380e837054fdf4ce8cff2"}, "https://openslr.org/resources/36/asr_sundanese_3.zip": {"num_bytes": 1446805642, "checksum": "ba3cc0e8e351a5456269c72edf7a3b50cf820941f93d7eed0e8f02a3b1b0a89f"}, "https://openslr.org/resources/36/asr_sundanese_4.zip": {"num_bytes": 1449187658, "checksum": "a6ca66e2537bd55dfaea4e716d847c70aead58c217184ab37afbd4065cca9262"}, "https://openslr.org/resources/36/asr_sundanese_5.zip": {"num_bytes": 1425741894, "checksum": "31bb8a9981b45855ab0b7c634c89040fe99b122455750a6ab956393dc9dec0d8"}, "https://openslr.org/resources/36/asr_sundanese_6.zip": {"num_bytes": 1415730042, "checksum": "3f23d6c4c67dc6f39a8ebb2af43e2efedb57028abb85eb519394f2d9ef8b3a21"}, "https://openslr.org/resources/36/asr_sundanese_7.zip": {"num_bytes": 1436967650, "checksum": "bce8f33b6ed62978915dfc601957162e9eece8bc3190cd2d548d7679409a3d77"}, "https://openslr.org/resources/36/asr_sundanese_8.zip": {"num_bytes": 1436421462, "checksum": "755e0af77d0bd6d4aa7895b2ab9fbf792c57efc49c8cec21d3d728fe3374b621"}, "https://openslr.org/resources/36/asr_sundanese_9.zip": {"num_bytes": 1434660332, "checksum": "5d426d2c99eb91ffd3db193d510e288133c426556430fe2e70e08f58815f5a31"}, "https://openslr.org/resources/36/asr_sundanese_a.zip": {"num_bytes": 1436753516, "checksum": "e032537b62aa8a8abe660bca418ac2e26a93bdc7a357b948a301bde286952fa5"}, "https://openslr.org/resources/36/asr_sundanese_b.zip": {"num_bytes": 1435014221, "checksum": "e999e83fde37ec973b1a1822aaa8769488c2a95058a3448661ac94c319881549"}, "https://openslr.org/resources/36/asr_sundanese_c.zip": {"num_bytes": 1429102490, "checksum": "275ac684fe7b8bf012dc251ddb91496e2d95c2c257ec87ab0847efa379e96787"}, "https://openslr.org/resources/36/asr_sundanese_d.zip": {"num_bytes": 1432973082, "checksum": "34ae64f8a29ddef2e05ca5ce8122b461a737d58d796dbe577a4e8a4a05c6b2ce"}, "https://openslr.org/resources/36/asr_sundanese_e.zip": {"num_bytes": 1443609656, "checksum": "25e36087063e0cc5e54cf04e5a4e065b19e0c1bc9cbc07a9f98635941b53bfea"}, "https://openslr.org/resources/36/asr_sundanese_f.zip": {"num_bytes": 1463531929, "checksum": "3d1410c31cc70994f82b9555967fa4c8d682aee288cc85b05b9c4e6352a49f14"}}, "download_size": 22996553929, "post_processing_size": null, "dataset_size": 88942337, "size_in_bytes": 23085496266}, "SLR70": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR70", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1339608, "num_examples": 3359, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/70/en_ng_female.zip": {"num_bytes": 759856787, "checksum": "e840afea824c9075db8c7d574e993837c6a4861fd0ff0275c4cc223aa00a785c"}, "https://openslr.org/resources/70/en_ng_male.zip": {"num_bytes": 454098409, "checksum": "f619d09d5ffdf0d4044ef1d57585eeaa50c0cbf08844782a9dd08f56ea9e567f"}}, "download_size": 1213955196, "post_processing_size": null, "dataset_size": 1339608, "size_in_bytes": 1215294804}, "SLR71": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR71", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1676273, "num_examples": 4374, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/71/es_cl_female.zip": {"num_bytes": 585615697, "checksum": "23593f3dac085d26f99df38159c1ab0ae2c23f5c97ad869292496abc6e171bc6"}, "https://openslr.org/resources/71/es_cl_male.zip": {"num_bytes": 859750206, "checksum": "ace2cbd6df28e94fdd636ba1263b72b557722b0d2abcf4c6e072011ac870cbee"}}, "download_size": 1445365903, "post_processing_size": null, "dataset_size": 1676273, "size_in_bytes": 1447042176}, "SLR72": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR72", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1876301, "num_examples": 4903, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/72/es_co_female.zip": {"num_bytes": 801960444, "checksum": "03721aa7b6b7fe1dd309a0c545cbef4898fac99ed811f4e1769b2fc16bb7eb70"}, "https://openslr.org/resources/72/es_co_male.zip": {"num_bytes": 810070088, "checksum": "2e72abf283adf3f52c28d9f4d59709d4a24fa57243dc696a99dfbc1b8e534c9a"}}, "download_size": 1612030532, "post_processing_size": null, "dataset_size": 1876301, "size_in_bytes": 1613906833}, "SLR73": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR73", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2084052, "num_examples": 5447, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/73/es_pe_female.zip": {"num_bytes": 913983951, "checksum": "0bcb138a6a4657fa52ec6ec129807dc2476d9a89184ea2ab4f588bbbddc12062"}, "https://openslr.org/resources/73/es_pe_male.zip": {"num_bytes": 1026322863, "checksum": "8baf41802bc59f7d170ee091d8676db725903efdcfeda12d699a31a746ae50bf"}}, "download_size": 1940306814, "post_processing_size": null, "dataset_size": 2084052, "size_in_bytes": 1942390866}, "SLR74": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR74", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 237395, "num_examples": 617, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/74/es_pr_female.zip": {"num_bytes": 214181314, "checksum": "0ff2f4ed63fbbc4305140bb88c71ca9a72b18c6686a755534b47ae28dce2861d"}}, "download_size": 214181314, "post_processing_size": null, "dataset_size": 237395, "size_in_bytes": 214418709}, "SLR75": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR75", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1286937, "num_examples": 3357, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/75/es_ve_female.zip": {"num_bytes": 517000277, "checksum": "4600baead7519afaa5f6b33cf3f4b2373e7f1902aa72841fc38582660b07fe31"}, "https://openslr.org/resources/75/es_ve_male.zip": {"num_bytes": 526316727, "checksum": "3cf8703b1b61de1bf964e26f0a2c7f0ec637b1a85eafd982e98de9301558b289"}}, "download_size": 1043317004, "post_processing_size": null, "dataset_size": 1286937, "size_in_bytes": 1044603941}, "SLR76": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR76", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2756507, "num_examples": 7136, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/76/eu_es_female.zip": {"num_bytes": 1622676657, "checksum": "b3eaa91f2be198c8455f46e802f671e33cba5d95909e58e0b59cb6638f5b4947"}, "https://openslr.org/resources/76/eu_es_male.zip": {"num_bytes": 1418448856, "checksum": "787bcb8369d3797a6b34b0e2d420f5255e12e6c6a385cd4e72ddde59c6018227"}}, "download_size": 3041125513, "post_processing_size": null, "dataset_size": 2756507, "size_in_bytes": 3043882020}, "SLR77": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR77", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2217652, "num_examples": 5587, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/77/gl_es_female.zip": {"num_bytes": 1656677564, "checksum": "e2cda7ef8d5f57b5f3086473d5297e6bb73757f0c446409245f407d7612c5060"}, "https://openslr.org/resources/77/gl_es_male.zip": {"num_bytes": 551314211, "checksum": "b768ed0b77fb4e88adf795dedcc872c53a4348ee8d11eb8efb4571fff94688be"}}, "download_size": 2207991775, "post_processing_size": null, "dataset_size": 2217652, "size_in_bytes": 2210209427}, "SLR78": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR78", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2121986, "num_examples": 4272, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/78/gu_in_female.zip": {"num_bytes": 917450036, "checksum": "bbda0815e0d2e01ad9310768e0e2be9efb612a9c56c66c4ab2f32b817da5c786"}, "https://openslr.org/resources/78/gu_in_male.zip": {"num_bytes": 825772066, "checksum": "ce474d1686104b3bd274a2d5192459cb4dee6e0c9bbcf3de1bb3b39c6ab89caf"}}, "download_size": 1743222102, "post_processing_size": null, "dataset_size": 2121986, "size_in_bytes": 1745344088}, "SLR79": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR79", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 2176539, "num_examples": 4400, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/79/kn_in_female.zip": {"num_bytes": 980825420, "checksum": "182a147e5747ad4f4ac50a5e7e1ee3683e1c2c1d9105963365d151d664466b62"}, "https://openslr.org/resources/79/kn_in_male.zip": {"num_bytes": 840093695, "checksum": "38e3c0c51f792a3655cc8f4747b339df8ec4b1031a0fff590c1a1af6a8bbbcdf"}}, "download_size": 1820919115, "post_processing_size": null, "dataset_size": 2176539, "size_in_bytes": 1823095654}, "SLR80": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR80", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1308651, "num_examples": 2530, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/80/my_mm_female.zip": {"num_bytes": 948181015, "checksum": "a7cdcaa5e06864e02fa18fc0fe9595feadf332d6a63aadc01ce51a24969a2708"}}, "download_size": 948181015, "post_processing_size": null, "dataset_size": 1308651, "size_in_bytes": 949489666}, "SLR86": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR86", "version": "0.0.0", "splits": {"train": {"name": "train", "num_bytes": 1378801, "num_examples": 3583, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/86/yo_ng_female.zip": {"num_bytes": 462033045, "checksum": "8875ebc839e57a3318ba1ce37d98c35da46d4f99f9f777f83fcf074257804060"}, "https://openslr.org/resources/86/yo_ng_male.zip": {"num_bytes": 445032517, "checksum": "58519b27f6954c446d0e7221b227a6f342b9c5ea66bf02af40c1616e086afc4c"}}, "download_size": 907065562, "post_processing_size": null, "dataset_size": 1378801, "size_in_bytes": 908444363}, "SLR32": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR32", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 4544052380, "num_examples": 9821, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/32/af_za.tar.gz": {"num_bytes": 950827926, "checksum": "b702a68486bf16cbf302d6e0808ea2e966f3dfa720ea0d6ce36d881aa266978f"}, "https://openslr.org/resources/32/st_za.tar.gz": {"num_bytes": 724425648, "checksum": "509202bcf6fae3b24508cfdbc3a6c886b29b4c3d822adbf6c40b21d98ada3fcf"}, "https://openslr.org/resources/32/tn_za.tar.gz": {"num_bytes": 729406193, "checksum": "3e6a522d2fafa071ec1d484cb79336ff36008a5d5d34e1444984e5df8312eb6f"}, "https://openslr.org/resources/32/xh_za.tar.gz": {"num_bytes": 907498093, "checksum": "712336c82637cbfb4304766dd7c0889bac1664945aed08bafb49eac29ae756c3"}, "https://s3.amazonaws.com/datasets.huggingface.co/openslr/SLR32/af_za/line_index.tsv": {"num_bytes": 218947, "checksum": "c4d096cb50a037ce8c3a41a198615083d93c3bbbd6f1cfdb52c3ebfa5de09340"}, "https://s3.amazonaws.com/datasets.huggingface.co/openslr/SLR32/st_za/line_index.tsv": {"num_bytes": 154784, "checksum": "04cd7e8db7eae8ad9044fa8ac79f3e48fd3a64d045cd907ff005fd82f1ca6a82"}, "https://s3.amazonaws.com/datasets.huggingface.co/openslr/SLR32/tn_za/line_index.tsv": {"num_bytes": 174447, "checksum": "c621270b3ee70d515bbce846e1b64135dc4554f62cf3528d9550a1512f5841f1"}, "https://s3.amazonaws.com/datasets.huggingface.co/openslr/SLR32/xh_za/line_index.tsv": {"num_bytes": 178725, "checksum": "6a356aac4e698561302574f62be30029536ac057e009633f0af8de68513d874a"}}, "download_size": 3312884763, "post_processing_size": null, "dataset_size": 4544052380, "size_in_bytes": 7856937143}, "SLR52": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR52", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 77369899, "num_examples": 185293, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/52/asr_sinhala_0.zip": {"num_bytes": 915237858, "checksum": "41bcd4cf6edde39e49bf8ca6b54c32e1403609759ff9edea2a2696ef7aa8fff5"}, "https://openslr.org/resources/52/asr_sinhala_1.zip": {"num_bytes": 908852134, "checksum": "7a4dd3279254f06ba8d1e864d2aa68eec1e6740cfc2b718d2bc060b878871e74"}, "https://openslr.org/resources/52/asr_sinhala_2.zip": {"num_bytes": 913568157, "checksum": "746b5ee016e09868016851ff2148000570b6cb6b9acde5d16527f20053d1cd14"}, "https://openslr.org/resources/52/asr_sinhala_3.zip": {"num_bytes": 901325452, "checksum": "a167e6bd9c0b64e105cc57528a455a4653303336b85731273039487d9f94afda"}, "https://openslr.org/resources/52/asr_sinhala_4.zip": {"num_bytes": 922493671, "checksum": "f17fc798ea085e876500095e8dd357d1088303598d190642978c353d51d2b94b"}, "https://openslr.org/resources/52/asr_sinhala_5.zip": {"num_bytes": 922505332, "checksum": "8285340d15064caa1da0635d50471c8de24d33e3d1ae7af3c63e4a23d3ba25fe"}, "https://openslr.org/resources/52/asr_sinhala_6.zip": {"num_bytes": 914729823, "checksum": "a511dc329dfc493c9e25d1315ab95da93a8a4b751e032c1848eeeb8655608403"}, "https://openslr.org/resources/52/asr_sinhala_7.zip": {"num_bytes": 911992962, "checksum": "8180736327c3147bac912c329fe3a571a61ecb6d4da7d4584acb0d34ab204fa5"}, "https://openslr.org/resources/52/asr_sinhala_8.zip": {"num_bytes": 924344925, "checksum": "fdf333751c254f8dc7b649fd1a48cf47ae8e855e369a182d88bee3325ae8a99d"}, "https://openslr.org/resources/52/asr_sinhala_9.zip": {"num_bytes": 920427318, "checksum": "288f4a7ea055b3963ad7d6a6e6e6189672715a42d0a1b6e99a1a8ba0fe67a9c6"}, "https://openslr.org/resources/52/asr_sinhala_a.zip": {"num_bytes": 901532849, "checksum": "da36de6739ce5b8c835c3c232d5122b883a88442ec3f91a534154b2a9177d0ec"}, "https://openslr.org/resources/52/asr_sinhala_b.zip": {"num_bytes": 924132571, "checksum": "4b5dd26de34b27e9cc88842e992626694fd329f23493f40c748d556c61395d2a"}, "https://openslr.org/resources/52/asr_sinhala_c.zip": {"num_bytes": 938991415, "checksum": "f6db1cece623fafe866a56b9f7100976823b32f968036b72a9a634138e87e92d"}, "https://openslr.org/resources/52/asr_sinhala_d.zip": {"num_bytes": 911368918, "checksum": "8ecc58c745998b05b21c8af05fdc741d437a654a8babba16c4970ad981074e2c"}, "https://openslr.org/resources/52/asr_sinhala_e.zip": {"num_bytes": 927771260, "checksum": "f5cbfd3c8d1c5bf6fe7a1c1ee606101368512a852856fb2d01f4dde7869f605a"}, "https://openslr.org/resources/52/asr_sinhala_f.zip": {"num_bytes": 917209429, "checksum": "65782dee2ba4256bab123835ef2277a3fd1116f20f403a2c4ff5ace3ac45714c"}}, "download_size": 14676484074, "post_processing_size": null, "dataset_size": 77369899, "size_in_bytes": 14753853973}, "SLR53": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR53", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 88073248, "num_examples": 218703, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/53/asr_bengali_0.zip": {"num_bytes": 919838172, "checksum": "c1bbeadbcffae8a40d8e54f25c6c3dea922951a322cc7875a18f52dec127741a"}, "https://openslr.org/resources/53/asr_bengali_1.zip": {"num_bytes": 906161405, "checksum": "b6af5d30439d25a5df20efd85bfa2e900ee962e3afb91fe88a65cdbb0689cf84"}, "https://openslr.org/resources/53/asr_bengali_2.zip": {"num_bytes": 921562897, "checksum": "ac0b50d5ad38d5295c16b7eb62901b273bd6df55dea7b1a8495e69c1a50c0986"}, "https://openslr.org/resources/53/asr_bengali_3.zip": {"num_bytes": 918817316, "checksum": "444760953dc4e006cd6e38ea647b611c7be93a07a78b1a6b83974fe3ebba6b65"}, "https://openslr.org/resources/53/asr_bengali_4.zip": {"num_bytes": 908199672, "checksum": "975a1b690ccfe0609ba50738666758ad92c3683416d1cf7771972496adb4313f"}, "https://openslr.org/resources/53/asr_bengali_5.zip": {"num_bytes": 932042725, "checksum": "21dec790c4f96771a28347ed4430c74d3f3bff046684f4522c2301f7029f632d"}, "https://openslr.org/resources/53/asr_bengali_6.zip": {"num_bytes": 900826997, "checksum": "b0f93fb831bb36c75a6f4c0731bfb991f8b6529bc3b16ee0bede3e7108a7679e"}, "https://openslr.org/resources/53/asr_bengali_7.zip": {"num_bytes": 927750265, "checksum": "647cbcfb9c92930f4625dbc107f4218cdd37f8e3494df23d42917640da22938c"}, "https://openslr.org/resources/53/asr_bengali_8.zip": {"num_bytes": 927268934, "checksum": "73168b982a0665fb4f1104eaafeb3ddc01780b39978649e01ce6ab7850a86de1"}, "https://openslr.org/resources/53/asr_bengali_9.zip": {"num_bytes": 906382286, "checksum": "25f678604ffe93fc986cc402dc4a4329f36eb44ab627c645c4957dbf8e85917c"}, "https://openslr.org/resources/53/asr_bengali_a.zip": {"num_bytes": 900283300, "checksum": "daf0fc69dbd041fd254e96df1732359666ace7c9aea9d5c64c03ab8add3a00c4"}, "https://openslr.org/resources/53/asr_bengali_b.zip": {"num_bytes": 910050386, "checksum": "2d6fc0f464130bc3761546ac0e8b085921d5f1c9afbf886b9c1fa95f9755fd26"}, "https://openslr.org/resources/53/asr_bengali_c.zip": {"num_bytes": 897120616, "checksum": "116e8e63882f548410a3b835d2d3b6a11e6a05969374d173b9c01a8ba7112abd"}, "https://openslr.org/resources/53/asr_bengali_d.zip": {"num_bytes": 914366610, "checksum": "aa155d8e0688d032229ad7a5e4c713e696d1ea531feae83ae3230e526f1db7a6"}, "https://openslr.org/resources/53/asr_bengali_e.zip": {"num_bytes": 922936447, "checksum": "2f6f97591adde2b469f29b601ba33bfc3e8049681594fe31be8a55204c70ae15"}, "https://openslr.org/resources/53/asr_bengali_f.zip": {"num_bytes": 917202893, "checksum": "42542ec7d434bd6a34b30c01fa24de206fb2d2e56afea745a14867a8c0eaa32c"}}, "download_size": 14630810921, "post_processing_size": null, "dataset_size": 88073248, "size_in_bytes": 14718884169}, "SLR54": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR54", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 62735822, "num_examples": 157905, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/54/asr_nepali_0.zip": {"num_bytes": 589002210, "checksum": "6c783a5a731c7a9c2cac678823a2ee7866db1acbad7f9a199bce3bf7a64e22b6"}, "https://openslr.org/resources/54/asr_nepali_1.zip": {"num_bytes": 582088242, "checksum": "661865704f3d9adacd74f8c98cd0f6a6e869902c6441efb96c761573dd1d2f05"}, "https://openslr.org/resources/54/asr_nepali_2.zip": {"num_bytes": 589401540, "checksum": "a2b4c373d7ebe5f2d491bf73c2324e6f5645d724df58fd765c71a3a86e7ab6d4"}, "https://openslr.org/resources/54/asr_nepali_3.zip": {"num_bytes": 574596426, "checksum": "6a925d4448f98694185d50cacfa380fad128b47ebf9d5519526b83dd6586348d"}, "https://openslr.org/resources/54/asr_nepali_4.zip": {"num_bytes": 583746586, "checksum": "7315f69b392690c22db32b3c2f14b82b1f64215c5a21d697c421d6d220a55bf0"}, "https://openslr.org/resources/54/asr_nepali_5.zip": {"num_bytes": 572967016, "checksum": "3891b332a9fc55e4fb0579bf67431989e92ab05b9715c0e9673cf356e878e0df"}, "https://openslr.org/resources/54/asr_nepali_6.zip": {"num_bytes": 588104006, "checksum": "78c321a8f55a5aa0c56feb791826a2751087cc87a36b27bba56ac6b124eac73f"}, "https://openslr.org/resources/54/asr_nepali_7.zip": {"num_bytes": 588410232, "checksum": "8b05b8b4aedfc9829cf33cd65ab3c1474eb8f738078b414d40b61f08782064ec"}, "https://openslr.org/resources/54/asr_nepali_8.zip": {"num_bytes": 585192213, "checksum": "0125cfc7c54e44bd4ac01d5558130a752cad26aa7055df753c65b400ece2c9f8"}, "https://openslr.org/resources/54/asr_nepali_9.zip": {"num_bytes": 578834881, "checksum": "6c68e80fe7c58a33aeb91b5b9bc37a99f9374a8f629e2a109bddba51d1712b12"}, "https://openslr.org/resources/54/asr_nepali_a.zip": {"num_bytes": 587798317, "checksum": "03b7bf7b6ace01a677e2a0dd079053ea29abf45743f197761190f3f52678e6df"}, "https://openslr.org/resources/54/asr_nepali_b.zip": {"num_bytes": 584397714, "checksum": "9a98d93ae91e75c6928d9222b387105e99030b8b81df9ada57c87f6b317c0853"}, "https://openslr.org/resources/54/asr_nepali_c.zip": {"num_bytes": 579440365, "checksum": "8bac1a046a86fc3684bfec2e5af1b1e0916ec5c2f1be5ccb1fb4778ecd7bb357"}, "https://openslr.org/resources/54/asr_nepali_d.zip": {"num_bytes": 588470094, "checksum": "9aad327fd72efcc009d060a8299aa70ca1757f1ec32fe3280d53e449ef75e5c3"}, "https://openslr.org/resources/54/asr_nepali_e.zip": {"num_bytes": 578091869, "checksum": "4ba73ada7cf482611b3ad3e17a77685b1ac872e5840953c07a1c6c2b10a83e4a"}, "https://openslr.org/resources/54/asr_nepali_f.zip": {"num_bytes": 577705651, "checksum": "062f4908802ab0d57362da1dfea4898898f6d21ba09596c1e271c2cda47297c6"}}, "download_size": 9328247362, "post_processing_size": null, "dataset_size": 62735822, "size_in_bytes": 9390983184}, "SLR83": {"description": "OpenSLR is a site devoted to hosting speech and language resources, such as training corpora for speech recognition,\nand software related to speech recognition. We intend to be a convenient place for anyone to put resources that\nthey have created, so that they can be downloaded publicly.\n", "citation": "SLR32:\n@inproceedings{van-niekerk-etal-2017,\n title = {{Rapid development of TTS corpora for four South African languages}},\n author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson\n and Martin Jansche and Linne Ha},\n booktitle = {Proc. Interspeech 2017},\n pages = {2178--2182},\n address = {Stockholm, Sweden},\n month = aug,\n year = {2017},\n URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}\n}\n\nSLR35, SLR36, SLR52, SLR53, SLR54:\n@inproceedings{kjartansson-etal-sltu2018,\n title = {{Crowd-Sourced Speech Corpora for Javanese, Sundanese, Sinhala, Nepali, and Bangladeshi Bengali}},\n author = {Oddur Kjartansson and Supheakmungkol Sarin and Knot Pipatsrisawat and Martin Jansche and Linne Ha},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {52--55},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-11},\n}\n\nSLR41, SLR42, SLR43, SLR44:\n@inproceedings{kjartansson-etal-tts-sltu2018,\n title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese,\n Khmer, Nepali, Sinhala, and Sundanese}},\n author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu\n De Silva and Supheakmungkol Sarin},\n booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},\n year = {2018},\n address = {Gurugram, India},\n month = aug,\n pages = {66--70},\n URL = {https://dx.doi.org/10.21437/SLTU.2018-14}\n}\n\nSLR63, SLR64, SLR65, SLR66, SLR78, SLR79:\n@inproceedings{he-etal-2020-open,\n title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and\n Telugu Speech Synthesis Systems}},\n author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin,\n Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n pages = {6494--6503},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.800},\n ISBN = \"{979-10-95546-34-4},\n}\n\nSLR69, SLR76, SLR77:\n@inproceedings{kjartansson-etal-2020-open,\n title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},\n author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},\n booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages\n (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},\n year = {2020},\n pages = {21--27},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.sltu-1.3},\n ISBN = {979-10-95546-35-1},\n}\n\nSLR71, SLR71, SLR72, SLR73, SLR74, SLR75:\n@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,\n title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},\n author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n year = {2020},\n month = may,\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.801},\n pages = {6504--6513},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR83\n@inproceedings{demirsahin-etal-2020-open,\n title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},\n author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = {6532--6541},\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.804},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR80\n@inproceedings{oo-etal-2020-burmese,\n title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application\n to Text-to-Speech}},\n author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin,\n Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},\n booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},\n month = may,\n year = {2020},\n pages = \"6328--6339\",\n address = {Marseille, France},\n publisher = {European Language Resources Association (ELRA)},\n url = {https://www.aclweb.org/anthology/2020.lrec-1.777},\n ISBN = {979-10-95546-34-4},\n}\n\nSLR86\n@inproceedings{gutkin-et-al-yoruba2020,\n title = {{Developing an Open-Source Corpus of Yoruba Speech}},\n author = {Alexander Gutkin and I\u015f\u0131n Demir\u015fahin and Oddur Kjartansson and Clara Rivera and K\u00f3\u0323l\u00e1 T\u00fab\u00f2\u0323s\u00fan},\n booktitle = {Proceedings of Interspeech 2020},\n pages = {404--408},\n month = {October},\n year = {2020},\n address = {Shanghai, China},\n publisher = {International Speech and Communication Association (ISCA)},\n doi = {10.21437/Interspeech.2020-1096},\n url = {https://dx.doi.org/10.21437/Interspeech.2020-1096},\n}\n", "homepage": "https://openslr.org/", "license": "", "features": {"path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 48000, "mono": true, "_storage_dtype": "string", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": [{"task": "automatic-speech-recognition", "audio_file_path_column": "path", "transcription_column": "sentence"}], "builder_name": "open_slr", "config_name": "SLR83", "version": {"version_str": "0.0.0", "description": null, "major": 0, "minor": 0, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 7098985, "num_examples": 17877, "dataset_name": "open_slr"}}, "download_checksums": {"https://openslr.org/resources/83/irish_english_male.zip": {"num_bytes": 164531638, "checksum": "2e5dbae4cc27e0e24e21f21c8e7464d219feb703f5fee3e567de6561a05024ed"}, "https://openslr.org/resources/83/midlands_english_female.zip": {"num_bytes": 103085118, "checksum": "aa1083a319e52d658b85c162905ec27cdf2ac6d5645b4caeab05a385a2c8a37f"}, "https://openslr.org/resources/83/midlands_english_male.zip": {"num_bytes": 166833961, "checksum": "8192c7a0626eb742f9999e63162289f8f9a86c9cb49ef68298dc7f624acaebcf"}, "https://openslr.org/resources/83/northern_english_female.zip": {"num_bytes": 314983063, "checksum": "22b6229d08481e7605b028185dc55dccd0611db428854f2d485d9ff34395a65c"}, "https://openslr.org/resources/83/northern_english_male.zip": {"num_bytes": 817772034, "checksum": "b627d500d1b2e3c4921fb6d91338ead7b972f67c1c2f0babb300e0ef844c7248"}, "https://openslr.org/resources/83/scottish_english_female.zip": {"num_bytes": 351443880, "checksum": "2dbe5545a7ab87112c7730086586f738ec4f42171f7738628ba084ed4ba15ccb"}, "https://openslr.org/resources/83/scottish_english_male.zip": {"num_bytes": 620254118, "checksum": "c7d2d9cd581c48a8323f6cc3886d879e2e7aca5931d98228e07d07b350d9f9a9"}, "https://openslr.org/resources/83/southern_english_female.zip": {"num_bytes": 1636701939, "checksum": "e0a2e8e64b9efdbd7bae5cdf33ac8b81db495b499c9d40da0a7d7842e42b1e76"}, "https://openslr.org/resources/83/southern_english_male.zip": {"num_bytes": 1700955740, "checksum": "788b1c59fb5713b0e1efebc02b7aa1b55182b21955493b299b9941c70a878cad"}, "https://openslr.org/resources/83/welsh_english_female.zip": {"num_bytes": 595683538, "checksum": "3c2465b9618e33f42c7d2ee753b54ae593714e758e236efcdd56c14c5bd89f1d"}, "https://openslr.org/resources/83/welsh_english_male.zip": {"num_bytes": 757645790, "checksum": "eaf8de0f8872bb647d5c159bb33713cfd58966bd59d733f5f399793778ea5058"}}, "download_size": 7229890819, "post_processing_size": null, "dataset_size": 7098985, "size_in_bytes": 7236989804}} \ No newline at end of file diff --git a/datasets/openslr/dummy/SLR32/0.0.0/dummy_data.zip b/datasets/openslr/dummy/SLR32/0.0.0/dummy_data.zip index 4c518d6a7ff..505fa794f07 100644 Binary files a/datasets/openslr/dummy/SLR32/0.0.0/dummy_data.zip and b/datasets/openslr/dummy/SLR32/0.0.0/dummy_data.zip differ diff --git a/datasets/openslr/openslr.py b/datasets/openslr/openslr.py index 72a08deccf2..c76375b22f3 100644 --- a/datasets/openslr/openslr.py +++ b/datasets/openslr/openslr.py @@ -112,20 +112,6 @@ ISBN = {979-10-95546-34-4}, } -SLR83 -@inproceedings{demirsahin-etal-2020-open, - title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}}, - author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara}, - booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)}, - month = may, - year = {2020}, - pages = {6532--6541}, - address = {Marseille, France}, - publisher = {European Language Resources Association (ELRA)}, - url = {https://www.aclweb.org/anthology/2020.lrec-1.804}, - ISBN = {979-10-95546-34-4}, -} - SLR80 @inproceedings{oo-etal-2020-burmese, title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application @@ -176,10 +162,10 @@ "Setswana and isiXhosa.", "Files": ["af_za.tar.gz", "st_za.tar.gz", "tn_za.tar.gz", "xh_za.tar.gz"], "IndexFiles": [ - "af_za/za/afr/line_index.tsv", - "st_za/za/sso/line_index.tsv", - "tn_za/za/tsn/line_index.tsv", - "xh_za/za/xho/line_index.tsv", + "https://s3.amazonaws.com/datasets.huggingface.co/openslr/SLR32/af_za/line_index.tsv", + "https://s3.amazonaws.com/datasets.huggingface.co/openslr/SLR32/st_za/line_index.tsv", + "https://s3.amazonaws.com/datasets.huggingface.co/openslr/SLR32/tn_za/line_index.tsv", + "https://s3.amazonaws.com/datasets.huggingface.co/openslr/SLR32/xh_za/line_index.tsv", ], "DataDirs": ["af_za/za/afr/wavs", "st_za/za/sso/wavs", "tn_za/za/tsn/wavs", "xh_za/za/xho/wavs"], }, @@ -493,39 +479,6 @@ "IndexFiles": ["line_index.tsv"], "DataDirs": [""], }, - "SLR83": { - "Language": "English", - "LongName": "Crowdsourced high-quality UK and Ireland English Dialect speech data set", - "Category": "Speech", - "Summary": "Data set which contains male and female recordings of English from various dialects of the UK and Ireland", - "Files": [ - "irish_english_male.zip", - "midlands_english_female.zip", - "midlands_english_male.zip", - "northern_english_female.zip", - "northern_english_male.zip", - "scottish_english_female.zip", - "scottish_english_male.zip", - "southern_english_female.zip", - "southern_english_male.zip", - "welsh_english_female.zip", - "welsh_english_male.zip", - ], - "IndexFiles": [ - "line_index.csv", - "line_index.csv", - "line_index.csv", - "line_index.csv", - "line_index.csv", - "line_index.csv", - "line_index.csv", - "line_index.csv", - "line_index.csv", - "line_index.csv", - "line_index.csv", - ], - "DataDirs": ["", "", "", "", "", "", "", "", "", "", ""], - }, "SLR86": { "Language": "Yoruba", "LongName": "Crowdsourced high-quality Yoruba speech data set", @@ -565,6 +518,7 @@ def __init__(self, name, **kwargs): class OpenSlr(datasets.GeneratorBasedBuilder): + DEFAULT_WRITER_BATCH_SIZE = 32 BUILDER_CONFIGS = [ OpenSlrConfig( @@ -605,21 +559,28 @@ def _split_generators(self, dl_manager): """Returns SplitGenerators.""" resource_number = self.config.name.replace("SLR", "") urls = [f"{_DATA_URL.format(resource_number)}/{file}" for file in self.config.files] - dl_paths = dl_manager.download_and_extract(urls) - abs_path_to_indexs = [os.path.join(path, f"{self.config.index_files[i]}") for i, path in enumerate(dl_paths)] - abs_path_to_datas = [os.path.join(path, f"{self.config.data_dirs[i]}") for i, path in enumerate(dl_paths)] + if urls[0].endswith(".zip"): + dl_paths = dl_manager.download_and_extract(urls) + path_to_indexs = [os.path.join(path, f"{self.config.index_files[i]}") for i, path in enumerate(dl_paths)] + path_to_datas = [os.path.join(path, f"{self.config.data_dirs[i]}") for i, path in enumerate(dl_paths)] + archives = None + else: + archives = dl_manager.download(urls) + path_to_indexs = dl_manager.download(self.config.index_files) + path_to_datas = self.config.data_dirs return [ datasets.SplitGenerator( name=datasets.Split.TRAIN, gen_kwargs={ - "path_to_indexs": abs_path_to_indexs, - "path_to_datas": abs_path_to_datas, + "path_to_indexs": path_to_indexs, + "path_to_datas": path_to_datas, + "archive_files": [dl_manager.iter_archive(archive) for archive in archives] if archives else None, }, ), ] - def _generate_examples(self, path_to_indexs, path_to_datas): + def _generate_examples(self, path_to_indexs, path_to_datas, archive_files): """Yields examples.""" counter = -1 @@ -640,16 +601,26 @@ def _generate_examples(self, path_to_indexs, path_to_datas): sentence = sentence_index[filename] counter += 1 yield counter, {"path": path, "audio": path, "sentence": sentence} - elif self.config.name in ["SLR83"]: - for i, path_to_index in enumerate(path_to_indexs): + elif self.config.name in ["SLR32"]: # use archives + for path_to_index, path_to_data, files in zip(path_to_indexs, path_to_datas, archive_files): + sentences = {} with open(path_to_index, encoding="utf-8") as f: - lines = f.readlines() - for id_, line in enumerate(lines): - field_values = re.split(r",\s?", line.strip()) - user_id, filename, sentence = field_values - path = os.path.join(path_to_datas[i], f"{filename}.wav") + for line in f: + # Following regexs are needed to normalise the lines, since the datasets + # are not always consistent and have bugs: + line = re.sub(r"\t[^\t]*\t", "\t", line.strip()) + field_values = re.split(r"\t\t?", line) + if len(field_values) != 2: + continue + filename, sentence = field_values + # set absolute path for audio file + path = f"{path_to_data}/{filename}.wav" + sentences[path] = sentence + for path, f in files: + if path.startswith(path_to_data): counter += 1 - yield counter, {"path": path, "audio": path, "sentence": sentence} + audio = {"path": path, "bytes": f.read()} + yield counter, {"path": path, "audio": audio, "sentence": sentences[path]} else: for i, path_to_index in enumerate(path_to_indexs): with open(path_to_index, encoding="utf-8") as f: diff --git a/datasets/vivos/dataset_infos.json b/datasets/vivos/dataset_infos.json index 0b87dd10d94..df8ed1e44dd 100644 --- a/datasets/vivos/dataset_infos.json +++ b/datasets/vivos/dataset_infos.json @@ -1 +1 @@ -{"default": {"description": "VIVOS is a free Vietnamese speech corpus consisting of 15 hours of recording speech prepared for\nVietnamese Automatic Speech Recognition task.\nThe corpus was prepared by AILAB, a computer science lab of VNUHCM - University of Science, with Prof. Vu Hai Quan is the head of.\nWe publish this corpus in hope to attract more scientists to solve Vietnamese speech recognition problems.\n", "citation": "@InProceedings{vivos:2016,\nAddress = {Ho Chi Minh, Vietnam}\ntitle = {VIVOS: 15 hours of recording speech prepared for Vietnamese Automatic Speech Recognition},\nauthor={Prof. Vu Hai Quan},\nyear={2016}\n}\n", "homepage": "https://ailab.hcmus.edu.vn/vivos", "license": "cc-by-sa-4.0", "features": {"speaker_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "vivos_dataset", "config_name": "default", "version": {"version_str": "1.1.0", "description": null, "major": 1, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 3186233, "num_examples": 11660, "dataset_name": "vivos_dataset"}, "test": {"name": "test", "num_bytes": 193258, "num_examples": 760, "dataset_name": "vivos_dataset"}}, "download_checksums": {"https://ailab.hcmus.edu.vn/assets/vivos.tar.gz": {"num_bytes": 1474408300, "checksum": "147477f7a7702cbafc2ee3808d1c142989d0dbc8d9fce8e07d5f329d5119e4ca"}}, "download_size": 1474408300, "post_processing_size": null, "dataset_size": 3379491, "size_in_bytes": 1477787791}} \ No newline at end of file +{"default": {"description": "VIVOS is a free Vietnamese speech corpus consisting of 15 hours of recording speech prepared for\nVietnamese Automatic Speech Recognition task.\nThe corpus was prepared by AILAB, a computer science lab of VNUHCM - University of Science, with Prof. Vu Hai Quan is the head of.\nWe publish this corpus in hope to attract more scientists to solve Vietnamese speech recognition problems.\n", "citation": "@InProceedings{vivos:2016,\nAddress = {Ho Chi Minh, Vietnam}\ntitle = {VIVOS: 15 hours of recording speech prepared for Vietnamese Automatic Speech Recognition},\nauthor={Prof. Vu Hai Quan},\nyear={2016}\n}\n", "homepage": "https://ailab.hcmus.edu.vn/vivos", "license": "cc-by-sa-4.0", "features": {"speaker_id": {"dtype": "string", "id": null, "_type": "Value"}, "path": {"dtype": "string", "id": null, "_type": "Value"}, "audio": {"sampling_rate": 16000, "mono": true, "_storage_dtype": "struct", "id": null, "_type": "Audio"}, "sentence": {"dtype": "string", "id": null, "_type": "Value"}}, "post_processed": null, "supervised_keys": null, "task_templates": null, "builder_name": "vivos_dataset", "config_name": "default", "version": {"version_str": "1.1.0", "description": null, "major": 1, "minor": 1, "patch": 0}, "splits": {"train": {"name": "train", "num_bytes": 1722000675, "num_examples": 11660, "dataset_name": "vivos_dataset"}, "test": {"name": "test", "num_bytes": 86120132, "num_examples": 760, "dataset_name": "vivos_dataset"}}, "download_checksums": {"https://s3.amazonaws.com/datasets.huggingface.co/vivos/train/prompts.txt": {"num_bytes": 1075754, "checksum": "d6c6fcbe258d80d0f63e0f87d414b805f6ae11f41d40cdba5454152c3d6f14c0"}, "https://s3.amazonaws.com/datasets.huggingface.co/vivos/test/prompts.txt": {"num_bytes": 56446, "checksum": "ed27898d081eaa41b1e7e38451eb85f7ca06138896b471691510e7bab1187c2e"}, "https://ailab.hcmus.edu.vn/assets/vivos.tar.gz": {"num_bytes": 1474408300, "checksum": "147477f7a7702cbafc2ee3808d1c142989d0dbc8d9fce8e07d5f329d5119e4ca"}}, "download_size": 1475540500, "post_processing_size": null, "dataset_size": 1808120807, "size_in_bytes": 3283661307}} \ No newline at end of file diff --git a/datasets/vivos/dummy/1.1.0/dummy_data.zip b/datasets/vivos/dummy/1.1.0/dummy_data.zip index 271d0bda93c..1c7173a58a3 100644 Binary files a/datasets/vivos/dummy/1.1.0/dummy_data.zip and b/datasets/vivos/dummy/1.1.0/dummy_data.zip differ diff --git a/datasets/vivos/vivos.py b/datasets/vivos/vivos.py index 4dda623b705..0e596402b77 100644 --- a/datasets/vivos/vivos.py +++ b/datasets/vivos/vivos.py @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os import datasets @@ -40,6 +39,11 @@ _DATA_URL = "https://ailab.hcmus.edu.vn/assets/vivos.tar.gz" +_PROMPTS_URLS = { + "train": "https://s3.amazonaws.com/datasets.huggingface.co/vivos/train/prompts.txt", + "test": "https://s3.amazonaws.com/datasets.huggingface.co/vivos/test/prompts.txt", +} + class VivosDataset(datasets.GeneratorBasedBuilder): """VIVOS is a free Vietnamese speech corpus consisting of 15 hours of recording speech prepared for @@ -80,46 +84,55 @@ def _split_generators(self, dl_manager): # dl_manager is a datasets.download.DownloadManager that can be used to download and extract URLs # It can accept any type or nested list/dict and will give back the same structure with the url replaced with path to local files. # By default the archives will be extracted and a path to a cached folder where they are extracted is returned instead of the archive - dl_path = dl_manager.download_and_extract(_DATA_URL) - data_dir = os.path.join(dl_path, "vivos") - train_dir = os.path.join(data_dir, "train") - test_dir = os.path.join(data_dir, "test") + prompts_paths = dl_manager.download(_PROMPTS_URLS) + archive = dl_manager.download(_DATA_URL) + train_dir = "vivos/train" + test_dir = "vivos/test" return [ datasets.SplitGenerator( name=datasets.Split.TRAIN, # These kwargs will be passed to _generate_examples gen_kwargs={ - "filepath": os.path.join(train_dir, "prompts.txt"), - "path_to_clips": os.path.join(train_dir, "waves"), + "prompts_path": prompts_paths["train"], + "path_to_clips": train_dir + "/waves", + "audio_files": dl_manager.iter_archive(archive), }, ), datasets.SplitGenerator( name=datasets.Split.TEST, # These kwargs will be passed to _generate_examples gen_kwargs={ - "filepath": os.path.join(test_dir, "prompts.txt"), - "path_to_clips": os.path.join(test_dir, "waves"), + "prompts_path": prompts_paths["test"], + "path_to_clips": test_dir + "/waves", + "audio_files": dl_manager.iter_archive(archive), }, ), ] - def _generate_examples( - self, - filepath, - path_to_clips, # method parameters are unpacked from `gen_kwargs` as given in `_split_generators` - ): + def _generate_examples(self, prompts_path, path_to_clips, audio_files): """Yields examples as (key, example) tuples.""" # This method handles input defined in _split_generators to yield (key, example) tuples from the dataset. # The `key` is here for legacy reason (tfds) and is not important in itself. - - with open(filepath, encoding="utf-8") as f: - for id_, row in enumerate(f): + examples = {} + with open(prompts_path, encoding="utf-8") as f: + for row in f: data = row.strip().split(" ", 1) speaker_id = data[0].split("_")[0] - yield id_, { + audio_path = "/".join([path_to_clips, speaker_id, data[0] + ".wav"]) + examples[audio_path] = { "speaker_id": speaker_id, - "path": os.path.join(path_to_clips, speaker_id, data[0] + ".wav"), - "audio": os.path.join(path_to_clips, speaker_id, data[0] + ".wav"), + "path": audio_path, "sentence": data[1], } + inside_clips_dir = False + id_ = 0 + for path, f in audio_files: + if path.startswith(path_to_clips): + inside_clips_dir = True + if path in examples: + audio = {"path": path, "bytes": f.read()} + yield id_, {**examples[path], "audio": audio} + id_ += 1 + elif inside_clips_dir: + break