From 0c30a584e68cc1fa4c11ad450cd0c4998addebb5 Mon Sep 17 00:00:00 2001 From: Kushashwa Ravi Shrimali Date: Tue, 2 Aug 2022 16:41:51 +0530 Subject: [PATCH 1/3] Hotfix: don't pass fsspec object to soundfile --- flash/core/data/utilities/loading.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flash/core/data/utilities/loading.py b/flash/core/data/utilities/loading.py index d42a007287..a57cbfff79 100644 --- a/flash/core/data/utilities/loading.py +++ b/flash/core/data/utilities/loading.py @@ -165,7 +165,10 @@ def load_spectrogram(file_path: str, sampling_rate: int = 16000, n_fft: int = 40 """ loaders = copy.copy(_spectrogram_loaders) loaders[AUDIO_EXTENSIONS] = partial(loaders[AUDIO_EXTENSIONS], sampling_rate=sampling_rate, n_fft=n_fft) - return load(file_path, loaders) + loader = _get_loader(file_path, loaders) + # FIXME: Following error is raised while trying to read fsspec object from SoundFile: + # RuntimeError: Error opening : Format not recognised. + return loader(file_path) def load_audio(file_path: str, sampling_rate: int = 16000): From 1685e7cb4405141f1fe0a074a0ede33b53b775b7 Mon Sep 17 00:00:00 2001 From: Kushashwa Ravi Shrimali Date: Tue, 2 Aug 2022 21:03:58 +0530 Subject: [PATCH 2/3] Remove sd2, revert temporary fix used before --- flash/audio/classification/data.py | 8 ++++---- flash/audio/speech_recognition/data.py | 8 ++++---- flash/core/data/utilities/loading.py | 6 +----- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/flash/audio/classification/data.py b/flash/audio/classification/data.py index e440c96482..a9fbd95786 100644 --- a/flash/audio/classification/data.py +++ b/flash/audio/classification/data.py @@ -72,7 +72,7 @@ def from_files( ``.bmp``, ``.pgm``, ``.tif``, ``.tiff``, ``.webp``, and ``.npy``. 
The supported file extensions for raw audio (where spectrograms will be computed automatically) are: ``.aiff``, ``.au``, ``.avr``, ``.caf``, ``.flac``, ``.mat``, ``.mat4``, ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, - ``.pvf``, ``.rf64``, ``.sd2``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. + ``.pvf``, ``.rf64``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. The targets can be in any of our :ref:`supported classification target formats `. To learn how to customize the transforms applied for each stage, read our @@ -184,7 +184,7 @@ def from_folders( ``.bmp``, ``.pgm``, ``.tif``, ``.tiff``, ``.webp``, and ``.npy``. The supported file extensions for raw audio (where spectrograms will be computed automatically) are: ``.aiff``, ``.au``, ``.avr``, ``.caf``, ``.flac``, ``.mat``, ``.mat4``, ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, - ``.pvf``, ``.rf64``, ``.sd2``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. + ``.pvf``, ``.rf64``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. For train, test, and validation data, the folders are expected to contain a sub-folder for each class. Here's the required structure: @@ -505,7 +505,7 @@ def from_data_frame( ``.bmp``, ``.pgm``, ``.tif``, ``.tiff``, ``.webp``, and ``.npy``. The supported file extensions for raw audio (where spectrograms will be computed automatically) are: ``.aiff``, ``.au``, ``.avr``, ``.caf``, ``.flac``, ``.mat``, ``.mat4``, ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, - ``.pvf``, ``.rf64``, ``.sd2``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. + ``.pvf``, ``.rf64``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. The targets will be extracted from the ``target_fields`` in the DataFrame and can be in any of our :ref:`supported classification target formats `. 
To learn how to customize the transforms applied for each stage, read our @@ -666,7 +666,7 @@ def from_csv( ``.bmp``, ``.pgm``, ``.tif``, ``.tiff``, ``.webp``, and ``.npy``. The supported file extensions for raw audio (where spectrograms will be computed automatically) are: ``.aiff``, ``.au``, ``.avr``, ``.caf``, ``.flac``, ``.mat``, ``.mat4``, ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, - ``.pvf``, ``.rf64``, ``.sd2``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. + ``.pvf``, ``.rf64``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. The targets will be extracted from the ``target_fields`` in the CSV files and can be in any of our :ref:`supported classification target formats `. To learn how to customize the transforms applied for each stage, read our diff --git a/flash/audio/speech_recognition/data.py b/flash/audio/speech_recognition/data.py index 85a993ebe0..e7c64d0c03 100644 --- a/flash/audio/speech_recognition/data.py +++ b/flash/audio/speech_recognition/data.py @@ -60,7 +60,7 @@ def from_files( and corresponding lists of targets. The supported file extensions are: ``.aiff``, ``.au``, ``.avr``, ``.caf``, ``.flac``, ``.mat``, ``.mat4``, - ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, ``.pvf``, ``.rf64``, ``.sd2``, ``.ircam``, ``.voc``, ``.w64``, + ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, ``.pvf``, ``.rf64``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. To learn how to customize the transforms applied for each stage, read our :ref:`customizing transforms guide `. @@ -153,7 +153,7 @@ def from_csv( Input audio file paths will be extracted from the ``input_field`` column in the CSV files. 
The supported file extensions are: ``.aiff``, ``.au``, ``.avr``, ``.caf``, ``.flac``, ``.mat``, ``.mat4``, - ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, ``.pvf``, ``.rf64``, ``.sd2``, ``.ircam``, ``.voc``, ``.w64``, + ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, ``.pvf``, ``.rf64``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. The targets will be extracted from the ``target_field`` in the CSV files. To learn how to customize the transforms applied for each stage, read our @@ -342,7 +342,7 @@ def from_json( Input audio file paths will be extracted from the ``input_field`` field in the JSON files. The supported file extensions are: ``.aiff``, ``.au``, ``.avr``, ``.caf``, ``.flac``, ``.mat``, ``.mat4``, - ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, ``.pvf``, ``.rf64``, ``.sd2``, ``.ircam``, ``.voc``, ``.w64``, + ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, ``.pvf``, ``.rf64``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. The targets will be extracted from the ``target_field`` field in the JSON files. To learn how to customize the transforms applied for each stage, read our @@ -468,7 +468,7 @@ def from_datasets( * A PyTorch Dataset where the ``__getitem__`` returns a dict: ``{"input": file_path, "target": target}`` The supported file extensions are: ``.aiff``, ``.au``, ``.avr``, ``.caf``, ``.flac``, ``.mat``, ``.mat4``, - ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, ``.pvf``, ``.rf64``, ``.sd2``, ``.ircam``, ``.voc``, ``.w64``, + ``.mat5``, ``.mpc2k``, ``.ogg``, ``.paf``, ``.pvf``, ``.rf64``, ``.ircam``, ``.voc``, ``.w64``, ``.wav``, ``.nist``, and ``.wavex``. To learn how to customize the transforms applied for each stage, read our :ref:`customizing transforms guide `. 
diff --git a/flash/core/data/utilities/loading.py b/flash/core/data/utilities/loading.py index a57cbfff79..7522882f30 100644 --- a/flash/core/data/utilities/loading.py +++ b/flash/core/data/utilities/loading.py @@ -46,7 +46,6 @@ ".paf", ".pvf", ".rf64", - ".sd2", ".ircam", ".voc", ".w64", @@ -165,10 +164,7 @@ def load_spectrogram(file_path: str, sampling_rate: int = 16000, n_fft: int = 40 """ loaders = copy.copy(_spectrogram_loaders) loaders[AUDIO_EXTENSIONS] = partial(loaders[AUDIO_EXTENSIONS], sampling_rate=sampling_rate, n_fft=n_fft) - loader = _get_loader(file_path, loaders) - # FIXME: Following error is raised while trying to read fsspec object from SoundFile: - # RuntimeError: Error opening : Format not recognised. - return loader(file_path) + return load(file_path, loaders) def load_audio(file_path: str, sampling_rate: int = 16000): From 80d5dbb480607fc9577f30268805f9d2d331434e Mon Sep 17 00:00:00 2001 From: Kushashwa Ravi Shrimali Date: Sat, 13 Aug 2022 09:16:42 +0530 Subject: [PATCH 3/3] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 804b60dd19..161acaf8f1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -42,6 +42,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). ### Removed +- Removed support for audio files with the `sd2` extension, because SoundFile cannot read `sd2` files from fsspec file objects. ([#1409](https://github.com/Lightning-AI/lightning-flash/pull/1409)) + ### Fixed - Fixed a bug where grayscale images were not properly converted to RGB when loaded. ([#1394](https://github.com/PyTorchLightning/lightning-flash/pull/1394))