Skip to content

Commit

Permalink
Remove ffmpeg fallback from sox_io backend
Browse files Browse the repository at this point in the history
In pytorch#2419, we added ffmpeg as fallback for sox_io backend.
This was a workaround for the issue caused by the libmad removal.

Now that we have introduced the `backend` argument to the I/O functions,
and the libsox integration has moved to dynamic binding, where users can
use libsox with libmad integration, we no longer need the workaround.

This commit is based on reverting pytorch#2416 (fd7ace1).
  • Loading branch information
mthrok committed Jul 28, 2023
1 parent 7368e33 commit 653031c
Show file tree
Hide file tree
Showing 14 changed files with 27 additions and 113 deletions.
19 changes: 0 additions & 19 deletions test/torchaudio_unittest/backend/dispatcher/sox/load_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,25 +318,6 @@ def test_sox(self, frame_offset, num_frames, channels_first, normalize):

self._test(torch.ops.torchaudio.sox_io_load_audio_file, frame_offset, num_frames, channels_first, normalize)

@nested_params(
[0, 1, 10, 100, 1000],
[-1, 1, 10, 100, 1000],
[True, False],
[True, False],
)
def test_ffmpeg(self, frame_offset, num_frames, channels_first, normalize):
"""The combination of properly changes the output tensor"""
from torchaudio.io._compat import load_audio, load_audio_fileobj

self._test(load_audio, frame_offset, num_frames, channels_first, normalize)

# test file-like obj
def func(path, *args):
with open(path, "rb") as fileobj:
return load_audio_fileobj(fileobj, *args)

self._test(func, frame_offset, num_frames, channels_first, normalize)


@skipIfNoSox
@skipIfNoExec("sox")
Expand Down
7 changes: 3 additions & 4 deletions test/torchaudio_unittest/backend/sox_io/info_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def test_htk(self):


@skipIfNoSox
@skipIfNoSoxDecoder("opus")
class TestInfoOpus(PytorchTestCase):
@parameterized.expand(
list(
Expand Down Expand Up @@ -304,17 +305,15 @@ class TestLoadWithoutExtension(PytorchTestCase):
def test_mp3(self):
"""MP3 file without extension can be loaded
Originally, we added `format` argument for this case, but now we use FFmpeg
for MP3 decoding, which works even without `format` argument.
https://github.com/pytorch/audio/issues/1040
The file was generated with the following command
ffmpeg -f lavfi -i "sine=frequency=1000:duration=5" -ar 16000 -f mp3 test_noext
"""
path = get_asset_path("mp3_without_ext")
sinfo = sox_io_backend.info(path)
sinfo = sox_io_backend.info(path, format="mp3")
assert sinfo.sample_rate == 16000
assert sinfo.num_frames == 80000
assert sinfo.num_frames == 81216
assert sinfo.num_channels == 1
assert sinfo.bits_per_sample == 0 # bit_per_sample is irrelevant for compressed formats
assert sinfo.encoding == "MP3"
Expand Down
19 changes: 0 additions & 19 deletions test/torchaudio_unittest/backend/sox_io/load_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,25 +315,6 @@ def test_sox(self, frame_offset, num_frames, channels_first, normalize):

self._test(torch.ops.torchaudio.sox_io_load_audio_file, frame_offset, num_frames, channels_first, normalize)

@nested_params(
[0, 1, 10, 100, 1000],
[-1, 1, 10, 100, 1000],
[True, False],
[True, False],
)
def test_ffmpeg(self, frame_offset, num_frames, channels_first, normalize):
"""The combination of properly changes the output tensor"""
from torchaudio.io._compat import load_audio, load_audio_fileobj

self._test(load_audio, frame_offset, num_frames, channels_first, normalize)

# test file-like obj
def func(path, *args):
with open(path, "rb") as fileobj:
return load_audio_fileobj(fileobj, *args)

self._test(func, frame_offset, num_frames, channels_first, normalize)


@skipIfNoSox
class TestLoadWithoutExtension(PytorchTestCase):
Expand Down
36 changes: 2 additions & 34 deletions torchaudio/backend/sox_io_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,6 @@
from .common import AudioMetaData


# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_info(filepath: str, format: Optional[str]) -> AudioMetaData:
raise RuntimeError("Failed to fetch metadata from {}".format(filepath))


# Note: need to comply TorchScript syntax -- need annotation and no f-string
def _fail_load(
filepath: str,
frame_offset: int = 0,
num_frames: int = -1,
normalize: bool = True,
channels_first: bool = True,
format: Optional[str] = None,
) -> Tuple[torch.Tensor, int]:
raise RuntimeError("Failed to load audio from {}".format(filepath))


if torchaudio._extension._FFMPEG_EXT is not None:
import torchaudio.io._compat as _compat

_fallback_info = _compat.info_audio
_fallback_load = _compat.load_audio
else:
_fallback_info = _fail_info
_fallback_load = _fail_load


@torchaudio._extension.fail_if_no_sox
def info(
filepath: str,
Expand All @@ -58,9 +31,7 @@ def info(
raise RuntimeError("sox_io backend does not support file-like object.")
filepath = os.fspath(filepath)
sinfo = torch.ops.torchaudio.sox_io_get_info(filepath, format)
if sinfo is not None:
return AudioMetaData(*sinfo)
return _fallback_info(filepath, format)
return AudioMetaData(*sinfo)


@torchaudio._extension.fail_if_no_sox
Expand Down Expand Up @@ -153,12 +124,9 @@ def load(
if hasattr(filepath, "read"):
raise RuntimeError("sox_io backend does not support file-like object.")
filepath = os.fspath(filepath)
ret = torch.ops.torchaudio.sox_io_load_audio_file(
return torch.ops.torchaudio.sox_io_load_audio_file(
filepath, frame_offset, num_frames, normalize, channels_first, format
)
if ret is not None:
return ret
return _fallback_load(filepath, frame_offset, num_frames, normalize, channels_first, format)


@torchaudio._extension.fail_if_no_sox
Expand Down
7 changes: 2 additions & 5 deletions torchaudio/csrc/sox/effects.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,18 +89,15 @@ auto apply_effects_file(
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
const c10::optional<std::string>& format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>> {
-> std::tuple<torch::Tensor, int64_t> {
// Open input file
SoxFormat sf(sox_open_read(
path.c_str(),
/*signal=*/nullptr,
/*encoding=*/nullptr,
/*filetype=*/format.has_value() ? format.value().c_str() : nullptr));

if (static_cast<sox_format_t*>(sf) == nullptr ||
sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
return {};
}
validate_input_file(sf, path);

const auto dtype = get_dtype(sf->encoding.encoding, sf->signal.precision);

Expand Down
2 changes: 1 addition & 1 deletion torchaudio/csrc/sox/effects.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ auto apply_effects_file(
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
const c10::optional<std::string>& format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>>;
-> std::tuple<torch::Tensor, int64_t>;

} // namespace torchaudio::sox

Expand Down
11 changes: 4 additions & 7 deletions torchaudio/csrc/sox/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ using namespace torch::indexing;

namespace torchaudio::sox {

c10::optional<MetaDataTuple> get_info_file(
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
const std::string& path,
const c10::optional<std::string>& format) {
SoxFormat sf(sox_open_read(
Expand All @@ -17,12 +17,9 @@ c10::optional<MetaDataTuple> get_info_file(
/*encoding=*/nullptr,
/*filetype=*/format.has_value() ? format.value().c_str() : nullptr));

if (static_cast<sox_format_t*>(sf) == nullptr ||
sf->encoding.encoding == SOX_ENCODING_UNKNOWN) {
return {};
}
validate_input_file(sf, path);

return std::forward_as_tuple(
return std::make_tuple(
static_cast<int64_t>(sf->signal.rate),
static_cast<int64_t>(sf->signal.length / sf->signal.channels),
static_cast<int64_t>(sf->signal.channels),
Expand Down Expand Up @@ -58,7 +55,7 @@ std::vector<std::vector<std::string>> get_effects(
return effects;
}

c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
std::tuple<torch::Tensor, int64_t> load_audio_file(
const std::string& path,
const c10::optional<int64_t>& frame_offset,
const c10::optional<int64_t>& num_frames,
Expand Down
7 changes: 2 additions & 5 deletions torchaudio/csrc/sox/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,11 @@ auto get_effects(
const c10::optional<int64_t>& num_frames)
-> std::vector<std::vector<std::string>>;

using MetaDataTuple =
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

c10::optional<MetaDataTuple> get_info_file(
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> get_info_file(
const std::string& path,
const c10::optional<std::string>& format);

c10::optional<std::tuple<torch::Tensor, int64_t>> load_audio_file(
std::tuple<torch::Tensor, int64_t> load_audio_file(
const std::string& path,
const c10::optional<int64_t>& frame_offset,
const c10::optional<int64_t>& num_frames,
Expand Down
5 changes: 2 additions & 3 deletions torchaudio/csrc/sox/pybind/effects.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ auto apply_effects_fileobj(
const std::vector<std::vector<std::string>>& effects,
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
c10::optional<std::string> format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>> {
c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t> {
// Prepare the buffer used throughout the lifecycle of SoxEffectChain.
//
// For certain format (such as FLAC), libsox keeps reading the content at
Expand Down Expand Up @@ -112,7 +111,7 @@ auto apply_effects_fileobj(
normalize.value_or(true),
channels_first_);

return std::forward_as_tuple(
return std::make_tuple(
tensor, static_cast<int64_t>(chain.getOutputSampleRate()));
}

Expand Down
3 changes: 1 addition & 2 deletions torchaudio/csrc/sox/pybind/effects.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,7 @@ auto apply_effects_fileobj(
const std::vector<std::vector<std::string>>& effects,
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
c10::optional<std::string> format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>>;
c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t>;

} // namespace torchaudio::sox

Expand Down
7 changes: 3 additions & 4 deletions torchaudio/csrc/sox/pybind/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
namespace torchaudio::sox {

auto get_info_fileobj(py::object fileobj, c10::optional<std::string> format)
-> c10::optional<MetaDataTuple> {
-> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string> {
// Prepare in-memory file object
// When libsox opens a file, it also reads the header.
// When opening a file there are two functions that might touch FILE* (and the
Expand Down Expand Up @@ -63,7 +63,7 @@ auto get_info_fileobj(py::object fileobj, c10::optional<std::string> format)
return c10::optional<MetaDataTuple>{};
}

return std::forward_as_tuple(
return std::make_tuple(
static_cast<int64_t>(sf->signal.rate),
static_cast<int64_t>(sf->signal.length / sf->signal.channels),
static_cast<int64_t>(sf->signal.channels),
Expand All @@ -77,8 +77,7 @@ auto load_audio_fileobj(
c10::optional<int64_t> num_frames,
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
c10::optional<std::string> format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>> {
c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t> {
auto effects = get_effects(frame_offset, num_frames);
return apply_effects_fileobj(
std::move(fileobj),
Expand Down
8 changes: 2 additions & 6 deletions torchaudio/csrc/sox/pybind/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,16 @@

namespace torchaudio::sox {

using MetaDataTuple =
std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

auto get_info_fileobj(py::object fileobj, c10::optional<std::string> format)
-> c10::optional<MetaDataTuple>;
-> std::tuple<int64_t, int64_t, int64_t, int64_t, std::string>;

auto load_audio_fileobj(
py::object fileobj,
c10::optional<int64_t> frame_offset,
c10::optional<int64_t> num_frames,
c10::optional<bool> normalize,
c10::optional<bool> channels_first,
c10::optional<std::string> format)
-> c10::optional<std::tuple<torch::Tensor, int64_t>>;
c10::optional<std::string> format) -> std::tuple<torch::Tensor, int64_t>;

void save_audio_fileobj(
py::object fileobj,
Expand Down
4 changes: 4 additions & 0 deletions torchaudio/csrc/sox/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ struct SoxFormat {
sox_format_t* fd_;
};

///
/// Verify that the input file is found, has a known encoding, and is not empty
void validate_input_file(const SoxFormat& sf, const std::string& path);

///
/// Verify that input Tensor is 2D, CPU and either uint8, int16, int32 or float32
void validate_input_tensor(const torch::Tensor&);
Expand Down
5 changes: 1 addition & 4 deletions torchaudio/sox_effects/sox_effects.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,4 @@ def apply_effects_file(
"Please use torchaudio.io.AudioEffector."
)
path = os.fspath(path)
ret = torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format)
if ret is not None:
return ret
raise RuntimeError("Failed to load audio from {}".format(path))
return torch.ops.torchaudio.sox_effects_apply_effects_file(path, effects, normalize, channels_first, format)

0 comments on commit 653031c

Please sign in to comment.