Skip to content

Commit

Permalink
Update source info (pytorch#2418)
Browse files Browse the repository at this point in the history
Summary:
Add num_frames and bits_per_sample to match with the current
`torchaudio.info` capability.

Pull Request resolved: pytorch#2418

Reviewed By: carolineechen

Differential Revision: D36749077

Pulled By: mthrok

fbshipit-source-id: 7b368ee993cf5ed63ff2f53c9e3b1f50fcce7713
  • Loading branch information
mthrok authored and facebook-github-bot committed May 29, 2022
1 parent fd7ace1 commit bb77cbe
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 21 deletions.
12 changes: 12 additions & 0 deletions test/torchaudio_unittest/io/stream_reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ def test_src_info(self):
codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
format="yuv420p",
bit_rate=71925,
num_frames=325,
bits_per_sample=8,
width=320,
height=180,
frame_rate=25.0,
Expand All @@ -106,6 +108,8 @@ def test_src_info(self):
codec_long_name="AAC (Advanced Audio Coding)",
format="fltp",
bit_rate=72093,
num_frames=103,
bits_per_sample=0,
sample_rate=8000.0,
num_channels=2,
),
Expand All @@ -115,13 +119,17 @@ def test_src_info(self):
codec_long_name="MOV text",
format=None,
bit_rate=None,
num_frames=None,
bits_per_sample=None,
),
StreamReaderSourceVideoStream(
media_type="video",
codec="h264",
codec_long_name="H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10",
format="yuv420p",
bit_rate=128783,
num_frames=390,
bits_per_sample=8,
width=480,
height=270,
frame_rate=29.97002997002997,
Expand All @@ -132,6 +140,8 @@ def test_src_info(self):
codec_long_name="AAC (Advanced Audio Coding)",
format="fltp",
bit_rate=128837,
num_frames=205,
bits_per_sample=0,
sample_rate=16000.0,
num_channels=2,
),
Expand All @@ -141,6 +151,8 @@ def test_src_info(self):
codec_long_name="MOV text",
format=None,
bit_rate=None,
num_frames=None,
bits_per_sample=None,
),
]
output = [s.get_src_stream_info(i) for i in range(6)]
Expand Down
2 changes: 2 additions & 0 deletions torchaudio/csrc/ffmpeg/stream_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ SrcStreamInfo StreamReader::get_src_stream_info(int i) const {
SrcStreamInfo ret;
ret.media_type = codecpar->codec_type;
ret.bit_rate = codecpar->bit_rate;
ret.num_frames = stream->nb_frames;
ret.bits_per_sample = codecpar->bits_per_raw_sample;
const AVCodecDescriptor* desc = avcodec_descriptor_get(codecpar->codec_id);
if (desc) {
ret.codec_name = desc->name;
Expand Down
2 changes: 2 additions & 0 deletions torchaudio/csrc/ffmpeg/stream_reader_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ SrcInfo convert(SrcStreamInfo ssi) {
ssi.codec_long_name,
ssi.fmt_name,
ssi.bit_rate,
ssi.num_frames,
ssi.bits_per_sample,
ssi.sample_rate,
ssi.num_channels,
ssi.width,
Expand Down
2 changes: 2 additions & 0 deletions torchaudio/csrc/ffmpeg/stream_reader_wrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ using SrcInfo = std::tuple<
std::string, // codec long name
std::string, // format name
int64_t, // bit_rate
int64_t, // num_frames
int64_t, // bits_per_sample
// Audio
double, // sample_rate
int64_t, // num_channels
Expand Down
2 changes: 2 additions & 0 deletions torchaudio/csrc/ffmpeg/typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ struct SrcStreamInfo {
const char* codec_long_name = "N/A";
const char* fmt_name = "N/A";
int64_t bit_rate = 0;
int64_t num_frames = 0;
int bits_per_sample = 0;
// Audio
double sample_rate = 0;
int num_channels = 0;
Expand Down
66 changes: 45 additions & 21 deletions torchaudio/io/_stream_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ class StreamReaderSourceStream:
This is an estimated value based on the initial few frames of the stream.
For container formats and variable bit rate, it can be 0.
"""
num_frames: Optional[int]
"""The number of frames in the stream"""
bits_per_sample: Optional[int]
"""This is the number of valid bits in each output sample.
For compressed formats, it can be 0.
"""


@dataclass
Expand Down Expand Up @@ -100,41 +106,59 @@ class StreamReaderSourceVideoStream(StreamReaderSourceStream):
_CODEC_LONG = 2
_FORMAT = 3
_BIT_RATE = 4
_NUM_FRAMES = 5
_BPS = 6
# - AUDIO
_SAMPLE_RATE = 5
_NUM_CHANNELS = 6
_SAMPLE_RATE = 7
_NUM_CHANNELS = 8
# - VIDEO
_WIDTH = 7
_HEIGHT = 8
_FRAME_RATE = 9
_WIDTH = 9
_HEIGHT = 10
_FRAME_RATE = 11


def _parse_si(i):
media_type = i[_MEDIA_TYPE]
codec_name = i[_CODEC]
codec_long_name = i[_CODEC_LONG]
fmt = i[_FORMAT]
bit_rate = i[_BIT_RATE]
num_frames = i[_NUM_FRAMES]
bps = i[_BPS]
if media_type == "audio":
return StreamReaderSourceAudioStream(
media_type,
codec_name,
codec_long_name,
i[_FORMAT],
i[_BIT_RATE],
i[_SAMPLE_RATE],
i[_NUM_CHANNELS],
media_type=media_type,
codec=codec_name,
codec_long_name=codec_long_name,
format=fmt,
bit_rate=bit_rate,
num_frames=num_frames,
bits_per_sample=bps,
sample_rate=i[_SAMPLE_RATE],
num_channels=i[_NUM_CHANNELS],
)
if media_type == "video":
return StreamReaderSourceVideoStream(
media_type,
codec_name,
codec_long_name,
i[_FORMAT],
i[_BIT_RATE],
i[_WIDTH],
i[_HEIGHT],
i[_FRAME_RATE],
media_type=media_type,
codec=codec_name,
codec_long_name=codec_long_name,
format=fmt,
bit_rate=bit_rate,
num_frames=num_frames,
bits_per_sample=bps,
width=i[_WIDTH],
height=i[_HEIGHT],
frame_rate=i[_FRAME_RATE],
)
return StreamReaderSourceStream(media_type, codec_name, codec_long_name, None, None)
return StreamReaderSourceStream(
media_type=media_type,
codec=codec_name,
codec_long_name=codec_long_name,
format=None,
bit_rate=None,
num_frames=None,
bits_per_sample=None,
)


@dataclass
Expand Down

0 comments on commit bb77cbe

Please sign in to comment.