Skip to content

Commit

Permalink
Integrated base decoder into VideoReader class and video_utils.py (py…
Browse files Browse the repository at this point in the history
…torch#1766)

Summary:
Pull Request resolved: pytorch#1766

Replaced FfmpegDecoder (incompatible with VUE) with the base decoder (compatible with VUE).
Modified the Python utilities in video_utils.py for internal simplification. The public interface is preserved.

Differential Revision: D19415903

fbshipit-source-id: fcd4a7a6453c1468e578441a55ce424b72fe6778
  • Loading branch information
Yuri Putivsky authored and facebook-github-bot committed Jan 28, 2020
1 parent a4d3475 commit 8c7006f
Show file tree
Hide file tree
Showing 52 changed files with 770 additions and 2,345 deletions.
32 changes: 6 additions & 26 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,41 +155,21 @@ def get_extensions():
ffmpeg_root = os.path.dirname(ffmpeg_bin)
ffmpeg_include_dir = os.path.join(ffmpeg_root, 'include')

# TorchVision video reader
# TorchVision base decoder + video reader
video_reader_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'video_reader')
video_reader_src = glob.glob(os.path.join(video_reader_src_dir, "*.cpp"))

ext_modules.append(
CppExtension(
'torchvision.video_reader',
video_reader_src,
include_dirs=[
video_reader_src_dir,
ffmpeg_include_dir,
extensions_dir,
],
libraries=[
'avcodec',
'avformat',
'avutil',
'swresample',
'swscale',
],
extra_compile_args=["-std=c++14"],
extra_link_args=["-std=c++14"],
)
)

# TorchVision base decoder
base_decoder_src_dir = os.path.join(this_dir, 'torchvision', 'csrc', 'cpu', 'decoder')
base_decoder_src = glob.glob(os.path.join(base_decoder_src_dir, "[!sync_decoder_test]*.cpp"))

combined_src = video_reader_src + base_decoder_src

ext_modules.append(
CppExtension(
'torchvision.base_decoder',
base_decoder_src,
'torchvision.video_reader',
combined_src,
include_dirs=[
base_decoder_src_dir,
video_reader_src_dir,
ffmpeg_include_dir,
extensions_dir,
],
Expand Down
2 changes: 0 additions & 2 deletions torchvision/csrc/cpu/decoder/audio_sampler.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// Copyright 2004-present Facebook. All Rights Reserved.

#include "audio_sampler.h"
#include <c10/util/Logging.h>
#include "util.h"
Expand Down
2 changes: 0 additions & 2 deletions torchvision/csrc/cpu/decoder/audio_sampler.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// Copyright 2004-present Facebook. All Rights Reserved.

#pragma once

#include "defs.h"
Expand Down
50 changes: 18 additions & 32 deletions torchvision/csrc/cpu/decoder/audio_stream.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// Copyright 2004-present Facebook. All Rights Reserved.

#include "audio_stream.h"
#include <c10/util/Logging.h>
#include <limits>
Expand All @@ -8,11 +6,23 @@
namespace ffmpeg {

namespace {
// Equality between a stored audio format and a decoded frame: true only when
// x.samples equals the frame's sample_rate, and the channels and format
// fields agree as well.
bool operator==(const AudioFormat& x, const AVFrame& y) {
  if (x.samples != y.sample_rate) {
    return false;
  }
  if (x.channels != y.channels) {
    return false;
  }
  return x.format == y.format;
}

// Equality between a stored audio format and a codec context: true only when
// x.samples equals the context's sample_rate, the channel counts agree, and
// x.format equals the context's sample_fmt.
bool operator==(const AudioFormat& x, const AVCodecContext& y) {
  if (x.samples != y.sample_rate) {
    return false;
  }
  if (x.channels != y.channels) {
    return false;
  }
  return x.format == y.sample_fmt;
}

// Copies the audio parameters of a decoded frame into x and returns x so the
// call can be used inline.
AudioFormat& toAudioFormat(AudioFormat& x, const AVFrame& y) {
  x.format = y.format;
  x.channels = y.channels;
  // AudioFormat::samples is filled from the frame's sample_rate field.
  x.samples = y.sample_rate;
  return x;
}

AudioFormat& toAudioFormat(AudioFormat& x, const AVCodecContext& y) {
x.samples = y.sample_rate;
x.channels = y.channels;
Expand Down Expand Up @@ -65,12 +75,15 @@ int AudioStream::initFormat() {

int AudioStream::estimateBytes(bool flush) {
ensureSampler();
if (!(sampler_->getInputFormat().audio == *codecCtx_)) {
// check if input format gets changed
if (flush ? !(sampler_->getInputFormat().audio == *codecCtx_)
: !(sampler_->getInputFormat().audio == *frame_)) {
// - reinit sampler
SamplerParameters params;
params.type = format_.type;
params.out = format_.format;
toAudioFormat(params.in.audio, *codecCtx_);
flush ? toAudioFormat(params.in.audio, *codecCtx_)
: toAudioFormat(params.in.audio, *frame_);
if (flush || !sampler_->init(params)) {
return -1;
}
Expand All @@ -84,39 +97,12 @@ int AudioStream::estimateBytes(bool flush) {
<< ", channels: " << format_.format.audio.channels
<< ", format: " << format_.format.audio.format;
}
return sampler_->getSamplesBytes(frame_);
return sampler_->getSamplesBytes(flush ? nullptr : frame_);
}

// Runs the sampler over the current frame and writes the result into `out`.
// When `flush` is set, a null frame is handed to the sampler instead of
// frame_. Returns whatever the sampler's sample() call returns.
int AudioStream::copyFrameBytes(ByteStorage* out, bool flush) {
  ensureSampler();
  auto* const source = flush ? nullptr : frame_;
  return sampler_->sample(source, out);
}

// Populates `header` for the current audio frame: sequence number,
// presentation timestamp expressed in AV_TIME_BASE_Q units, and the stream
// format. Audio headers are always flagged as key frames and carry NaN fps.
void AudioStream::setHeader(DecoderHeader* header) {
  header->seqno = numGenerator_++;

  // Pick the time base the frame timestamp is expressed in. If the codec
  // time_base is missing, the earlier rescalePackage step (rescale to the
  // codec time_base) would have been skipped, so the timestamp is still in
  // the stream time_base and is rescaled straight from there.
  const bool codecHasTimeBase = codecCtx_->time_base.num != 0;
  const auto sourceTimeBase = codecHasTimeBase
      ? codecCtx_->time_base
      : inputCtx_->streams[format_.stream]->time_base;

  header->pts = av_rescale_q(
      av_frame_get_best_effort_timestamp(frame_),
      sourceTimeBase,
      AV_TIME_BASE_Q);

  if (convertPtsToWallTime_) {
    keeper_.adjust(header->pts);
  }

  header->format = format_;
  header->fps = std::numeric_limits<double>::quiet_NaN();
  header->keyFrame = 1;
}

} // namespace ffmpeg
5 changes: 0 additions & 5 deletions torchvision/csrc/cpu/decoder/audio_stream.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
// Copyright 2004-present Facebook. All Rights Reserved.

#pragma once

#include "audio_sampler.h"
#include "stream.h"
#include "time_keeper.h"

namespace ffmpeg {

Expand All @@ -25,13 +22,11 @@ class AudioStream : public Stream {
int initFormat() override;
int estimateBytes(bool flush) override;
int copyFrameBytes(ByteStorage* out, bool flush) override;
void setHeader(DecoderHeader* header) override;

void ensureSampler();

private:
std::unique_ptr<AudioSampler> sampler_;
TimeKeeper keeper_;
};

} // namespace ffmpeg
2 changes: 0 additions & 2 deletions torchvision/csrc/cpu/decoder/cc_stream.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// Copyright 2004-present Facebook. All Rights Reserved.

#include "cc_stream.h"

namespace ffmpeg {
Expand Down
2 changes: 0 additions & 2 deletions torchvision/csrc/cpu/decoder/cc_stream.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
// Copyright 2004-present Facebook. All Rights Reserved.

#pragma once

#include "subtitle_stream.h"
Expand Down
Loading

0 comments on commit 8c7006f

Please sign in to comment.