Skip to content

Commit

Permalink
Move helper functions out of common utility for better locality (#2512)
Browse files Browse the repository at this point in the history
Summary:
This commit moves helper functions/definitions around to achieve better locality of logic.

## Detail

`ffmpeg.[h|cpp]` implements classes that convert FFmpeg structures into RAII semantics.
Initially, these classes included the construction logic in their constructors, but that logic was
extracted to factory functions in #2373.

The reason the factory functions stayed in `ffmpeg.[h|cpp]` was that the logic for
the initialization and clean-up of the AVDictionary class was only available in `ffmpeg.cpp`.

Now that AVDictionary class handling is properly defined in #2507, the factory functions, which are not
that reusable, are better kept with the implementations that use them.

This makes `ffmpeg.h` lean and clean, and makes it easier to see what can be reused.

Pull Request resolved: #2512

Reviewed By: hwangjeff

Differential Revision: D37477592

Pulled By: mthrok

fbshipit-source-id: 8c1b5059ea5f44649cc0eb1f82d1a92877ef186e
  • Loading branch information
mthrok authored and facebook-github-bot committed Jul 7, 2022
1 parent 515fd01 commit 10ac6d2
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 209 deletions.
126 changes: 120 additions & 6 deletions torchaudio/csrc/ffmpeg/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,133 @@ namespace ffmpeg {
////////////////////////////////////////////////////////////////////////////////
// Decoder
////////////////////////////////////////////////////////////////////////////////
namespace {
// Find a decoder and allocate a codec context for it.
//
// When `decoder_name` holds a value the decoder is looked up by that name,
// otherwise it is looked up by `codec_id`.
//
// Throws std::runtime_error when no decoder is found or when the codec
// context cannot be allocated.
AVCodecContextPtr get_decode_context(
    enum AVCodecID codec_id,
    const c10::optional<std::string>& decoder_name) {
  const bool by_name = decoder_name.has_value();
  const AVCodec* codec = by_name
      ? avcodec_find_decoder_by_name(decoder_name.value().c_str())
      : avcodec_find_decoder(codec_id);

  if (codec == nullptr) {
    std::stringstream msg;
    if (by_name) {
      msg << "Unsupported codec: \"" << decoder_name.value() << "\".";
    } else {
      msg << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
          << codec_id << ").";
    }
    throw std::runtime_error(msg.str());
  }

  if (AVCodecContext* context = avcodec_alloc_context3(codec)) {
    return AVCodecContextPtr(context);
  }
  throw std::runtime_error("Failed to allocate CodecContext.");
}

#ifdef USE_CUDA
// Pixel-format selection callback (installed as AVCodecContext::get_format).
//
// Reads the wanted pixel format through `ctx->opaque` and scans the
// -1 terminated `pix_fmts` list for it. Returns the matching entry, or
// AV_PIX_FMT_NONE (after emitting a warning) when the list does not contain it.
enum AVPixelFormat get_hw_format(
    AVCodecContext* ctx,
    const enum AVPixelFormat* pix_fmts) {
  const AVPixelFormat wanted = *static_cast<AVPixelFormat*>(ctx->opaque);
  const enum AVPixelFormat* candidate = pix_fmts;
  while (*candidate != -1) {
    if (*candidate == wanted) {
      return *candidate;
    }
    ++candidate;
  }
  TORCH_WARN("Failed to get HW surface format.");
  return AV_PIX_FMT_NONE;
}

// Return the first hardware configuration of `pCodec` that targets CUDA and
// can be driven through a HW device context.
//
// Throws std::runtime_error when the codec exposes no such configuration.
const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
  int index = 0;
  // avcodec_get_hw_config yields a null pointer once the index runs past the
  // last configuration, which ends the scan.
  while (const AVCodecHWConfig* candidate =
             avcodec_get_hw_config(pCodec, index++)) {
    const bool is_cuda = candidate->device_type == AV_HWDEVICE_TYPE_CUDA;
    const bool via_device_ctx =
        (candidate->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0;
    if (is_cuda && via_device_ctx) {
      return candidate;
    }
  }

  std::stringstream msg;
  msg << "CUDA device was requested, but the codec \"" << pCodec->name
      << "\" is not supported.";
  throw std::runtime_error(msg.str());
}
#endif

// Configure and open an already allocated codec context.
//
// Steps performed:
//   1. Copy the stream parameters `pParams` into `pCodecContext`.
//   2. (USE_CUDA builds only) If `device` is a CUDA device, install the HW
//      pixel-format callback and create a CUDA device context. One reference
//      is stored in `pCodecContext->hw_device_ctx`, the other is handed to
//      `pHWBufferRef`.
//   3. Open the codec with the options from `decoder_option`.
//   4. For audio streams whose parameters carry no channel layout, fill
//      `pParams->channel_layout` with the default layout for the channel
//      count of the opened context.
//
// Throws std::runtime_error when any of the FFmpeg calls reports a failure.
void init_codec_context(
    AVCodecContext* pCodecContext,
    AVCodecParameters* pParams,
    const OptionDict& decoder_option,
    const torch::Device& device,
    AVBufferRefPtr& pHWBufferRef) {
  int ret = avcodec_parameters_to_context(pCodecContext, pParams);
  if (ret < 0) {
    throw std::runtime_error(
        "Failed to set CodecContext parameter: " + av_err2string(ret));
  }

#ifdef USE_CUDA
  // Enable HW Acceleration
  if (device.type() == c10::DeviceType::CUDA) {
    const AVCodecHWConfig* config = get_cuda_config(pCodecContext->codec);
    // TODO: check how to log
    // C10_LOG << "Decoder " << pCodec->name << " supports device " <<
    // av_hwdevice_get_type_name(config->device_type);

    // https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
    // 1. Set HW pixel format (config->pix_fmt) to opaque pointer.
    //    The pointer refers to the codec's own HW config entry instead of a
    //    function-local `static thread_local` copy: such a copy is initialized
    //    only on the first call per thread, so later calls would keep a stale
    //    value, and every decoder created on the thread would share one slot.
    //    The callback only reads through this pointer.
    //    NOTE(review): relies on the config returned by avcodec_get_hw_config
    //    outliving the codec context (static codec data in FFmpeg) - confirm
    //    for the supported FFmpeg versions.
    pCodecContext->opaque =
        static_cast<void*>(const_cast<AVPixelFormat*>(&config->pix_fmt));
    // 2. Set pCodecContext->get_format call back function which
    // will retrieve the HW pixel format from opaque pointer.
    pCodecContext->get_format = get_hw_format;
    // 3. Create HW device context and set to pCodecContext.
    AVBufferRef* hw_device_ctx = nullptr;
    ret = av_hwdevice_ctx_create(
        &hw_device_ctx,
        AV_HWDEVICE_TYPE_CUDA,
        std::to_string(device.index()).c_str(),
        nullptr,
        0);
    if (ret < 0) {
      throw std::runtime_error(
          "Failed to create CUDA device context: " + av_err2string(ret));
    }
    assert(hw_device_ctx);
    // Hand the original reference to the caller-owned holder first, so it is
    // not lost if taking the second reference fails below.
    pHWBufferRef.reset(hw_device_ctx);
    pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
    if (!pCodecContext->hw_device_ctx) {
      throw std::runtime_error("Failed to reference CUDA device context.");
    }
  }
#endif

  AVDictionary* opts = get_option_dict(decoder_option);
  ret = avcodec_open2(pCodecContext, pCodecContext->codec, &opts);
  clean_up_dict(opts);

  if (ret < 0) {
    throw std::runtime_error(
        "Failed to initialize CodecContext: " + av_err2string(ret));
  }

  if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout) {
    pParams->channel_layout =
        av_get_default_channel_layout(pCodecContext->channels);
  }
}
} // namespace

// Construct a decoder for the stream described by `pParam`.
// The codec context is allocated by get_decode_context (using `decoder_name`
// when provided, otherwise the codec ID from `pParam`) and then configured and
// opened by init_codec_context with `decoder_option`, `device` and
// `pHWBufferRef`.
// NOTE(review): the call below appears to interleave the removed 6-argument
// form and the added 5-argument form from the diff view - confirm against the
// actual file before relying on this text.
Decoder::Decoder(
AVCodecParameters* pParam,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device)
: pCodecContext(get_decode_context(pParam->codec_id, decoder_name)) {
init_codec_context(
pCodecContext,
pParam,
decoder_name,
decoder_option,
device,
pHWBufferRef);
pCodecContext, pParam, decoder_option, device, pHWBufferRef);
}

int Decoder::process_packet(AVPacket* pPacket) {
Expand Down
182 changes: 0 additions & 182 deletions torchaudio/csrc/ffmpeg/ffmpeg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,65 +35,13 @@ void clean_up_dict(AVDictionary* p) {
}
}

namespace {

// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVINPUT_FORMAT_CONST const
#else
#define AVINPUT_FORMAT_CONST
#endif

} // namespace

////////////////////////////////////////////////////////////////////////////////
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
// Deleter for AVFormatContext: passes the address of the pointer to
// avformat_close_input.
void AVFormatContextDeleter::operator()(AVFormatContext* p) {
avformat_close_input(&p);
};

// Create a format context for reading media from `src`.
//
// Parameters:
//   src    - passed to avformat_open_input as the input to open.
//   device - optional input format/device name; when given it is resolved
//            with av_find_input_format and forced as the input format.
//   option - key/value options passed to avformat_open_input.
//   io_ctx - optional custom I/O context; when non-null it is attached to the
//            format context (`pb`) before opening.
//
// Throws std::runtime_error when the format/device name is unknown, when the
// format context cannot be allocated, or when opening the input fails.
AVFormatContextPtr get_input_format_context(
    const std::string& src,
    const c10::optional<std::string>& device,
    const OptionDict& option,
    AVIOContext* io_ctx) {
  // Resolve the input format before allocating anything: this step can throw,
  // and doing it first means no AVFormatContext is left dangling on that path.
  AVINPUT_FORMAT_CONST AVInputFormat* pInput = nullptr;
  if (device.has_value()) {
    const std::string& device_str = device.value();
    pInput = av_find_input_format(device_str.c_str());
    if (!pInput) {
      std::ostringstream msg;
      msg << "Unsupported device/format: \"" << device_str << "\"";
      throw std::runtime_error(msg.str());
    }
  }

  AVFormatContext* pFormat = avformat_alloc_context();
  if (!pFormat) {
    throw std::runtime_error("Failed to allocate AVFormatContext.");
  }
  if (io_ctx) {
    pFormat->pb = io_ctx;
  }

  AVDictionary* opt = get_option_dict(option);
  int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);
  clean_up_dict(opt);

  if (ret < 0) {
    throw std::runtime_error(
        "Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
        ").");
  }
  return AVFormatContextPtr(pFormat);
}

// Wrap the raw AVFormatContext pointer `p`; the pointer is forwarded to the
// Wrapper base, which is parameterized with AVFormatContextDeleter.
AVFormatContextPtr::AVFormatContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatContextDeleter>(p) {}

Expand Down Expand Up @@ -162,136 +110,6 @@ void AVCodecContextDeleter::operator()(AVCodecContext* p) {
avcodec_free_context(&p);
};

namespace {
// Look up a decoder, by `decoder_name` when it holds a value and by
// `codec_id` otherwise.
// Throws std::runtime_error when no matching decoder exists.
const AVCodec* get_decode_codec(
    enum AVCodecID codec_id,
    const c10::optional<std::string>& decoder_name) {
  const bool by_name = decoder_name.has_value();
  const AVCodec* codec = by_name
      ? avcodec_find_decoder_by_name(decoder_name.value().c_str())
      : avcodec_find_decoder(codec_id);
  if (codec != nullptr) {
    return codec;
  }

  std::stringstream msg;
  if (by_name) {
    msg << "Unsupported codec: \"" << decoder_name.value() << "\".";
  } else {
    msg << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
        << codec_id << ").";
  }
  throw std::runtime_error(msg.str());
}

} // namespace

// Allocate a codec context for the decoder selected by get_decode_codec
// (by `decoder_name` when given, otherwise by `codec_id`).
// Throws std::runtime_error when the context cannot be allocated; errors from
// the decoder lookup propagate unchanged.
AVCodecContextPtr get_decode_context(
    enum AVCodecID codec_id,
    const c10::optional<std::string>& decoder_name) {
  AVCodecContext* context =
      avcodec_alloc_context3(get_decode_codec(codec_id, decoder_name));
  if (context == nullptr) {
    throw std::runtime_error("Failed to allocate CodecContext.");
  }
  return AVCodecContextPtr(context);
}

#ifdef USE_CUDA
// get_format callback: picks the HW pixel format stored behind `ctx->opaque`
// out of the -1 terminated `pix_fmts` list. Warns and returns AV_PIX_FMT_NONE
// when the list does not offer that format.
enum AVPixelFormat get_hw_format(
    AVCodecContext* ctx,
    const enum AVPixelFormat* pix_fmts) {
  const AVPixelFormat target = *static_cast<AVPixelFormat*>(ctx->opaque);
  for (const enum AVPixelFormat* it = pix_fmts; *it != -1; ++it) {
    if (*it != target) {
      continue;
    }
    return *it;
  }
  TORCH_WARN("Failed to get HW surface format.");
  return AV_PIX_FMT_NONE;
}

// Scan the hardware configurations of `pCodec` in index order and return the
// first one whose device type is CUDA and whose methods include
// AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX.
// Throws std::runtime_error when the scan ends without a match.
const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
  const AVCodecHWConfig* entry = nullptr;
  for (int idx = 0; (entry = avcodec_get_hw_config(pCodec, idx)) != nullptr;
       ++idx) {
    if (entry->device_type != AV_HWDEVICE_TYPE_CUDA) {
      continue;
    }
    if (entry->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
      return entry;
    }
  }

  std::stringstream err;
  err << "CUDA device was requested, but the codec \"" << pCodec->name
      << "\" is not supported.";
  throw std::runtime_error(err.str());
}
#endif

// Configure and open an already allocated codec context.
//
// The decoder is resolved again from `pParams->codec_id` / `decoder_name`,
// the stream parameters are copied into `pCodecContext`, CUDA decoding is set
// up when requested (USE_CUDA builds only), and the codec is opened with
// `decoder_option`. For audio streams without a channel layout,
// `pParams->channel_layout` is filled with the default layout for the channel
// count of the opened context.
//
// On the CUDA path one reference to the created device context is stored in
// `pCodecContext->hw_device_ctx` and the other is handed to `pHWBufferRef`.
//
// Throws std::runtime_error when any of the FFmpeg calls reports a failure.
void init_codec_context(
    AVCodecContext* pCodecContext,
    AVCodecParameters* pParams,
    const c10::optional<std::string>& decoder_name,
    const OptionDict& decoder_option,
    const torch::Device& device,
    AVBufferRefPtr& pHWBufferRef) {
  const AVCodec* pCodec = get_decode_codec(pParams->codec_id, decoder_name);

  int ret = avcodec_parameters_to_context(pCodecContext, pParams);
  if (ret < 0) {
    throw std::runtime_error(
        "Failed to set CodecContext parameter: " + av_err2string(ret));
  }

#ifdef USE_CUDA
  // Enable HW Acceleration
  if (device.type() == c10::DeviceType::CUDA) {
    const AVCodecHWConfig* config = get_cuda_config(pCodec);
    // TODO: check how to log
    // C10_LOG << "Decoder " << pCodec->name << " supports device " <<
    // av_hwdevice_get_type_name(config->device_type);

    // https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
    // 1. Set HW pixel format (config->pix_fmt) to opaque pointer.
    //    Point at the codec's own HW config entry rather than at a
    //    function-local `static thread_local` copy: that copy is initialized
    //    only on the first call per thread, so it would go stale on later
    //    calls and be shared by every decoder created on the thread.
    //    The callback only reads through this pointer.
    //    NOTE(review): relies on the config returned by avcodec_get_hw_config
    //    outliving the codec context (static codec data in FFmpeg) - confirm
    //    for the supported FFmpeg versions.
    pCodecContext->opaque =
        static_cast<void*>(const_cast<AVPixelFormat*>(&config->pix_fmt));
    // 2. Set pCodecContext->get_format call back function which
    // will retrieve the HW pixel format from opaque pointer.
    pCodecContext->get_format = get_hw_format;
    // 3. Create HW device context and set to pCodecContext.
    AVBufferRef* hw_device_ctx = nullptr;
    ret = av_hwdevice_ctx_create(
        &hw_device_ctx,
        AV_HWDEVICE_TYPE_CUDA,
        std::to_string(device.index()).c_str(),
        nullptr,
        0);
    if (ret < 0) {
      throw std::runtime_error(
          "Failed to create CUDA device context: " + av_err2string(ret));
    }
    assert(hw_device_ctx);
    // Give the original reference to the caller-owned holder first, so it is
    // not lost if taking the second reference fails below.
    pHWBufferRef.reset(hw_device_ctx);
    pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
    if (!pCodecContext->hw_device_ctx) {
      throw std::runtime_error("Failed to reference CUDA device context.");
    }
  }
#endif

  AVDictionary* opts = get_option_dict(decoder_option);
  ret = avcodec_open2(pCodecContext, pCodec, &opts);
  clean_up_dict(opts);

  if (ret < 0) {
    throw std::runtime_error(
        "Failed to initialize CodecContext: " + av_err2string(ret));
  }

  if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout) {
    pParams->channel_layout =
        av_get_default_channel_layout(pCodecContext->channels);
  }
}

// Wrap the raw AVCodecContext pointer `p`; the pointer is forwarded to the
// Wrapper base, which is parameterized with AVCodecContextDeleter.
AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
: Wrapper<AVCodecContext, AVCodecContextDeleter>(p) {}

Expand Down
30 changes: 9 additions & 21 deletions torchaudio/csrc/ffmpeg/ffmpeg.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ namespace ffmpeg {

using OptionDict = std::map<std::string, std::string>;

// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVFORMAT_CONST const
#else
#define AVFORMAT_CONST
#endif

// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
Expand Down Expand Up @@ -84,13 +93,6 @@ struct AVFormatContextPtr
explicit AVFormatContextPtr(AVFormatContext* p);
};

// Create a format context for reading media.
//   src    - the input to open.
//   device - optional input format/device name to force.
//   option - key/value options used when opening the input.
//   io_ctx - optional custom I/O context; defaults to nullptr.
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx = nullptr);

////////////////////////////////////////////////////////////////////////////////
// AVIO
////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -166,20 +168,6 @@ struct AVCodecContextPtr
explicit AVCodecContextPtr(AVCodecContext* p);
};

// Allocate a codec context for a decoder selected either by name (`decoder`,
// when it holds a value) or by codec ID (`codec_id`).
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder);

// Initialize the codec context with the stream parameters and open it.
//   pCodecContext  - the context to configure.
//   pParams        - stream parameters copied into the context.
//   decoder_name   - optional decoder name used to select the decoder.
//   decoder_option - key/value options used when opening the codec.
//   device         - target device; a CUDA device enables HW decoding in
//                    USE_CUDA builds.
//   pHWBufferRef   - receives the HW device context reference on the CUDA path.
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef);

////////////////////////////////////////////////////////////////////////////////
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
Expand Down
Loading

0 comments on commit 10ac6d2

Please sign in to comment.