From 36c61a3312110d0f1e6167d1c5ae209ae499559a Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Sun, 28 Nov 2021 13:21:30 -0500 Subject: [PATCH 1/2] Add wrapper classes that manage memories allocated by ffmpeg --- torchaudio/csrc/ffmpeg/ffmpeg.cpp | 151 ++++++++++++++++++++++++++++++ torchaudio/csrc/ffmpeg/ffmpeg.h | 132 ++++++++++++++++++++++++++ 2 files changed, 283 insertions(+) create mode 100644 torchaudio/csrc/ffmpeg/ffmpeg.cpp create mode 100644 torchaudio/csrc/ffmpeg/ffmpeg.h diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.cpp b/torchaudio/csrc/ffmpeg/ffmpeg.cpp new file mode 100644 index 0000000000..547c8e915b --- /dev/null +++ b/torchaudio/csrc/ffmpeg/ffmpeg.cpp @@ -0,0 +1,151 @@ +#include + +namespace torchaudio { +namespace ffmpeg { + +//////////////////////////////////////////////////////////////////////////////// +// AVFormatContext +//////////////////////////////////////////////////////////////////////////////// +void AVFormatContextDeleter::operator()(AVFormatContext* p) { + avformat_close_input(&p); +}; + +namespace { +AVFormatContext* get_format_context(const std::string& src) { + AVFormatContext* pFormat = NULL; + if (avformat_open_input(&pFormat, src.c_str(), NULL, NULL) < 0) + throw std::runtime_error("Failed to open the input: " + src); + return pFormat; +} +} // namespace + +AVFormatContextPtr::AVFormatContextPtr(const std::string& src) + : Wrapper( + get_format_context(src)) { + if (avformat_find_stream_info(ptr.get(), NULL) < 0) + throw std::runtime_error("Failed to find stream information."); +} + +//////////////////////////////////////////////////////////////////////////////// +// AVPacket +//////////////////////////////////////////////////////////////////////////////// +void AVPacketDeleter::operator()(AVPacket* p) { + av_packet_free(&p); +}; + +namespace { +AVPacket* get_av_packet() { + AVPacket* pPacket = av_packet_alloc(); + if (!pPacket) + throw std::runtime_error("Failed to allocate AVPacket object."); + 
return pPacket; +} +} // namespace + +AVPacketPtr::AVPacketPtr() + : Wrapper(get_av_packet()) {} + +//////////////////////////////////////////////////////////////////////////////// +// AVPacket - buffer unref +//////////////////////////////////////////////////////////////////////////////// +AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p){}; +AutoPacketUnref::~AutoPacketUnref() { + av_packet_unref(p_); +} +AutoPacketUnref::operator AVPacket*() const { + return p_; +} + +//////////////////////////////////////////////////////////////////////////////// +// AVFrame +//////////////////////////////////////////////////////////////////////////////// +void AVFrameDeleter::operator()(AVFrame* p) { + av_frame_free(&p); +}; +namespace { +AVFrame* get_av_frame() { + AVFrame* pFrame = av_frame_alloc(); + if (!pFrame) + throw std::runtime_error("Failed to allocate AVFrame object."); + return pFrame; +} +} // namespace + +AVFramePtr::AVFramePtr() : Wrapper(get_av_frame()) {} + +/////////////////////////////////////////////////////////////////////////////// +// AVFrame - buffer unref +//////////////////////////////////////////////////////////////////////////////// +AutoFrameUnref::AutoFrameUnref(AVFramePtr& p) : p_(p){}; +AutoFrameUnref::~AutoFrameUnref() { + av_frame_unref(p_); +} +AutoFrameUnref::operator AVFrame*() const { + return p_; +} + +//////////////////////////////////////////////////////////////////////////////// +// AVCodecContext +//////////////////////////////////////////////////////////////////////////////// +void AVCodecContextDeleter::operator()(AVCodecContext* p) { + avcodec_free_context(&p); +}; + +namespace { +AVCodecContext* get_codec_context(AVCodecParameters* pParams) { + const AVCodec* pCodec = avcodec_find_decoder(pParams->codec_id); + + if (!pCodec) { + throw std::runtime_error("Unknown codec."); + } + + AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec); + if (!pCodecContext) { + throw std::runtime_error("Failed to allocate 
CodecContext."); + } + return pCodecContext; +} + +void init_codec_context( + AVCodecContext* pCodecContext, + AVCodecParameters* pParams) { + const AVCodec* pCodec = avcodec_find_decoder(pParams->codec_id); + + if (avcodec_parameters_to_context(pCodecContext, pParams) < 0) { + throw std::runtime_error("Failed to set CodecContext parameter."); + } + + if (avcodec_open2(pCodecContext, pCodec, NULL) < 0) { + throw std::runtime_error("Failed to initialize CodecContext."); + } + + if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout) + pParams->channel_layout = + av_get_default_channel_layout(pCodecContext->channels); +} +} // namespace + +AVCodecContextPtr::AVCodecContextPtr(AVCodecParameters* pParam) + : Wrapper( + get_codec_context(pParam)) { + init_codec_context(ptr.get(), pParam); +} +//////////////////////////////////////////////////////////////////////////////// +// AVFilterGraph +//////////////////////////////////////////////////////////////////////////////// +void AVFilterGraphDeleter::operator()(AVFilterGraph* p) { + avfilter_graph_free(&p); +}; + +namespace { +AVFilterGraph* get_filter_graph() { + AVFilterGraph* ptr = avfilter_graph_alloc(); + if (!ptr) + throw std::runtime_error("Failed to allocate resouce."); + return ptr; +} +} // namespace +AVFilterGraphPtr::AVFilterGraphPtr() + : Wrapper(get_filter_graph()) {} +} // namespace ffmpeg +} // namespace torchaudio diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.h b/torchaudio/csrc/ffmpeg/ffmpeg.h new file mode 100644 index 0000000000..ec9c40976c --- /dev/null +++ b/torchaudio/csrc/ffmpeg/ffmpeg.h @@ -0,0 +1,132 @@ +// One stop header for all ffmpeg needs +#pragma once +#include +#include +#include + +extern "C" { +#include +#include +#include +#include +#include +#include +#include +#include +#include +} + +namespace torchaudio { +namespace ffmpeg { + +// Base structure that handles memory management. 
+// Resource is freed by the destructor of unique_ptr, +// which will call custom delete mechanism provided via Deleter +// https://stackoverflow.com/a/19054280 +// +// The resource allocation will be provided by custom constructors. +template +class Wrapper { + protected: + std::unique_ptr ptr; + + public: + Wrapper() = delete; + Wrapper(T* t) : ptr(t){}; + T* operator->() const { + return ptr.get(); + }; + explicit operator bool() const { + return (bool)ptr; + }; + operator T*() const { + return ptr.get(); + } +}; + +//////////////////////////////////////////////////////////////////////////////// +// AVFormatContext +//////////////////////////////////////////////////////////////////////////////// +struct AVFormatContextDeleter { + void operator()(AVFormatContext* p); +}; + +struct AVFormatContextPtr + : public Wrapper { + AVFormatContextPtr(const std::string& src); +}; + +//////////////////////////////////////////////////////////////////////////////// +// AVPacket +//////////////////////////////////////////////////////////////////////////////// +struct AVPacketDeleter { + void operator()(AVPacket* p); +}; + +struct AVPacketPtr : public Wrapper { + AVPacketPtr(); +}; + +//////////////////////////////////////////////////////////////////////////////// +// AVPacket - buffer unref +//////////////////////////////////////////////////////////////////////////////// +// AVPacket structure employs two-staged memory allocation. +// The first-stage is for allocating AVPacket object itself, and it typically +// happens only once throughout the lifetime of the application. +// The second-stage is for allocating the content (media data) each time the +// input file is processed and a chunk of data is read. The memory allocated +// during this time has to be released before the next iteration. +// The first-stage memory management is handled by `AVPacketPtr`. +// `AutoPacketUnref` handles the second-stage memory management. 
+struct AutoPacketUnref { + AVPacketPtr& p_; + AutoPacketUnref(AVPacketPtr& p); + ~AutoPacketUnref(); + operator AVPacket*() const; +}; + +//////////////////////////////////////////////////////////////////////////////// +// AVFrame +//////////////////////////////////////////////////////////////////////////////// +struct AVFrameDeleter { + void operator()(AVFrame* p); +}; + +struct AVFramePtr : public Wrapper { + AVFramePtr(); +}; + +//////////////////////////////////////////////////////////////////////////////// +// AVFrame - buffer unref +//////////////////////////////////////////////////////////////////////////////// +// Similar to `AutoPacketUnref`, this structure will release the memory +// allocated for frame content. +struct AutoFrameUnref { + AVFramePtr& p_; + AutoFrameUnref(AVFramePtr& p); + ~AutoFrameUnref(); + operator AVFrame*() const; +}; + +//////////////////////////////////////////////////////////////////////////////// +// AVCodecContext +//////////////////////////////////////////////////////////////////////////////// +struct AVCodecContextDeleter { + void operator()(AVCodecContext* p); +}; +struct AVCodecContextPtr + : public Wrapper { + AVCodecContextPtr(AVCodecParameters* pParam); +}; + +//////////////////////////////////////////////////////////////////////////////// +// AVFilterGraph +//////////////////////////////////////////////////////////////////////////////// +struct AVFilterGraphDeleter { + void operator()(AVFilterGraph* p); +}; +struct AVFilterGraphPtr : public Wrapper { + AVFilterGraphPtr(); +}; +} // namespace ffmpeg +} // namespace torchaudio From 014b11136efb63efbc0519d7ba5815f65410f326 Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Fri, 3 Dec 2021 23:33:10 -0500 Subject: [PATCH 2/2] add device support --- torchaudio/csrc/ffmpeg/ffmpeg.cpp | 17 +++++++++++++---- torchaudio/csrc/ffmpeg/ffmpeg.h | 7 ++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.cpp 
b/torchaudio/csrc/ffmpeg/ffmpeg.cpp index 547c8e915b..ed434dc0aa 100644 --- a/torchaudio/csrc/ffmpeg/ffmpeg.cpp +++ b/torchaudio/csrc/ffmpeg/ffmpeg.cpp @@ -11,17 +11,26 @@ void AVFormatContextDeleter::operator()(AVFormatContext* p) { }; namespace { -AVFormatContext* get_format_context(const std::string& src) { +AVFormatContext* get_format_context( + const std::string& src, + const std::string& device, + AVDictionary** option) { AVFormatContext* pFormat = NULL; - if (avformat_open_input(&pFormat, src.c_str(), NULL, NULL) < 0) + AVInputFormat* pInput = + device.empty() ? NULL : av_find_input_format(device.c_str()); + + if (avformat_open_input(&pFormat, src.c_str(), pInput, option) < 0) throw std::runtime_error("Failed to open the input: " + src); return pFormat; } } // namespace -AVFormatContextPtr::AVFormatContextPtr(const std::string& src) +AVFormatContextPtr::AVFormatContextPtr( + const std::string& src, + const std::string& device, + AVDictionary** option) : Wrapper( - get_format_context(src)) { + get_format_context(src, device, option)) { if (avformat_find_stream_info(ptr.get(), NULL) < 0) throw std::runtime_error("Failed to find stream information."); } diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.h b/torchaudio/csrc/ffmpeg/ffmpeg.h index ec9c40976c..da058e33c8 100644 --- a/torchaudio/csrc/ffmpeg/ffmpeg.h +++ b/torchaudio/csrc/ffmpeg/ffmpeg.h @@ -6,6 +6,7 @@ extern "C" { #include +#include #include #include #include @@ -13,6 +14,7 @@ extern "C" { #include #include #include +#include #include } @@ -53,7 +55,10 @@ struct AVFormatContextDeleter { struct AVFormatContextPtr : public Wrapper { - AVFormatContextPtr(const std::string& src); + AVFormatContextPtr( + const std::string& src, + const std::string& device, + AVDictionary** option); }; ////////////////////////////////////////////////////////////////////////////////