pytorch · mthrok · Nov 28, 2021 · Dec 4, 2021
@@ -0,0 +1,160 @@
+#include <torchaudio/csrc/ffmpeg/ffmpeg.h>
+
+namespace torchaudio {
+namespace ffmpeg {
+
+////////////////////////////////////////////////////////////////////////////////
+// AVFormatContext
+////////////////////////////////////////////////////////////////////////////////
+// Deleter callback for AVFormatContext: passes the address of the pointer to
+// `avformat_close_input`.
+void AVFormatContextDeleter::operator()(AVFormatContext* p) {
+  avformat_close_input(&p);
+}
+
+namespace {
+// Opens `src` with `avformat_open_input` and returns the resulting context.
+//
+// `device` optionally names the input format/device to force. When it is
+// empty a null format is passed to `avformat_open_input`. `option` is
+// forwarded to `avformat_open_input` unchanged.
+//
+// Throws std::runtime_error when `device` is non-empty but
+// `av_find_input_format` does not know it, or when opening the input fails.
+AVFormatContext* get_format_context(
+    const std::string& src,
+    const std::string& device,
+    AVDictionary** option) {
+  AVInputFormat* pInput = nullptr;
+  if (!device.empty()) {
+    pInput = av_find_input_format(device.c_str());
+    // A null format here would be indistinguishable from "no device
+    // requested", so a mistyped or unavailable device name would go
+    // unnoticed. Report it instead.
+    if (!pInput)
+      throw std::runtime_error("Unsupported device/format: " + device);
+  }
+
+  AVFormatContext* pFormat = nullptr;
+  if (avformat_open_input(&pFormat, src.c_str(), pInput, option) < 0)
+    throw std::runtime_error("Failed to open the input: " + src);
+  return pFormat;
+}
+} // namespace
+
+// Opens the input via `get_format_context`, stores the returned context in
+// the base Wrapper, then runs `avformat_find_stream_info` on it.
+// Throws std::runtime_error when that call reports a negative status.
+AVFormatContextPtr::AVFormatContextPtr(
+    const std::string& src,
+    const std::string& device,
+    AVDictionary** option)
+    : Wrapper<AVFormatContext, AVFormatContextDeleter>(
+          get_format_context(src, device, option)) {
+  const int status = avformat_find_stream_info(ptr.get(), nullptr);
+  if (status < 0) {
+    throw std::runtime_error("Failed to find stream information.");
+  }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// AVPacket
+////////////////////////////////////////////////////////////////////////////////
+// Deleter callback for AVPacket: passes the address of the pointer to
+// `av_packet_free`.
+void AVPacketDeleter::operator()(AVPacket* p) {
+  av_packet_free(&p);
+}
+
+namespace {
+// Allocates an AVPacket with `av_packet_alloc`.
+// Throws std::runtime_error when the allocation returns null.
+AVPacket* get_av_packet() {
+  AVPacket* packet = av_packet_alloc();
+  if (packet == nullptr) {
+    throw std::runtime_error("Failed to allocate AVPacket object.");
+  }
+  return packet;
+}
+} // namespace
+
+// Constructs the wrapper around a packet obtained from `get_av_packet`,
+// which throws if the allocation fails.
+AVPacketPtr::AVPacketPtr()
+    : Wrapper<AVPacket, AVPacketDeleter>(get_av_packet()) {}
+
+////////////////////////////////////////////////////////////////////////////////
+// AVPacket - buffer unref
+////////////////////////////////////////////////////////////////////////////////
+// Keeps a reference to the packet wrapper; no allocation happens here.
+AutoPacketUnref::AutoPacketUnref(AVPacketPtr& p) : p_(p) {}
+
+// Passes the referenced packet to `av_packet_unref` on destruction.
+AutoPacketUnref::~AutoPacketUnref() {
+  av_packet_unref(p_);
+}
+
+// Yields the referenced packet as a raw pointer.
+AutoPacketUnref::operator AVPacket*() const {
+  return static_cast<AVPacket*>(p_);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// AVFrame
+////////////////////////////////////////////////////////////////////////////////
+// Deleter callback for AVFrame: passes the address of the pointer to
+// `av_frame_free`.
+void AVFrameDeleter::operator()(AVFrame* p) {
+  av_frame_free(&p);
+}
+namespace {
+// Allocates an AVFrame with `av_frame_alloc`.
+// Throws std::runtime_error when the allocation returns null.
+AVFrame* get_av_frame() {
+  AVFrame* frame = av_frame_alloc();
+  if (frame == nullptr) {
+    throw std::runtime_error("Failed to allocate AVFrame object.");
+  }
+  return frame;
+}
+} // namespace
+
+// Constructs the wrapper around a frame obtained from `get_av_frame`, which
+// throws if the allocation fails.
+AVFramePtr::AVFramePtr() : Wrapper<AVFrame, AVFrameDeleter>(get_av_frame()) {}
+
+///////////////////////////////////////////////////////////////////////////////
+// AVFrame - buffer unref
+////////////////////////////////////////////////////////////////////////////////
+// Keeps a reference to the frame wrapper; no allocation happens here.
+AutoFrameUnref::AutoFrameUnref(AVFramePtr& p) : p_(p) {}
+
+// Passes the referenced frame to `av_frame_unref` on destruction.
+AutoFrameUnref::~AutoFrameUnref() {
+  av_frame_unref(p_);
+}
+
+// Yields the referenced frame as a raw pointer.
+AutoFrameUnref::operator AVFrame*() const {
+  return static_cast<AVFrame*>(p_);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// AVCodecContext
+////////////////////////////////////////////////////////////////////////////////
+// Deleter callback for AVCodecContext: passes the address of the pointer to
+// `avcodec_free_context`.
+void AVCodecContextDeleter::operator()(AVCodecContext* p) {
+  avcodec_free_context(&p);
+}
+
+namespace {
+// Looks up the decoder for `pParams->codec_id` and allocates a codec context
+// for it with `avcodec_alloc_context3`.
+// Throws std::runtime_error when no decoder is found or the allocation
+// returns null.
+AVCodecContext* get_codec_context(AVCodecParameters* pParams) {
+  const AVCodec* decoder = avcodec_find_decoder(pParams->codec_id);
+  if (decoder == nullptr)
+    throw std::runtime_error("Unknown codec.");
+
+  AVCodecContext* context = avcodec_alloc_context3(decoder);
+  if (context == nullptr)
+    throw std::runtime_error("Failed to allocate CodecContext.");
+
+  return context;
+}
+
+// Copies `pParams` into `pCodecContext` and opens the context with the
+// decoder matching `pParams->codec_id`.
+//
+// For audio parameters whose `channel_layout` is zero, the layout is then
+// written back into `pParams` using the default layout for the context's
+// channel count.
+//
+// Throws std::runtime_error when copying the parameters or opening the
+// context reports a negative status.
+void init_codec_context(
+    AVCodecContext* pCodecContext,
+    AVCodecParameters* pParams) {
+  const AVCodec* decoder = avcodec_find_decoder(pParams->codec_id);
+
+  const int copyStatus = avcodec_parameters_to_context(pCodecContext, pParams);
+  if (copyStatus < 0)
+    throw std::runtime_error("Failed to set CodecContext parameter.");
+
+  const int openStatus = avcodec_open2(pCodecContext, decoder, nullptr);
+  if (openStatus < 0)
+    throw std::runtime_error("Failed to initialize CodecContext.");
+
+  const bool isAudio = pParams->codec_type == AVMEDIA_TYPE_AUDIO;
+  if (isAudio && !pParams->channel_layout) {
+    // Note: the fallback layout is stored on the parameters, not the context.
+    pParams->channel_layout =
+        av_get_default_channel_layout(pCodecContext->channels);
+  }
+}
+} // namespace
+
+// Two-step construction: `get_codec_context` allocates the context that the
+// base Wrapper takes ownership of, then `init_codec_context` configures and
+// opens it. Either helper may throw std::runtime_error.
+AVCodecContextPtr::AVCodecContextPtr(AVCodecParameters* pParam)
+    : Wrapper<AVCodecContext, AVCodecContextDeleter>(
+          get_codec_context(pParam)) {
+  // Runs after the base owns the context, so the context is handed to the
+  // deleter if this throws.
+  init_codec_context(ptr.get(), pParam);
+}
+////////////////////////////////////////////////////////////////////////////////
+// AVFilterGraph
+////////////////////////////////////////////////////////////////////////////////
+// Deleter callback for AVFilterGraph: passes the address of the pointer to
+// `avfilter_graph_free`.
+void AVFilterGraphDeleter::operator()(AVFilterGraph* p) {
+  avfilter_graph_free(&p);
+}
+
+namespace {
+// Allocates an AVFilterGraph with `avfilter_graph_alloc`.
+// Throws std::runtime_error when the allocation returns null. The message
+// names the object, matching the other allocation helpers in this file.
+AVFilterGraph* get_filter_graph() {
+  AVFilterGraph* ptr = avfilter_graph_alloc();
+  if (!ptr)
+    throw std::runtime_error("Failed to allocate AVFilterGraph object.");
+  return ptr;
+}
+} // namespace
+// Constructs the wrapper around a graph obtained from `get_filter_graph`,
+// which throws if the allocation fails.
+AVFilterGraphPtr::AVFilterGraphPtr()
+    : Wrapper<AVFilterGraph, AVFilterGraphDeleter>(get_filter_graph()) {}
+} // namespace ffmpeg
+} // namespace torchaudio
@@ -0,0 +1,137 @@
+// One-stop header for all FFmpeg needs.
+#pragma once
+#include <cstdint>
+#include <memory>
+#include <string>
+
+extern "C" {
+#include <libavcodec/avcodec.h>
+#include <libavdevice/avdevice.h>
+#include <libavfilter/avfilter.h>
+#include <libavfilter/buffersink.h>
+#include <libavfilter/buffersrc.h>
+#include <libavformat/avformat.h>
+#include <libavutil/avutil.h>
+#include <libavutil/frame.h>
+#include <libavutil/imgutils.h>
+#include <libavutil/log.h>
+#include <libavutil/pixdesc.h>
+}
+
+namespace torchaudio {
+namespace ffmpeg {
+
+// Base structure that handles memory management.
+// The resource is held in a std::unique_ptr and freed by its destructor,
+// which invokes the custom release mechanism supplied via `Deleter`.
+// https://stackoverflow.com/a/19054280
+//
+// Resource allocation is provided by the constructors of derived classes.
+template <typename T, typename Deleter>
+class Wrapper {
+ protected:
+  std::unique_ptr<T, Deleter> ptr;
+
+ public:
+  // A wrapper always starts from an explicitly supplied resource.
+  Wrapper() = delete;
+  // Takes ownership of `t`.
+  // Declared with the plain class name: naming a constructor with a
+  // template-id (`Wrapper<T, Deleter>(...)`) is rejected as of C++20.
+  Wrapper(T* t) : ptr(t) {}
+  // Member access on the managed object.
+  T* operator->() const {
+    return ptr.get();
+  }
+  // True when a resource is currently held.
+  explicit operator bool() const {
+    return static_cast<bool>(ptr);
+  }
+  // Non-owning raw pointer, so the wrapper can be passed where a `T*` is
+  // expected.
+  operator T*() const {
+    return ptr.get();
+  }
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// AVFormatContext
+////////////////////////////////////////////////////////////////////////////////
+// Deleter type for AVFormatContext; used as the `Deleter` argument of the
+// Wrapper that AVFormatContextPtr derives from.
+struct AVFormatContextDeleter {
+  void operator()(AVFormatContext* p);
+};
+
+// Owning handle for an AVFormatContext.
+struct AVFormatContextPtr
+    : public Wrapper<AVFormatContext, AVFormatContextDeleter> {
+  // `src`: the input to open.
+  // `device`: name of the input format/device to use; empty for none.
+  // `option`: dictionary pointer forwarded when the input is opened.
+  AVFormatContextPtr(
+      const std::string& src,
+      const std::string& device,
+      AVDictionary** option);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// AVPacket
+////////////////////////////////////////////////////////////////////////////////
+// Deleter type for AVPacket; used as the `Deleter` argument of the Wrapper
+// that AVPacketPtr derives from.
+struct AVPacketDeleter {
+  void operator()(AVPacket* p);
+};
+
+// Owning handle for an AVPacket; the default constructor allocates the
+// packet.
+struct AVPacketPtr : public Wrapper<AVPacket, AVPacketDeleter> {
+  AVPacketPtr();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// AVPacket - buffer unref
+////////////////////////////////////////////////////////////////////////////////
+// The AVPacket structure employs two-stage memory allocation.
+// The first stage allocates the AVPacket object itself, and it typically
+// happens only once throughout the lifetime of the application.
+// The second stage allocates the content (media data) each time the
+// input file is processed and a chunk of data is read. The memory allocated
+// during this stage has to be released before the next iteration.
+// The first-stage memory management is handled by `AVPacketPtr`.
+// `AutoPacketUnref` handles the second-stage memory management.
+struct AutoPacketUnref {
+  // Referenced, not owned: the AVPacketPtr must outlive this object.
+  AVPacketPtr& p_;
+  AutoPacketUnref(AVPacketPtr& p);
+  // Releases the packet content.
+  ~AutoPacketUnref();
+  // Raw pointer to the referenced packet.
+  operator AVPacket*() const;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// AVFrame
+////////////////////////////////////////////////////////////////////////////////
+// Deleter type for AVFrame; used as the `Deleter` argument of the Wrapper
+// that AVFramePtr derives from.
+struct AVFrameDeleter {
+  void operator()(AVFrame* p);
+};
+
+// Owning handle for an AVFrame; the default constructor allocates the frame.
+struct AVFramePtr : public Wrapper<AVFrame, AVFrameDeleter> {
+  AVFramePtr();
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// AVFrame - buffer unref
+////////////////////////////////////////////////////////////////////////////////
+// Similar to `AutoPacketUnref`, this structure releases the memory
+// allocated for frame content when it is destroyed.
+struct AutoFrameUnref {
+  // Referenced, not owned: the AVFramePtr must outlive this object.
+  AVFramePtr& p_;
+  AutoFrameUnref(AVFramePtr& p);
+  // Releases the frame content.
+  ~AutoFrameUnref();
+  // Raw pointer to the referenced frame.
+  operator AVFrame*() const;
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// AVCodecContext
+////////////////////////////////////////////////////////////////////////////////
+// Deleter type for AVCodecContext; used as the `Deleter` argument of the
+// Wrapper that AVCodecContextPtr derives from.
+struct AVCodecContextDeleter {
+  void operator()(AVCodecContext* p);
+};
+// Owning handle for an AVCodecContext built from the given codec parameters.
+struct AVCodecContextPtr
+    : public Wrapper<AVCodecContext, AVCodecContextDeleter> {
+  // `pParam` is taken as a non-const pointer; the implementation may write to
+  // it.
+  AVCodecContextPtr(AVCodecParameters* pParam);
+};
+
+////////////////////////////////////////////////////////////////////////////////
+// AVFilterGraph
+////////////////////////////////////////////////////////////////////////////////
+// Deleter type for AVFilterGraph; used as the `Deleter` argument of the
+// Wrapper that AVFilterGraphPtr derives from.
+struct AVFilterGraphDeleter {
+  void operator()(AVFilterGraph* p);
+};
+// Owning handle for an AVFilterGraph; the default constructor allocates the
+// graph.
+struct AVFilterGraphPtr : public Wrapper<AVFilterGraph, AVFilterGraphDeleter> {
+  AVFilterGraphPtr();
+};
+} // namespace ffmpeg
+} // namespace torchaudio