Refactor the constructors of pointer wrappers (#2373)

Summary: This commit refactors the constructors of the wrapper classes so that the wrapper classes are only responsible for deallocation of the underlying FFmpeg custom structures. The responsibility for custom initialization is moved to helper functions. Context: The FFmpeg API uses a bunch of raw pointers, which require dedicated allocators and deallocators. In torchaudio we wrap these pointers with `std::unique_ptr<>` to adopt RAII semantics. Currently, all of the customization logic required for `Streamer` is handled by the constructors of the wrapper classes, like the following: ``` AVFormatContextPtr( const std::string& src, const std::string& device, const std::map<std::string, std::string>& option); ``` This constructor allocates the raw `AVFormatContext*` pointer while initializing it with the given option, and then it parses the input media. As we consider the write/encode features, which require a different way of initializing the `AVFormatContext*`, making it the responsibility of the constructors of `AVFormatContextPtr` reduces the flexibility. Thus this commit moves the customization to helper factory functions: - `AVFormatContextPtr(...)` -> `get_input_format_context(...)` - `AVCodecContextPtr(...)` -> `get_decode_context(...)` Pull Request resolved: #2373 Reviewed By: hwangjeff Differential Revision: D36230148 Pulled By: mthrok fbshipit-source-id: 202d57d549223904ee958193f3b386ef5a9cda3a
pytorch · May 11, 2022 · 93c26d6 · 93c26d6
1 parent 2c79b55
commit 93c26d6
Show file tree

Hide file tree

Showing 5 changed files with 57 additions and 45 deletions.
diff --git a/torchaudio/csrc/ffmpeg/decoder.cpp b/torchaudio/csrc/ffmpeg/decoder.cpp
@@ -11,7 +11,15 @@ Decoder::Decoder(
     const std::string& decoder_name,
     const std::map<std::string, std::string>& decoder_option,
     const torch::Device& device)
-    : pCodecContext(pParam, decoder_name, decoder_option, device) {}
+    : pCodecContext(get_decode_context(pParam->codec_id, decoder_name)) {
+  init_codec_context(
+      pCodecContext,
+      pParam,
+      decoder_name,
+      decoder_option,
+      device,
+      pHWBufferRef);
+}
 
 int Decoder::process_packet(AVPacket* pPacket) {
   return avcodec_send_packet(pCodecContext, pPacket);

diff --git a/torchaudio/csrc/ffmpeg/decoder.h b/torchaudio/csrc/ffmpeg/decoder.h
@@ -7,6 +7,7 @@ namespace ffmpeg {
 
 class Decoder {
   AVCodecContextPtr pCodecContext;
+  AVBufferRefPtr pHWBufferRef;
 
  public:
   // Default constructable

diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.cpp b/torchaudio/csrc/ffmpeg/ffmpeg.cpp
@@ -62,7 +62,9 @@ std::string join(std::vector<std::string> vars) {
 #define AVINPUT_FORMAT_CONST
 #endif
 
-AVFormatContext* get_format_context(
+} // namespace
+
+AVFormatContextPtr get_input_format_context(
     const std::string& src,
     const std::string& device,
     const std::map<std::string, std::string>& option) {
@@ -83,19 +85,11 @@ AVFormatContext* get_format_context(
     throw std::runtime_error(
         "Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
         ").");
-  return pFormat;
+  return AVFormatContextPtr(pFormat);
 }
-} // namespace
 
-AVFormatContextPtr::AVFormatContextPtr(
-    const std::string& src,
-    const std::string& device,
-    const std::map<std::string, std::string>& option)
-    : Wrapper<AVFormatContext, AVFormatContextDeleter>(
-          get_format_context(src, device, option)) {
-  if (avformat_find_stream_info(ptr.get(), NULL) < 0)
-    throw std::runtime_error("Failed to find stream information.");
-}
+AVFormatContextPtr::AVFormatContextPtr(AVFormatContext* p)
+    : Wrapper<AVFormatContext, AVFormatContextDeleter>(p) {}
 
 ////////////////////////////////////////////////////////////////////////////////
 // AVPacket
@@ -152,7 +146,7 @@ void AVCodecContextDeleter::operator()(AVCodecContext* p) {
 };
 
 namespace {
-AVCodecContext* get_codec_context(
+const AVCodec* get_decode_codec(
     enum AVCodecID codec_id,
     const std::string& decoder_name) {
   const AVCodec* pCodec = decoder_name.empty()
@@ -169,12 +163,21 @@ AVCodecContext* get_codec_context(
     }
     throw std::runtime_error(ss.str());
   }
+  return pCodec;
+}
+
+} // namespace
+
+AVCodecContextPtr get_decode_context(
+    enum AVCodecID codec_id,
+    const std::string& decoder_name) {
+  const AVCodec* pCodec = get_decode_codec(codec_id, decoder_name);
 
   AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec);
   if (!pCodecContext) {
     throw std::runtime_error("Failed to allocate CodecContext.");
   }
-  return pCodecContext;
+  return AVCodecContextPtr(pCodecContext);
 }
 
 #ifdef USE_CUDA
@@ -217,12 +220,7 @@ void init_codec_context(
     const std::map<std::string, std::string>& decoder_option,
     const torch::Device& device,
     AVBufferRefPtr& pHWBufferRef) {
-  const AVCodec* pCodec = decoder_name.empty()
-      ? avcodec_find_decoder(pParams->codec_id)
-      : avcodec_find_decoder_by_name(decoder_name.c_str());
-
-  // No need to check if pCodec is null as it's been already checked in
-  // get_codec_context
+  const AVCodec* pCodec = get_decode_codec(pParams->codec_id, decoder_name);
 
   if (avcodec_parameters_to_context(pCodecContext, pParams) < 0) {
     throw std::runtime_error("Failed to set CodecContext parameter.");
@@ -276,19 +274,9 @@ void init_codec_context(
     pParams->channel_layout =
         av_get_default_channel_layout(pCodecContext->channels);
 }
-} // namespace
 
-AVCodecContextPtr::AVCodecContextPtr(
-    AVCodecParameters* pParam,
-    const std::string& decoder_name,
-    const std::map<std::string, std::string>& decoder_option,
-    const torch::Device& device)
-    : Wrapper<AVCodecContext, AVCodecContextDeleter>(
-          get_codec_context(pParam->codec_id, decoder_name)),
-      pHWBufferRef() {
-  init_codec_context(
-      ptr.get(), pParam, decoder_name, decoder_option, device, pHWBufferRef);
-}
+AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
+    : Wrapper<AVCodecContext, AVCodecContextDeleter>(p) {}
 
 ////////////////////////////////////////////////////////////////////////////////
 // AVBufferRefPtr

diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.h b/torchaudio/csrc/ffmpeg/ffmpeg.h
@@ -65,12 +65,15 @@ struct AVFormatContextDeleter {
 
 struct AVFormatContextPtr
     : public Wrapper<AVFormatContext, AVFormatContextDeleter> {
-  AVFormatContextPtr(
-      const std::string& src,
-      const std::string& device,
-      const std::map<std::string, std::string>& option);
+  explicit AVFormatContextPtr(AVFormatContext* p);
 };
 
+// create format context for reading media
+AVFormatContextPtr get_input_format_context(
+    const std::string& src,
+    const std::string& device,
+    const std::map<std::string, std::string>& option);
+
 ////////////////////////////////////////////////////////////////////////////////
 // AVPacket
 ////////////////////////////////////////////////////////////////////////////////
@@ -132,15 +135,23 @@ struct AVCodecContextDeleter {
 };
 struct AVCodecContextPtr
     : public Wrapper<AVCodecContext, AVCodecContextDeleter> {
-  AVBufferRefPtr pHWBufferRef;
-
-  AVCodecContextPtr(
-      AVCodecParameters* pParam,
-      const std::string& decoder,
-      const std::map<std::string, std::string>& decoder_option,
-      const torch::Device& device);
+  explicit AVCodecContextPtr(AVCodecContext* p);
 };
 
+// Allocate codec context from either decoder name or ID
+AVCodecContextPtr get_decode_context(
+    enum AVCodecID codec_id,
+    const std::string& decoder);
+
+// Initialize codec context with the parameters
+void init_codec_context(
+    AVCodecContext* pCodecContext,
+    AVCodecParameters* pParams,
+    const std::string& decoder_name,
+    const std::map<std::string, std::string>& decoder_option,
+    const torch::Device& device,
+    AVBufferRefPtr& pHWBufferRef);
+
 ////////////////////////////////////////////////////////////////////////////////
 // AVFilterGraph
 ////////////////////////////////////////////////////////////////////////////////

diff --git a/torchaudio/csrc/ffmpeg/streamer.cpp b/torchaudio/csrc/ffmpeg/streamer.cpp
@@ -46,7 +46,11 @@ Streamer::Streamer(
     const std::string& src,
     const std::string& device,
     const std::map<std::string, std::string>& option)
-    : pFormatContext(src, device, option) {
+    : pFormatContext(get_input_format_context(src, device, option)) {
+  if (avformat_find_stream_info(pFormatContext, nullptr) < 0) {
+    throw std::runtime_error("Failed to find stream information.");
+  }
+
   processors =
       std::vector<std::unique_ptr<StreamProcessor>>(pFormatContext->nb_streams);
   for (int i = 0; i < pFormatContext->nb_streams; ++i) {