Move helper functions out of common utility for better locality (#2512)

Summary: This commit moves helper functions/definitions around to achieve better locality of logic. ## Detail `ffmpeg.[h|cpp]` implements classes that wrap FFmpeg structures with RAII semantics. Initially these classes included the construction logic in their constructors, but that logic was extracted into factory functions in #2373. The factory functions stayed in `ffmpeg.[h|cpp]` because the logic for initializing and cleaning up AVDictionary was only available in `ffmpeg.cpp`. Now that AVDictionary handling is properly defined in #2507, the factory functions, which are not very reusable, are better placed alongside the implementations that use them. This makes `ffmpeg.h` lean and clean, and makes it easier to see what can be reused. Pull Request resolved: #2512 Reviewed By: hwangjeff Differential Revision: D37477592 Pulled By: mthrok fbshipit-source-id: 8c1b5059ea5f44649cc0eb1f82d1a92877ef186e
pytorch · Jul 7, 2022 · 10ac6d2 · 10ac6d2
1 parent 515fd01
commit 10ac6d2
Show file tree

Hide file tree

Showing 5 changed files with 175 additions and 209 deletions.
diff --git a/torchaudio/csrc/ffmpeg/decoder.cpp b/torchaudio/csrc/ffmpeg/decoder.cpp
@@ -6,19 +6,133 @@ namespace ffmpeg {
 ////////////////////////////////////////////////////////////////////////////////
 // Decoder
 ////////////////////////////////////////////////////////////////////////////////
+namespace {
+AVCodecContextPtr get_decode_context(
+    enum AVCodecID codec_id,
+    const c10::optional<std::string>& decoder_name) {
+  const AVCodec* pCodec = !decoder_name.has_value()
+      ? avcodec_find_decoder(codec_id)
+      : avcodec_find_decoder_by_name(decoder_name.value().c_str());
+
+  if (!pCodec) {
+    std::stringstream ss;
+    if (!decoder_name.has_value()) {
+      ss << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
+         << codec_id << ").";
+    } else {
+      ss << "Unsupported codec: \"" << decoder_name.value() << "\".";
+    }
+    throw std::runtime_error(ss.str());
+  }
+
+  AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec);
+  if (!pCodecContext) {
+    throw std::runtime_error("Failed to allocate CodecContext.");
+  }
+  return AVCodecContextPtr(pCodecContext);
+}
+
+#ifdef USE_CUDA
+enum AVPixelFormat get_hw_format(
+    AVCodecContext* ctx,
+    const enum AVPixelFormat* pix_fmts) {
+  const enum AVPixelFormat* p = nullptr;
+  AVPixelFormat pix_fmt = *static_cast<AVPixelFormat*>(ctx->opaque);
+  for (p = pix_fmts; *p != -1; p++) {
+    if (*p == pix_fmt) {
+      return *p;
+    }
+  }
+  TORCH_WARN("Failed to get HW surface format.");
+  return AV_PIX_FMT_NONE;
+}
+
+const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
+  for (int i = 0;; ++i) {
+    const AVCodecHWConfig* config = avcodec_get_hw_config(pCodec, i);
+    if (!config) {
+      break;
+    }
+    if (config->device_type == AV_HWDEVICE_TYPE_CUDA &&
+        config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
+      return config;
+    }
+  }
+  std::stringstream ss;
+  ss << "CUDA device was requested, but the codec \"" << pCodec->name
+     << "\" is not supported.";
+  throw std::runtime_error(ss.str());
+}
+#endif
+
+void init_codec_context(
+    AVCodecContext* pCodecContext,
+    AVCodecParameters* pParams,
+    const OptionDict& decoder_option,
+    const torch::Device& device,
+    AVBufferRefPtr& pHWBufferRef) {
+  int ret = avcodec_parameters_to_context(pCodecContext, pParams);
+  if (ret < 0) {
+    throw std::runtime_error(
+        "Failed to set CodecContext parameter: " + av_err2string(ret));
+  }
+
+#ifdef USE_CUDA
+  // Enable HW Acceleration
+  if (device.type() == c10::DeviceType::CUDA) {
+    const AVCodecHWConfig* config = get_cuda_config(pCodecContext->codec);
+    // TODO: check how to log
+    // C10_LOG << "Decoder " << pCodec->name << " supports device " <<
+    // av_hwdevice_get_type_name(config->device_type);
+
+    // https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
+    // 1. Set HW pixel format (config->pix_fmt) to opaue pointer.
+    static thread_local AVPixelFormat pix_fmt = config->pix_fmt;
+    pCodecContext->opaque = static_cast<void*>(&pix_fmt);
+    // 2. Set pCodecContext->get_format call back function which
+    // will retrieve the HW pixel format from opaque pointer.
+    pCodecContext->get_format = get_hw_format;
+    // 3. Create HW device context and set to pCodecContext.
+    AVBufferRef* hw_device_ctx = nullptr;
+    ret = av_hwdevice_ctx_create(
+        &hw_device_ctx,
+        AV_HWDEVICE_TYPE_CUDA,
+        std::to_string(device.index()).c_str(),
+        nullptr,
+        0);
+    if (ret < 0) {
+      throw std::runtime_error(
+          "Failed to create CUDA device context: " + av_err2string(ret));
+    }
+    assert(hw_device_ctx);
+    pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
+    pHWBufferRef.reset(hw_device_ctx);
+  }
+#endif
+
+  AVDictionary* opts = get_option_dict(decoder_option);
+  ret = avcodec_open2(pCodecContext, pCodecContext->codec, &opts);
+  clean_up_dict(opts);
+
+  if (ret < 0) {
+    throw std::runtime_error(
+        "Failed to initialize CodecContext: " + av_err2string(ret));
+  }
+
+  if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout)
+    pParams->channel_layout =
+        av_get_default_channel_layout(pCodecContext->channels);
+}
+} // namespace
+
 Decoder::Decoder(
     AVCodecParameters* pParam,
     const c10::optional<std::string>& decoder_name,
     const OptionDict& decoder_option,
     const torch::Device& device)
     : pCodecContext(get_decode_context(pParam->codec_id, decoder_name)) {
   init_codec_context(
-      pCodecContext,
-      pParam,
-      decoder_name,
-      decoder_option,
-      device,
-      pHWBufferRef);
+      pCodecContext, pParam, decoder_option, device, pHWBufferRef);
 }
 
 int Decoder::process_packet(AVPacket* pPacket) {

diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.cpp b/torchaudio/csrc/ffmpeg/ffmpeg.cpp
@@ -35,65 +35,13 @@ void clean_up_dict(AVDictionary* p) {
   }
 }
 
-namespace {
-
-// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
-// Starting from libavformat 59 (ffmpeg 5),
-// AVInputFormat is const and related functions expect constant.
-#if LIBAVFORMAT_VERSION_MAJOR >= 59
-#define AVINPUT_FORMAT_CONST const
-#else
-#define AVINPUT_FORMAT_CONST
-#endif
-
-} // namespace
-
 ////////////////////////////////////////////////////////////////////////////////
 // AVFormatContext
 ////////////////////////////////////////////////////////////////////////////////
 void AVFormatContextDeleter::operator()(AVFormatContext* p) {
   avformat_close_input(&p);
 };
 
-AVFormatContextPtr get_input_format_context(
-    const std::string& src,
-    const c10::optional<std::string>& device,
-    const OptionDict& option,
-    AVIOContext* io_ctx) {
-  AVFormatContext* pFormat = avformat_alloc_context();
-  if (!pFormat) {
-    throw std::runtime_error("Failed to allocate AVFormatContext.");
-  }
-  if (io_ctx) {
-    pFormat->pb = io_ctx;
-  }
-
-  auto* pInput = [&]() -> AVINPUT_FORMAT_CONST AVInputFormat* {
-    if (device.has_value()) {
-      std::string device_str = device.value();
-      AVINPUT_FORMAT_CONST AVInputFormat* p =
-          av_find_input_format(device_str.c_str());
-      if (!p) {
-        std::ostringstream msg;
-        msg << "Unsupported device/format: \"" << device_str << "\"";
-        throw std::runtime_error(msg.str());
-      }
-      return p;
-    }
-    return nullptr;
-  }();
-
-  AVDictionary* opt = get_option_dict(option);
-  int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);
-  clean_up_dict(opt);
-
-  if (ret < 0)
-    throw std::runtime_error(
-        "Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
-        ").");
-  return AVFormatContextPtr(pFormat);
-}
-
 AVFormatContextPtr::AVFormatContextPtr(AVFormatContext* p)
     : Wrapper<AVFormatContext, AVFormatContextDeleter>(p) {}
 
@@ -162,136 +110,6 @@ void AVCodecContextDeleter::operator()(AVCodecContext* p) {
   avcodec_free_context(&p);
 };
 
-namespace {
-const AVCodec* get_decode_codec(
-    enum AVCodecID codec_id,
-    const c10::optional<std::string>& decoder_name) {
-  const AVCodec* pCodec = !decoder_name.has_value()
-      ? avcodec_find_decoder(codec_id)
-      : avcodec_find_decoder_by_name(decoder_name.value().c_str());
-
-  if (!pCodec) {
-    std::stringstream ss;
-    if (!decoder_name.has_value()) {
-      ss << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
-         << codec_id << ").";
-    } else {
-      ss << "Unsupported codec: \"" << decoder_name.value() << "\".";
-    }
-    throw std::runtime_error(ss.str());
-  }
-  return pCodec;
-}
-
-} // namespace
-
-AVCodecContextPtr get_decode_context(
-    enum AVCodecID codec_id,
-    const c10::optional<std::string>& decoder_name) {
-  const AVCodec* pCodec = get_decode_codec(codec_id, decoder_name);
-
-  AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec);
-  if (!pCodecContext) {
-    throw std::runtime_error("Failed to allocate CodecContext.");
-  }
-  return AVCodecContextPtr(pCodecContext);
-}
-
-#ifdef USE_CUDA
-enum AVPixelFormat get_hw_format(
-    AVCodecContext* ctx,
-    const enum AVPixelFormat* pix_fmts) {
-  const enum AVPixelFormat* p = nullptr;
-  AVPixelFormat pix_fmt = *static_cast<AVPixelFormat*>(ctx->opaque);
-  for (p = pix_fmts; *p != -1; p++) {
-    if (*p == pix_fmt) {
-      return *p;
-    }
-  }
-  TORCH_WARN("Failed to get HW surface format.");
-  return AV_PIX_FMT_NONE;
-}
-
-const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
-  for (int i = 0;; ++i) {
-    const AVCodecHWConfig* config = avcodec_get_hw_config(pCodec, i);
-    if (!config) {
-      break;
-    }
-    if (config->device_type == AV_HWDEVICE_TYPE_CUDA &&
-        config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
-      return config;
-    }
-  }
-  std::stringstream ss;
-  ss << "CUDA device was requested, but the codec \"" << pCodec->name
-     << "\" is not supported.";
-  throw std::runtime_error(ss.str());
-}
-#endif
-
-void init_codec_context(
-    AVCodecContext* pCodecContext,
-    AVCodecParameters* pParams,
-    const c10::optional<std::string>& decoder_name,
-    const OptionDict& decoder_option,
-    const torch::Device& device,
-    AVBufferRefPtr& pHWBufferRef) {
-  const AVCodec* pCodec = get_decode_codec(pParams->codec_id, decoder_name);
-
-  int ret = avcodec_parameters_to_context(pCodecContext, pParams);
-  if (ret < 0) {
-    throw std::runtime_error(
-        "Failed to set CodecContext parameter: " + av_err2string(ret));
-  }
-
-#ifdef USE_CUDA
-  // Enable HW Acceleration
-  if (device.type() == c10::DeviceType::CUDA) {
-    const AVCodecHWConfig* config = get_cuda_config(pCodec);
-    // TODO: check how to log
-    // C10_LOG << "Decoder " << pCodec->name << " supports device " <<
-    // av_hwdevice_get_type_name(config->device_type);
-
-    // https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
-    // 1. Set HW pixel format (config->pix_fmt) to opaue pointer.
-    static thread_local AVPixelFormat pix_fmt = config->pix_fmt;
-    pCodecContext->opaque = static_cast<void*>(&pix_fmt);
-    // 2. Set pCodecContext->get_format call back function which
-    // will retrieve the HW pixel format from opaque pointer.
-    pCodecContext->get_format = get_hw_format;
-    // 3. Create HW device context and set to pCodecContext.
-    AVBufferRef* hw_device_ctx = nullptr;
-    ret = av_hwdevice_ctx_create(
-        &hw_device_ctx,
-        AV_HWDEVICE_TYPE_CUDA,
-        std::to_string(device.index()).c_str(),
-        nullptr,
-        0);
-    if (ret < 0) {
-      throw std::runtime_error(
-          "Failed to create CUDA device context: " + av_err2string(ret));
-    }
-    assert(hw_device_ctx);
-    pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
-    pHWBufferRef.reset(hw_device_ctx);
-  }
-#endif
-
-  AVDictionary* opts = get_option_dict(decoder_option);
-  ret = avcodec_open2(pCodecContext, pCodec, &opts);
-  clean_up_dict(opts);
-
-  if (ret < 0) {
-    throw std::runtime_error(
-        "Failed to initialize CodecContext: " + av_err2string(ret));
-  }
-
-  if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout)
-    pParams->channel_layout =
-        av_get_default_channel_layout(pCodecContext->channels);
-}
-
 AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
     : Wrapper<AVCodecContext, AVCodecContextDeleter>(p) {}
 

diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.h b/torchaudio/csrc/ffmpeg/ffmpeg.h
@@ -27,6 +27,15 @@ namespace ffmpeg {
 
 using OptionDict = std::map<std::string, std::string>;
 
+// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
+// Starting from libavformat 59 (ffmpeg 5),
+// AVInputFormat is const and related functions expect constant.
+#if LIBAVFORMAT_VERSION_MAJOR >= 59
+#define AVFORMAT_CONST const
+#else
+#define AVFORMAT_CONST
+#endif
+
 // Replacement of av_err2str, which causes
 // `error: taking address of temporary array`
 // https://github.com/joncampbell123/composite-video-simulator/issues/5
@@ -84,13 +93,6 @@ struct AVFormatContextPtr
   explicit AVFormatContextPtr(AVFormatContext* p);
 };
 
-// create format context for reading media
-AVFormatContextPtr get_input_format_context(
-    const std::string& src,
-    const c10::optional<std::string>& device,
-    const OptionDict& option,
-    AVIOContext* io_ctx = nullptr);
-
 ////////////////////////////////////////////////////////////////////////////////
 // AVIO
 ////////////////////////////////////////////////////////////////////////////////
@@ -166,20 +168,6 @@ struct AVCodecContextPtr
   explicit AVCodecContextPtr(AVCodecContext* p);
 };
 
-// Allocate codec context from either decoder name or ID
-AVCodecContextPtr get_decode_context(
-    enum AVCodecID codec_id,
-    const c10::optional<std::string>& decoder);
-
-// Initialize codec context with the parameters
-void init_codec_context(
-    AVCodecContext* pCodecContext,
-    AVCodecParameters* pParams,
-    const c10::optional<std::string>& decoder_name,
-    const OptionDict& decoder_option,
-    const torch::Device& device,
-    AVBufferRefPtr& pHWBufferRef);
-
 ////////////////////////////////////////////////////////////////////////////////
 // AVFilterGraph
 ////////////////////////////////////////////////////////////////////////////////