support hardware accelerated video decoding
deanlee committed Nov 15, 2021
1 parent 65d1b51 commit 465608a
Showing 6 changed files with 148 additions and 55 deletions.
159 changes: 114 additions & 45 deletions selfdrive/ui/replay/framereader.cc
@@ -11,25 +11,35 @@ int readFunction(void *opaque, uint8_t *buf, int buf_size) {
return iss.gcount() ? iss.gcount() : AVERROR_EOF;
}

enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts) {
enum AVPixelFormat *hw_pix_fmt = reinterpret_cast<enum AVPixelFormat *>(ctx->opaque);
for (const enum AVPixelFormat *p = pix_fmts; *p != -1; p++) {
if (*p == *hw_pix_fmt) {
return *p;
}
}
assert(0);
return AV_PIX_FMT_NONE;
}

} // namespace

FrameReader::FrameReader(bool local_cache, int chunk_size, int retries) : FileReader(local_cache, chunk_size, retries) {
pFormatCtx_ = avformat_alloc_context();
av_frame_ = av_frame_alloc();
rgb_frame_ = av_frame_alloc();
yuv_frame_ = av_frame_alloc();
input_ctx = avformat_alloc_context();
av_frame_.reset(av_frame_alloc());
yuv_frame.reset(av_frame_alloc());
rgb_frame.reset(av_frame_alloc());
}

FrameReader::~FrameReader() {
for (auto &f : frames_) {
av_free_packet(&f.pkt);
}

if (pCodecCtx_) avcodec_free_context(&pCodecCtx_);
if (pFormatCtx_) avformat_close_input(&pFormatCtx_);
if (av_frame_) av_frame_free(&av_frame_);
if (rgb_frame_) av_frame_free(&rgb_frame_);
if (yuv_frame_) av_frame_free(&yuv_frame_);
if (decoder_ctx) avcodec_free_context(&decoder_ctx);
if (input_ctx) avformat_close_input(&input_ctx);
if (hw_device_ctx) av_buffer_unref(&hw_device_ctx);

if (rgb_sws_ctx_) sws_freeContext(rgb_sws_ctx_);
if (yuv_sws_ctx_) sws_freeContext(yuv_sws_ctx_);

@@ -39,56 +49,64 @@ FrameReader::~FrameReader() {
}
}

bool FrameReader::load(const std::string &url, std::atomic<bool> *abort) {
bool FrameReader::load(const std::string &url, AVHWDeviceType hw_device_type, std::atomic<bool> *abort) {
std::string content = read(url, abort);
if (content.empty()) return false;

std::istringstream iss(content);
const int avio_ctx_buffer_size = 64 * 1024;
unsigned char *avio_ctx_buffer = (unsigned char *)av_malloc(avio_ctx_buffer_size);
avio_ctx_ = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size, 0, &iss, readFunction, nullptr, nullptr);
pFormatCtx_->pb = avio_ctx_;
input_ctx->pb = avio_ctx_;

pFormatCtx_->probesize = 10 * 1024 * 1024; // 10MB
int ret = avformat_open_input(&pFormatCtx_, url.c_str(), NULL, NULL);
input_ctx->probesize = 10 * 1024 * 1024; // 10MB
int ret = avformat_open_input(&input_ctx, url.c_str(), NULL, NULL);
if (ret != 0) {
char err_str[1024] = {0};
av_strerror(ret, err_str, std::size(err_str));
printf("Error loading video - %s - %s\n", err_str, url.c_str());
return false;
}
avformat_find_stream_info(pFormatCtx_, NULL);
// av_dump_format(pFormatCtx_, 0, url.c_str(), 0);

AVStream *video = pFormatCtx_->streams[0];
auto pCodec = avcodec_find_decoder(video->codec->codec_id);
if (!pCodec) return false;
ret = avformat_find_stream_info(input_ctx, nullptr);
if (ret < 0) {
printf("cannot find a video stream in the input file\n");
return false;
}

pCodecCtx_ = avcodec_alloc_context3(pCodec);
ret = avcodec_parameters_to_context(pCodecCtx_, video->codecpar);
AVStream *video = input_ctx->streams[0];
auto decoder = avcodec_find_decoder(video->codec->codec_id);
if (!decoder) return false;

decoder_ctx = avcodec_alloc_context3(decoder);
ret = avcodec_parameters_to_context(decoder_ctx, video->codecpar);
if (ret != 0) return false;

// pCodecCtx_->thread_count = 0;
// pCodecCtx_->thread_type = FF_THREAD_FRAME;
ret = avcodec_open2(pCodecCtx_, pCodec, NULL);
if (ret < 0) return false;
width = (decoder_ctx->width + 3) & ~3;
height = decoder_ctx->height;
if (hw_device_type != AV_HWDEVICE_TYPE_NONE) {
if (!initHardwareDecoder(hw_device_type)) {
return false;
}
height = (decoder_ctx->height + 15) & ~15;
}

width = (pCodecCtx_->width + 3) & ~3;
height = pCodecCtx_->height;
rgb_sws_ctx_ = sws_getContext(pCodecCtx_->width, pCodecCtx_->height, AV_PIX_FMT_YUV420P,
width, height, AV_PIX_FMT_BGR24,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
rgb_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
width, height, AV_PIX_FMT_BGR24,
SWS_BILINEAR, NULL, NULL, NULL);
if (!rgb_sws_ctx_) return false;

yuv_sws_ctx_ = sws_getContext(pCodecCtx_->width, pCodecCtx_->height, AV_PIX_FMT_YUV420P,
width, height, AV_PIX_FMT_YUV420P,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
yuv_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
width, height, AV_PIX_FMT_YUV420P,
SWS_BILINEAR, NULL, NULL, NULL);
if (!yuv_sws_ctx_) return false;

ret = avcodec_open2(decoder_ctx, decoder, NULL);
if (ret < 0) return false;

frames_.reserve(60 * 20); // 20fps, one minute
while (!(abort && *abort)) {
Frame &frame = frames_.emplace_back();
ret = av_read_frame(pFormatCtx_, &frame.pkt);
ret = av_read_frame(input_ctx, &frame.pkt);
if (ret < 0) {
frames_.pop_back();
valid_ = (ret == AVERROR_EOF);
@@ -100,6 +118,42 @@ bool FrameReader::load(const std::string &url, std::atomic<bool> *abort) {
return valid_;
}

bool FrameReader::initHardwareDecoder(AVHWDeviceType hw_device_type) {
decoder_ctx->opaque = &hw_pix_fmt;
decoder_ctx->get_format = get_hw_format;
for (int i = 0;; i++) {
const AVCodecHWConfig *config = avcodec_get_hw_config(decoder_ctx->codec, i);
if (!config) {
printf("decoder %s does not support hw device type %s.\n",
decoder_ctx->codec->name, av_hwdevice_get_type_name(hw_device_type));
return false;
}
if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && config->device_type == hw_device_type) {
hw_pix_fmt = config->pix_fmt;
break;
}
}

int ret = av_hwdevice_ctx_create(&hw_device_ctx, hw_device_type, nullptr, nullptr, 0);
if (ret < 0) {
printf("Failed to create specified HW device %d.\n", ret);
return false;
}

// get sws source format
AVHWFramesConstraints *hw_frames_const = av_hwdevice_get_hwframe_constraints(hw_device_ctx, nullptr);
assert(hw_frames_const != 0);
for (AVPixelFormat *p = hw_frames_const->valid_sw_formats; *p != AV_PIX_FMT_NONE; p++) {
if (sws_isSupportedInput(*p)) {
sws_src_format = *p;
break;
}
}

decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
return true;
}

bool FrameReader::get(int idx, uint8_t *rgb, uint8_t *yuv) {
assert(rgb || yuv);
if (!valid_ || idx < 0 || idx >= frames_.size()) {
@@ -125,35 +179,50 @@ bool FrameReader::decode(int idx, uint8_t *rgb, uint8_t *yuv) {
for (int i = from_idx; i <= idx; ++i) {
Frame &frame = frames_[i];
if ((!frame.decoded || i == idx) && !frame.failed) {
frame.decoded = decodeFrame(&frame.pkt);
AVFrame *f = decodeFrame(&frame.pkt);
frame.decoded = f != nullptr;
frame.failed = !frame.decoded;
if (frame.decoded && i == idx) {
return copyBuffers(av_frame_, rgb, yuv);
return copyBuffers(f, rgb, yuv);
}
}
}
return false;
}

bool FrameReader::decodeFrame(AVPacket *pkt) {
int ret = avcodec_send_packet(pCodecCtx_, pkt);
AVFrame *FrameReader::decodeFrame(AVPacket *pkt) {
int ret = avcodec_send_packet(decoder_ctx, pkt);
if (ret < 0) {
printf("Error sending a packet for decoding\n");
return false;
return nullptr;
}

ret = avcodec_receive_frame(decoder_ctx, av_frame_.get());
if (ret != 0) {
return nullptr;
}

if (av_frame_->format == hw_pix_fmt) {
hw_frame.reset(av_frame_alloc());
if ((ret = av_hwframe_transfer_data(hw_frame.get(), av_frame_.get(), 0)) < 0) {
printf("error transferring the data from GPU to CPU\n");
return nullptr;
}
return hw_frame.get();
} else {
return av_frame_.get();
}
ret = avcodec_receive_frame(pCodecCtx_, av_frame_);
return ret == 0;
}

bool FrameReader::copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv) {
// images is going to be written to output buffers, no alignment (align = 1)
if (yuv) {
av_image_fill_arrays(yuv_frame_->data, yuv_frame_->linesize, yuv, AV_PIX_FMT_YUV420P, width, height, 1);
int ret = sws_scale(yuv_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, yuv_frame_->data, yuv_frame_->linesize);
av_image_fill_arrays(yuv_frame->data, yuv_frame->linesize, yuv, AV_PIX_FMT_YUV420P, width, height, 1);
int ret = sws_scale(yuv_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, yuv_frame->data, yuv_frame->linesize);
if (ret < 0) return false;
}

av_image_fill_arrays(rgb_frame_->data, rgb_frame_->linesize, rgb, AV_PIX_FMT_BGR24, width, height, 1);
int ret = sws_scale(rgb_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, rgb_frame_->data, rgb_frame_->linesize);
av_image_fill_arrays(rgb_frame->data, rgb_frame->linesize, rgb, AV_PIX_FMT_BGR24, width, height, 1);
int ret = sws_scale(rgb_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, rgb_frame->data, rgb_frame->linesize);
return ret >= 0;
}
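
The hunks above follow FFmpeg's standard hardware-decoding recipe: query the decoder for a hardware config, create a device context, let the get_format callback select the hardware pixel format, and copy decoded frames back to system memory with av_hwframe_transfer_data before color conversion. The sketch below condenses that flow into one place for reference. It is a simplified illustration rather than openpilot's code: the names pick_hw_format and setup_hw_decoder are invented here, and CUDA is used only as an example device type.

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavutil/hwcontext.h>
}

// get_format callback: accept the hardware pixel format the decoder was configured for.
static enum AVPixelFormat pick_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *fmts) {
  const enum AVPixelFormat want = *reinterpret_cast<enum AVPixelFormat *>(ctx->opaque);
  for (const enum AVPixelFormat *p = fmts; *p != AV_PIX_FMT_NONE; p++) {
    if (*p == want) return *p;  // decoder will output frames in GPU memory
  }
  return AV_PIX_FMT_NONE;
}

// Wire a codec context up to a hardware device (e.g. AV_HWDEVICE_TYPE_CUDA).
static bool setup_hw_decoder(AVCodecContext *decoder_ctx, const AVCodec *decoder,
                             AVBufferRef **hw_device_ctx, enum AVPixelFormat *hw_pix_fmt,
                             enum AVHWDeviceType type) {
  // 1. Find the pixel format this decoder produces for the requested device type.
  for (int i = 0;; i++) {
    const AVCodecHWConfig *cfg = avcodec_get_hw_config(decoder, i);
    if (!cfg) return false;  // no hardware config for this device type
    if ((cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) && cfg->device_type == type) {
      *hw_pix_fmt = cfg->pix_fmt;
      break;
    }
  }
  // 2. Create the hardware device and attach it to the codec context.
  if (av_hwdevice_ctx_create(hw_device_ctx, type, nullptr, nullptr, 0) < 0) return false;
  decoder_ctx->hw_device_ctx = av_buffer_ref(*hw_device_ctx);
  // 3. Let the decoder negotiate the hardware pixel format via the callback above.
  decoder_ctx->opaque = hw_pix_fmt;
  decoder_ctx->get_format = pick_hw_format;
  return true;
}

// 4. After avcodec_receive_frame(), a frame held in the hardware format is copied back with
//      av_hwframe_transfer_data(sw_frame, hw_frame, 0);
//    which is what decodeFrame() above does before handing the frame to sws_scale.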
20 changes: 15 additions & 5 deletions selfdrive/ui/replay/framereader.h
@@ -1,5 +1,6 @@
#pragma once

#include <memory>
#include <string>
#include <vector>

@@ -12,11 +13,15 @@ extern "C" {
#include <libavutil/imgutils.h>
}

struct AVFrameDeleter {
void operator()(AVFrame* frame) const { av_frame_free(&frame); }
};

class FrameReader : protected FileReader {
public:
FrameReader(bool local_cache = false, int chunk_size = -1, int retries = 0);
~FrameReader();
bool load(const std::string &url, std::atomic<bool> *abort = nullptr);
bool load(const std::string &url, AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE, std::atomic<bool> *abort = nullptr);
bool get(int idx, uint8_t *rgb, uint8_t *yuv);
int getRGBSize() const { return width * height * 3; }
int getYUVSize() const { return width * height * 3 / 2; }
@@ -26,8 +31,9 @@ class FrameReader : protected FileReader {
int width = 0, height = 0;

private:
bool initHardwareDecoder(AVHWDeviceType hw_device_type);
bool decode(int idx, uint8_t *rgb, uint8_t *yuv);
bool decodeFrame(AVPacket *pkt);
AVFrame * decodeFrame(AVPacket *pkt);
bool copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv);

struct Frame {
Expand All @@ -36,11 +42,15 @@ class FrameReader : protected FileReader {
bool failed = false;
};
std::vector<Frame> frames_;
AVPixelFormat sws_src_format = AV_PIX_FMT_YUV420P;
SwsContext *rgb_sws_ctx_ = nullptr, *yuv_sws_ctx_ = nullptr;
AVFrame *av_frame_, *rgb_frame_, *yuv_frame_ = nullptr;
AVFormatContext *pFormatCtx_ = nullptr;
AVCodecContext *pCodecCtx_ = nullptr;
std::unique_ptr<AVFrame, AVFrameDeleter>av_frame_, rgb_frame, yuv_frame, hw_frame;
AVFormatContext *input_ctx = nullptr;
AVCodecContext *decoder_ctx = nullptr;
int key_frames_count_ = 0;
bool valid_ = false;
AVIOContext *avio_ctx_ = nullptr;

AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
AVBufferRef *hw_device_ctx = nullptr;
};
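
For context, a hypothetical caller of the extended FrameReader API might look like the sketch below; the URL, cache settings, and choice of CUDA are placeholders rather than part of this change, and the include path simply follows the file path shown above.

#include <vector>
#include "selfdrive/ui/replay/framereader.h"

void example() {
  FrameReader reader(true /* local_cache */, 20 * 1024 * 1024, 3);
  // Passing AV_HWDEVICE_TYPE_NONE (the default) keeps pure software decoding.
  if (reader.load("https://example.com/some_route/fcamera.hevc", AV_HWDEVICE_TYPE_CUDA)) {
    std::vector<uint8_t> rgb(reader.getRGBSize()), yuv(reader.getYUVSize());
    if (reader.get(0, rgb.data(), yuv.data())) {
      // frame 0 is now available as BGR24 in rgb and YUV420P in yuv
    }
  }
}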
2 changes: 2 additions & 0 deletions selfdrive/ui/replay/main.cc
@@ -102,6 +102,8 @@ int main(int argc, char *argv[]) {
{"no-cache", REPLAY_FLAG_NO_FILE_CACHE, "turn off local cache"},
{"qcam", REPLAY_FLAG_QCAMERA, "load qcamera"},
{"yuv", REPLAY_FLAG_SEND_YUV, "send yuv frame"},
{"cuda", REPLAY_FLAG_CUDA, "enable CUDA accelerated decoding"},
{"mediacodec", REPLAY_FLAG_MEDIACODEC, "enable MediaCodec accelerated decoding"},
};

QCommandLineParser parser;
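With these options in place, hardware decoding can presumably be enabled from the command line, for example ./replay --cuda <route> on a machine with an NVIDIA GPU or ./replay --mediacodec <route> on an Android-based device; the exact binary path and route argument here are illustrative, and normally only one of the two flags would be passed at a time.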
2 changes: 2 additions & 0 deletions selfdrive/ui/replay/replay.h
@@ -16,6 +16,8 @@ enum REPLAY_FLAGS {
REPLAY_FLAG_NO_FILE_CACHE = 0x0020,
REPLAY_FLAG_QCAMERA = 0x0040,
REPLAY_FLAG_SEND_YUV = 0x0080,
REPLAY_FLAG_CUDA = 0x0100,
REPLAY_FLAG_MEDIACODEC = 0x0200,
};

class Replay : public QObject {
17 changes: 13 additions & 4 deletions selfdrive/ui/replay/route.cc
@@ -91,7 +91,7 @@ void Route::addFileToSegment(int n, const QString &file) {

// class Segment

Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n) {
Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n), flags(flags) {
// [RoadCam, DriverCam, WideRoadCam, log]. fallback to qcamera/qlog
const QString file_list[] = {
(flags & REPLAY_FLAG_QCAMERA) || files.road_cam.isEmpty() ? files.qcamera : files.road_cam,
@@ -102,7 +102,7 @@ Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n) {
for (int i = 0; i < std::size(file_list); i++) {
if (!file_list[i].isEmpty()) {
loading_++;
synchronizer_.addFuture(QtConcurrent::run([=] { loadFile(i, file_list[i].toStdString(), !(flags & REPLAY_FLAG_NO_FILE_CACHE)); }));
synchronizer_.addFuture(QtConcurrent::run([=] { loadFile(i, file_list[i].toStdString()); }));
}
}
}
@@ -114,11 +114,20 @@ Segment::~Segment() {
synchronizer_.waitForFinished();
}

void Segment::loadFile(int id, const std::string file, bool local_cache) {
void Segment::loadFile(int id, const std::string file) {
const bool local_cache = !(flags & REPLAY_FLAG_NO_FILE_CACHE);
bool success = false;
if (id < MAX_CAMERAS) {
frames[id] = std::make_unique<FrameReader>(local_cache, 20 * 1024 * 1024, 3);
success = frames[id]->load(file, &abort_);

AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE;
if (flags & REPLAY_FLAG_CUDA) {
hw_device_type = AV_HWDEVICE_TYPE_CUDA;
} else if (flags & REPLAY_FLAG_MEDIACODEC) {
hw_device_type = AV_HWDEVICE_TYPE_MEDIACODEC;
}

success = frames[id]->load(file, hw_device_type, &abort_);
} else {
log = std::make_unique<LogReader>(local_cache, -1, 3);
success = log->load(file, &abort_);
3 changes: 2 additions & 1 deletion selfdrive/ui/replay/route.h
@@ -58,9 +58,10 @@ class Segment : public QObject {
void loadFinished(bool success);

protected:
void loadFile(int id, const std::string file, bool local_cache);
void loadFile(int id, const std::string file);

std::atomic<bool> abort_ = false;
std::atomic<int> loading_ = 0;
QFutureSynchronizer<void> synchronizer_;
uint32_t flags;
};
