diff --git a/selfdrive/ui/replay/framereader.cc b/selfdrive/ui/replay/framereader.cc index bfdc6ee386d767d..1b673750a7e3a1b 100644 --- a/selfdrive/ui/replay/framereader.cc +++ b/selfdrive/ui/replay/framereader.cc @@ -11,13 +11,24 @@ int readFunction(void *opaque, uint8_t *buf, int buf_size) { return iss.gcount() ? iss.gcount() : AVERROR_EOF; } +enum AVPixelFormat get_hw_format(AVCodecContext *ctx, const enum AVPixelFormat *pix_fmts) { + enum AVPixelFormat *hw_pix_fmt = reinterpret_cast<enum AVPixelFormat *>(ctx->opaque); + for (const enum AVPixelFormat *p = pix_fmts; *p != -1; p++) { + if (*p == *hw_pix_fmt) { + return *p; + } + } + assert(0); + return AV_PIX_FMT_NONE; +} + } // namespace FrameReader::FrameReader(bool local_cache, int chunk_size, int retries) : FileReader(local_cache, chunk_size, retries) { - pFormatCtx_ = avformat_alloc_context(); - av_frame_ = av_frame_alloc(); - rgb_frame_ = av_frame_alloc(); - yuv_frame_ = av_frame_alloc(); + input_ctx = avformat_alloc_context(); + av_frame_.reset(av_frame_alloc()); + yuv_frame.reset(av_frame_alloc()); + rgb_frame.reset(av_frame_alloc()); } FrameReader::~FrameReader() { @@ -25,11 +36,10 @@ FrameReader::~FrameReader() { av_free_packet(&f.pkt); } - if (pCodecCtx_) avcodec_free_context(&pCodecCtx_); - if (pFormatCtx_) avformat_close_input(&pFormatCtx_); - if (av_frame_) av_frame_free(&av_frame_); - if (rgb_frame_) av_frame_free(&rgb_frame_); - if (yuv_frame_) av_frame_free(&yuv_frame_); + if (decoder_ctx) avcodec_free_context(&decoder_ctx); + if (input_ctx) avformat_close_input(&input_ctx); + if (hw_device_ctx) av_buffer_unref(&hw_device_ctx); + if (rgb_sws_ctx_) sws_freeContext(rgb_sws_ctx_); if (yuv_sws_ctx_) sws_freeContext(yuv_sws_ctx_); @@ -39,7 +49,7 @@ FrameReader::~FrameReader() { } } -bool FrameReader::load(const std::string &url, std::atomic<bool> *abort) { +bool FrameReader::load(const std::string &url, AVHWDeviceType hw_device_type, std::atomic<bool> *abort) { std::string content = read(url, abort); if (content.empty()) return false;
@@ -47,48 +57,56 @@ bool FrameReader::load(const std::string &url, std::atomic *abort) { const int avio_ctx_buffer_size = 64 * 1024; unsigned char *avio_ctx_buffer = (unsigned char *)av_malloc(avio_ctx_buffer_size); avio_ctx_ = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size, 0, &iss, readFunction, nullptr, nullptr); - pFormatCtx_->pb = avio_ctx_; + input_ctx->pb = avio_ctx_; - pFormatCtx_->probesize = 10 * 1024 * 1024; // 10MB - int ret = avformat_open_input(&pFormatCtx_, url.c_str(), NULL, NULL); + input_ctx->probesize = 10 * 1024 * 1024; // 10MB + int ret = avformat_open_input(&input_ctx, url.c_str(), NULL, NULL); if (ret != 0) { char err_str[1024] = {0}; av_strerror(ret, err_str, std::size(err_str)); printf("Error loading video - %s - %s\n", err_str, url.c_str()); return false; } - avformat_find_stream_info(pFormatCtx_, NULL); - // av_dump_format(pFormatCtx_, 0, url.c_str(), 0); - AVStream *video = pFormatCtx_->streams[0]; - auto pCodec = avcodec_find_decoder(video->codec->codec_id); - if (!pCodec) return false; + ret = avformat_find_stream_info(input_ctx, nullptr); + if (ret < 0) { + printf("cannot find a video stream in the input file\n"); + return false; + } - pCodecCtx_ = avcodec_alloc_context3(pCodec); - ret = avcodec_parameters_to_context(pCodecCtx_, video->codecpar); + AVStream *video = input_ctx->streams[0]; + auto decoder = avcodec_find_decoder(video->codec->codec_id); + if (!decoder) return false; + + decoder_ctx = avcodec_alloc_context3(decoder); + ret = avcodec_parameters_to_context(decoder_ctx, video->codecpar); if (ret != 0) return false; - // pCodecCtx_->thread_count = 0; - // pCodecCtx_->thread_type = FF_THREAD_FRAME; - ret = avcodec_open2(pCodecCtx_, pCodec, NULL); - if (ret < 0) return false; + width = (decoder_ctx->width + 3) & ~3; + height = decoder_ctx->height; + if (hw_device_type != AV_HWDEVICE_TYPE_NONE) { + if (!initHardwareDecoder(hw_device_type)) { + return false; + } + height = (decoder_ctx->height + 15) & ~15; + } - width = 
(pCodecCtx_->width + 3) & ~3; - height = pCodecCtx_->height; - rgb_sws_ctx_ = sws_getContext(pCodecCtx_->width, pCodecCtx_->height, AV_PIX_FMT_YUV420P, - width, height, AV_PIX_FMT_BGR24, - SWS_FAST_BILINEAR, NULL, NULL, NULL); + rgb_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format, + width, height, AV_PIX_FMT_BGR24, + SWS_BILINEAR, NULL, NULL, NULL); if (!rgb_sws_ctx_) return false; - - yuv_sws_ctx_ = sws_getContext(pCodecCtx_->width, pCodecCtx_->height, AV_PIX_FMT_YUV420P, - width, height, AV_PIX_FMT_YUV420P, - SWS_FAST_BILINEAR, NULL, NULL, NULL); + yuv_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format, + width, height, AV_PIX_FMT_YUV420P, + SWS_BILINEAR, NULL, NULL, NULL); if (!yuv_sws_ctx_) return false; + ret = avcodec_open2(decoder_ctx, decoder, NULL); + if (ret < 0) return false; + frames_.reserve(60 * 20); // 20fps, one minute while (!(abort && *abort)) { Frame &frame = frames_.emplace_back(); - ret = av_read_frame(pFormatCtx_, &frame.pkt); + ret = av_read_frame(input_ctx, &frame.pkt); if (ret < 0) { frames_.pop_back(); valid_ = (ret == AVERROR_EOF); @@ -100,6 +118,42 @@ bool FrameReader::load(const std::string &url, std::atomic *abort) { return valid_; } +bool FrameReader::initHardwareDecoder(AVHWDeviceType hw_device_type) { + decoder_ctx->opaque = &hw_pix_fmt; + decoder_ctx->get_format = get_hw_format; + for (int i = 0;; i++) { + const AVCodecHWConfig *config = avcodec_get_hw_config(decoder_ctx->codec, i); + if (!config) { + printf("decoder %s does not support hw device type %s.\n", + decoder_ctx->codec->name, av_hwdevice_get_type_name(hw_device_type)); + return false; + } + if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && config->device_type == hw_device_type) { + hw_pix_fmt = config->pix_fmt; + break; + } + } + + int ret = av_hwdevice_ctx_create(&hw_device_ctx, hw_device_type, nullptr, nullptr, 0); + if (ret < 0) { + printf("Failed to create specified HW device %d.\n", 
ret); + return false; + } + + // get sws source format + AVHWFramesConstraints *hw_frames_const = av_hwdevice_get_hwframe_constraints(hw_device_ctx, nullptr); + assert(hw_frames_const != 0); + for (AVPixelFormat *p = hw_frames_const->valid_sw_formats; *p != AV_PIX_FMT_NONE; p++) { + if (sws_isSupportedInput(*p)) { + sws_src_format = *p; + break; + } + } + + decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx); + return true; +} + bool FrameReader::get(int idx, uint8_t *rgb, uint8_t *yuv) { assert(rgb || yuv); if (!valid_ || idx < 0 || idx >= frames_.size()) { @@ -125,35 +179,50 @@ bool FrameReader::decode(int idx, uint8_t *rgb, uint8_t *yuv) { for (int i = from_idx; i <= idx; ++i) { Frame &frame = frames_[i]; if ((!frame.decoded || i == idx) && !frame.failed) { - frame.decoded = decodeFrame(&frame.pkt); + AVFrame *f = decodeFrame(&frame.pkt); + frame.decoded = f != nullptr; frame.failed = !frame.decoded; if (frame.decoded && i == idx) { - return copyBuffers(av_frame_, rgb, yuv); + return copyBuffers(f, rgb, yuv); } } } return false; } -bool FrameReader::decodeFrame(AVPacket *pkt) { - int ret = avcodec_send_packet(pCodecCtx_, pkt); +AVFrame *FrameReader::decodeFrame(AVPacket *pkt) { + int ret = avcodec_send_packet(decoder_ctx, pkt); if (ret < 0) { printf("Error sending a packet for decoding\n"); - return false; + return nullptr; + } + + ret = avcodec_receive_frame(decoder_ctx, av_frame_.get()); + if (ret != 0) { + return nullptr; + } + + if (av_frame_->format == hw_pix_fmt) { + hw_frame.reset(av_frame_alloc()); + if ((ret = av_hwframe_transfer_data(hw_frame.get(), av_frame_.get(), 0)) < 0) { + printf("error transferring the data from GPU to CPU\n"); + return nullptr; + } + return hw_frame.get(); + } else { + return av_frame_.get(); } - ret = avcodec_receive_frame(pCodecCtx_, av_frame_); - return ret == 0; } bool FrameReader::copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv) { // images is going to be written to output buffers, no alignment (align = 1) if 
(yuv) { - av_image_fill_arrays(yuv_frame_->data, yuv_frame_->linesize, yuv, AV_PIX_FMT_YUV420P, width, height, 1); - int ret = sws_scale(yuv_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, yuv_frame_->data, yuv_frame_->linesize); + av_image_fill_arrays(yuv_frame->data, yuv_frame->linesize, yuv, AV_PIX_FMT_YUV420P, width, height, 1); + int ret = sws_scale(yuv_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, yuv_frame->data, yuv_frame->linesize); if (ret < 0) return false; } - av_image_fill_arrays(rgb_frame_->data, rgb_frame_->linesize, rgb, AV_PIX_FMT_BGR24, width, height, 1); - int ret = sws_scale(rgb_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, rgb_frame_->data, rgb_frame_->linesize); + av_image_fill_arrays(rgb_frame->data, rgb_frame->linesize, rgb, AV_PIX_FMT_BGR24, width, height, 1); + int ret = sws_scale(rgb_sws_ctx_, (const uint8_t **)f->data, f->linesize, 0, f->height, rgb_frame->data, rgb_frame->linesize); return ret >= 0; } diff --git a/selfdrive/ui/replay/framereader.h b/selfdrive/ui/replay/framereader.h index 1b552cd1b859d08..6820d1af93c7909 100644 --- a/selfdrive/ui/replay/framereader.h +++ b/selfdrive/ui/replay/framereader.h @@ -1,5 +1,6 @@ #pragma once +#include <memory> #include <string> #include <vector> @@ -12,11 +13,15 @@ extern "C" { #include <libswscale/swscale.h> } +struct AVFrameDeleter { + void operator()(AVFrame* frame) const { av_frame_free(&frame); } +}; + class FrameReader : protected FileReader { public: FrameReader(bool local_cache = false, int chunk_size = -1, int retries = 0); ~FrameReader(); - bool load(const std::string &url, std::atomic<bool> *abort = nullptr); + bool load(const std::string &url, AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE, std::atomic<bool> *abort = nullptr); bool get(int idx, uint8_t *rgb, uint8_t *yuv); int getRGBSize() const { return width * height * 3; } int getYUVSize() const { return width * height * 3 / 2; } @@ -26,8 +31,9 @@ class FrameReader : protected FileReader { int width = 0, height = 0; private: + bool 
initHardwareDecoder(AVHWDeviceType hw_device_type); bool decode(int idx, uint8_t *rgb, uint8_t *yuv); - bool decodeFrame(AVPacket *pkt); + AVFrame * decodeFrame(AVPacket *pkt); bool copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv); struct Frame { @@ -36,11 +42,15 @@ class FrameReader : protected FileReader { bool failed = false; }; std::vector<Frame> frames_; + AVPixelFormat sws_src_format = AV_PIX_FMT_YUV420P; SwsContext *rgb_sws_ctx_ = nullptr, *yuv_sws_ctx_ = nullptr; - AVFrame *av_frame_, *rgb_frame_, *yuv_frame_ = nullptr; - AVFormatContext *pFormatCtx_ = nullptr; - AVCodecContext *pCodecCtx_ = nullptr; + std::unique_ptr<AVFrame, AVFrameDeleter> av_frame_, rgb_frame, yuv_frame, hw_frame; + AVFormatContext *input_ctx = nullptr; + AVCodecContext *decoder_ctx = nullptr; int key_frames_count_ = 0; bool valid_ = false; AVIOContext *avio_ctx_ = nullptr; + + AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE; + AVBufferRef *hw_device_ctx = nullptr; }; diff --git a/selfdrive/ui/replay/main.cc b/selfdrive/ui/replay/main.cc index d4fc8ff13a9f5c4..51d5175e9c7c9aa 100644 --- a/selfdrive/ui/replay/main.cc +++ b/selfdrive/ui/replay/main.cc @@ -102,6 +102,8 @@ int main(int argc, char *argv[]) { {"no-cache", REPLAY_FLAG_NO_FILE_CACHE, "turn off local cache"}, {"qcam", REPLAY_FLAG_QCAMERA, "load qcamera"}, {"yuv", REPLAY_FLAG_SEND_YUV, "send yuv frame"}, + {"cuda", REPLAY_FLAG_CUDA, "enable CUDA accelerated decoding"}, + {"mediacodec", REPLAY_FLAG_MEDIACODEC, "enable MediaCodec accelerated decoding"}, }; QCommandLineParser parser; diff --git a/selfdrive/ui/replay/replay.h b/selfdrive/ui/replay/replay.h index 320afa505b9670f..2f7ce57b7910012 100644 --- a/selfdrive/ui/replay/replay.h +++ b/selfdrive/ui/replay/replay.h @@ -16,6 +16,8 @@ enum REPLAY_FLAGS { REPLAY_FLAG_NO_FILE_CACHE = 0x0020, REPLAY_FLAG_QCAMERA = 0x0040, REPLAY_FLAG_SEND_YUV = 0x0080, + REPLAY_FLAG_CUDA = 0x0100, + REPLAY_FLAG_MEDIACODEC = 0x0200, }; class Replay : public QObject { diff --git a/selfdrive/ui/replay/route.cc 
b/selfdrive/ui/replay/route.cc index d834d67ab63714f..57b99547d595d9e 100644 --- a/selfdrive/ui/replay/route.cc +++ b/selfdrive/ui/replay/route.cc @@ -91,7 +91,7 @@ void Route::addFileToSegment(int n, const QString &file) { // class Segment -Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n) { +Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n), flags(flags) { // [RoadCam, DriverCam, WideRoadCam, log]. fallback to qcamera/qlog const QString file_list[] = { (flags & REPLAY_FLAG_QCAMERA) || files.road_cam.isEmpty() ? files.qcamera : files.road_cam, @@ -102,7 +102,7 @@ Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n) { for (int i = 0; i < std::size(file_list); i++) { if (!file_list[i].isEmpty()) { loading_++; - synchronizer_.addFuture(QtConcurrent::run([=] { loadFile(i, file_list[i].toStdString(), !(flags & REPLAY_FLAG_NO_FILE_CACHE)); })); + synchronizer_.addFuture(QtConcurrent::run([=] { loadFile(i, file_list[i].toStdString()); })); } } } @@ -114,11 +114,20 @@ Segment::~Segment() { synchronizer_.waitForFinished(); } -void Segment::loadFile(int id, const std::string file, bool local_cache) { +void Segment::loadFile(int id, const std::string file) { + const bool local_cache = !(flags & REPLAY_FLAG_NO_FILE_CACHE); bool success = false; if (id < MAX_CAMERAS) { frames[id] = std::make_unique<FrameReader>(local_cache, 20 * 1024 * 1024, 3); - success = frames[id]->load(file, &abort_); + + AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE; + if (flags & REPLAY_FLAG_CUDA) { + hw_device_type = AV_HWDEVICE_TYPE_CUDA; + } else if (flags & REPLAY_FLAG_MEDIACODEC) { + hw_device_type = AV_HWDEVICE_TYPE_MEDIACODEC; + } + + success = frames[id]->load(file, hw_device_type, &abort_); } else { log = std::make_unique<LogReader>(local_cache, -1, 3); success = log->load(file, &abort_); diff --git a/selfdrive/ui/replay/route.h b/selfdrive/ui/replay/route.h index b11607f250021f6..c39eef7d92ef4c7 100644 --- 
a/selfdrive/ui/replay/route.h +++ b/selfdrive/ui/replay/route.h @@ -58,9 +58,10 @@ class Segment : public QObject { void loadFinished(bool success); protected: - void loadFile(int id, const std::string file, bool local_cache); + void loadFile(int id, const std::string file); std::atomic<bool> abort_ = false; std::atomic<int> loading_ = 0; QFutureSynchronizer<void> synchronizer_; + uint32_t flags; };