Skip to content

Commit

Permalink
add support for CUDA decoding
Browse files (browse the repository at this point in the history)
  • Loading branch information
deanlee committed Nov 14, 2021
1 parent 65d1b51 commit 5f9ae48
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 41 deletions.
116 changes: 83 additions & 33 deletions selfdrive/ui/replay/framereader.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -14,22 +14,26 @@ int readFunction(void *opaque, uint8_t *buf, int buf_size) {
} // namespace

FrameReader::FrameReader(bool local_cache, int chunk_size, int retries) : FileReader(local_cache, chunk_size, retries) {
pFormatCtx_ = avformat_alloc_context();
input_ctx = avformat_alloc_context();
av_frame_ = av_frame_alloc();
rgb_frame_ = av_frame_alloc();
yuv_frame_ = av_frame_alloc();
hw_frame = av_frame_alloc();
}

FrameReader::~FrameReader() {
for (auto &f : frames_) {
av_free_packet(&f.pkt);
}

if (pCodecCtx_) avcodec_free_context(&pCodecCtx_);
if (pFormatCtx_) avformat_close_input(&pFormatCtx_);
if (decoder_ctx) avcodec_free_context(&decoder_ctx);
if (input_ctx) avformat_close_input(&input_ctx);
if (hw_device_ctx) av_buffer_unref(&hw_device_ctx);

if (av_frame_) av_frame_free(&av_frame_);
if (rgb_frame_) av_frame_free(&rgb_frame_);
if (yuv_frame_) av_frame_free(&yuv_frame_);
if (hw_frame) av_frame_free(&hw_frame);
if (rgb_sws_ctx_) sws_freeContext(rgb_sws_ctx_);
if (yuv_sws_ctx_) sws_freeContext(yuv_sws_ctx_);

Expand All @@ -47,48 +51,83 @@ bool FrameReader::load(const std::string &url, std::atomic<bool> *abort) {
const int avio_ctx_buffer_size = 64 * 1024;
unsigned char *avio_ctx_buffer = (unsigned char *)av_malloc(avio_ctx_buffer_size);
avio_ctx_ = avio_alloc_context(avio_ctx_buffer, avio_ctx_buffer_size, 0, &iss, readFunction, nullptr, nullptr);
pFormatCtx_->pb = avio_ctx_;
input_ctx->pb = avio_ctx_;

pFormatCtx_->probesize = 10 * 1024 * 1024; // 10MB
int ret = avformat_open_input(&pFormatCtx_, url.c_str(), NULL, NULL);
input_ctx->probesize = 10 * 1024 * 1024; // 10MB
int ret = avformat_open_input(&input_ctx, url.c_str(), NULL, NULL);
if (ret != 0) {
char err_str[1024] = {0};
av_strerror(ret, err_str, std::size(err_str));
printf("Error loading video - %s - %s\n", err_str, url.c_str());
return false;
}
avformat_find_stream_info(pFormatCtx_, NULL);
// av_dump_format(pFormatCtx_, 0, url.c_str(), 0);
avformat_find_stream_info(input_ctx, NULL);
// av_dump_format(input_ctx, 0, url.c_str(), 0);

AVStream *video = pFormatCtx_->streams[0];
auto pCodec = avcodec_find_decoder(video->codec->codec_id);
if (!pCodec) return false;
AVStream *video = input_ctx->streams[0];
auto decoder = avcodec_find_decoder(video->codec->codec_id);
if (!decoder) return false;

pCodecCtx_ = avcodec_alloc_context3(pCodec);
ret = avcodec_parameters_to_context(pCodecCtx_, video->codecpar);
decoder_ctx = avcodec_alloc_context3(decoder);
ret = avcodec_parameters_to_context(decoder_ctx, video->codecpar);
if (ret != 0) return false;

// pCodecCtx_->thread_count = 0;
// pCodecCtx_->thread_type = FF_THREAD_FRAME;
ret = avcodec_open2(pCodecCtx_, pCodec, NULL);
if (ret < 0) return false;
// decoder_ctx->thread_count = 0;
// decoder_ctx->thread_type = FF_THREAD_FRAME;
AVPixelFormat sws_src_format = AV_PIX_FMT_YUV420P;

if (hw_device_type != AV_HWDEVICE_TYPE_NONE) {
for (int i = 0;; i++) {
const AVCodecHWConfig *config = avcodec_get_hw_config(decoder, i);
if (!config) {
printf("decoder %s does not support hw device type %s.\n",
decoder->name, av_hwdevice_get_type_name(hw_device_type));
return false;
}
if (config->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX && config->device_type == hw_device_type) {
hw_pix_fmt = config->pix_fmt;
break;
}
}

ret = av_hwdevice_ctx_create(&hw_device_ctx, hw_device_type, nullptr, nullptr, 0);
if (ret < 0) {
printf("Failed to create specified HW device %d.\n", ret);
return false;
}

// get sws source format
AVHWFramesConstraints *hw_frames_const = av_hwdevice_get_hwframe_constraints(hw_device_ctx, nullptr);
assert(hw_frames_const != 0);
for (AVPixelFormat *p = hw_frames_const->valid_sw_formats; *p != AV_PIX_FMT_NONE; p++) {
if (sws_isSupportedInput(*p)) {
sws_src_format = *p;
break;
}
}

width = (pCodecCtx_->width + 3) & ~3;
height = pCodecCtx_->height;
rgb_sws_ctx_ = sws_getContext(pCodecCtx_->width, pCodecCtx_->height, AV_PIX_FMT_YUV420P,
width, height, AV_PIX_FMT_BGR24,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
decoder_ctx->hw_device_ctx = av_buffer_ref(hw_device_ctx);
}

width = (decoder_ctx->width + 3) & ~3;
height = decoder_ctx->height;
rgb_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
width, height, AV_PIX_FMT_BGR24,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
if (!rgb_sws_ctx_) return false;

yuv_sws_ctx_ = sws_getContext(pCodecCtx_->width, pCodecCtx_->height, AV_PIX_FMT_YUV420P,
width, height, AV_PIX_FMT_YUV420P,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
yuv_sws_ctx_ = sws_getContext(decoder_ctx->width, decoder_ctx->height, sws_src_format,
width, height, AV_PIX_FMT_YUV420P,
SWS_FAST_BILINEAR, NULL, NULL, NULL);
if (!yuv_sws_ctx_) return false;

ret = avcodec_open2(decoder_ctx, decoder, NULL);
if (ret < 0) return false;

frames_.reserve(60 * 20); // 20fps, one minute
while (!(abort && *abort)) {
Frame &frame = frames_.emplace_back();
ret = av_read_frame(pFormatCtx_, &frame.pkt);
ret = av_read_frame(input_ctx, &frame.pkt);
if (ret < 0) {
frames_.pop_back();
valid_ = (ret == AVERROR_EOF);
Expand Down Expand Up @@ -125,24 +164,35 @@ bool FrameReader::decode(int idx, uint8_t *rgb, uint8_t *yuv) {
for (int i = from_idx; i <= idx; ++i) {
Frame &frame = frames_[i];
if ((!frame.decoded || i == idx) && !frame.failed) {
frame.decoded = decodeFrame(&frame.pkt);
AVFrame *f = decodeFrame(&frame.pkt);
frame.decoded = f != nullptr;
frame.failed = !frame.decoded;
if (frame.decoded && i == idx) {
return copyBuffers(av_frame_, rgb, yuv);
return copyBuffers(f, rgb, yuv);
}
}
}
return false;
}

bool FrameReader::decodeFrame(AVPacket *pkt) {
int ret = avcodec_send_packet(pCodecCtx_, pkt);
AVFrame *FrameReader::decodeFrame(AVPacket *pkt) {
int ret = avcodec_send_packet(decoder_ctx, pkt);
if (ret < 0) {
printf("Error sending a packet for decoding\n");
return false;
return nullptr;
}
ret = avcodec_receive_frame(decoder_ctx, av_frame_);
if (ret == 0) {
if (av_frame_->format == hw_pix_fmt) {
if ((ret = av_hwframe_transfer_data(hw_frame, av_frame_, 0)) < 0) {
printf("error transferring the data from GPU to CPU\n");
return nullptr;
}
return hw_frame;
} else
return av_frame_;
}
ret = avcodec_receive_frame(pCodecCtx_, av_frame_);
return ret == 0;
return nullptr;
}

bool FrameReader::copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv) {
Expand Down
14 changes: 10 additions & 4 deletions selfdrive/ui/replay/framereader.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,7 @@ class FrameReader : protected FileReader {
public:
FrameReader(bool local_cache = false, int chunk_size = -1, int retries = 0);
~FrameReader();
// Selects the hardware device type to use for decoding. load() reads
// hw_device_type to decide whether to set up a hardware decoder, so this
// must be called before load(). The member defaults to AV_HWDEVICE_TYPE_NONE.
void setHardwareDevice(AVHWDeviceType type) {
  hw_device_type = type;
}
bool load(const std::string &url, std::atomic<bool> *abort = nullptr);
bool get(int idx, uint8_t *rgb, uint8_t *yuv);
// Size of one RGB output frame: width * height pixels times 3
// (the RGB scaler in load() targets AV_PIX_FMT_BGR24).
int getRGBSize() const {
  const int pixel_count = width * height;
  return pixel_count * 3;
}
Expand All @@ -27,7 +28,7 @@ class FrameReader : protected FileReader {

private:
bool decode(int idx, uint8_t *rgb, uint8_t *yuv);
bool decodeFrame(AVPacket *pkt);
AVFrame * decodeFrame(AVPacket *pkt);
bool copyBuffers(AVFrame *f, uint8_t *rgb, uint8_t *yuv);

struct Frame {
Expand All @@ -37,10 +38,15 @@ class FrameReader : protected FileReader {
};
std::vector<Frame> frames_;
SwsContext *rgb_sws_ctx_ = nullptr, *yuv_sws_ctx_ = nullptr;
AVFrame *av_frame_, *rgb_frame_, *yuv_frame_ = nullptr;
AVFormatContext *pFormatCtx_ = nullptr;
AVCodecContext *pCodecCtx_ = nullptr;
AVFrame *av_frame_, *rgb_frame_, *yuv_frame_;
AVFormatContext *input_ctx = nullptr;
AVCodecContext *decoder_ctx = nullptr;
int key_frames_count_ = 0;
bool valid_ = false;
AVIOContext *avio_ctx_ = nullptr;

AVHWDeviceType hw_device_type = AV_HWDEVICE_TYPE_NONE;
AVPixelFormat hw_pix_fmt = AV_PIX_FMT_NONE;
AVBufferRef *hw_device_ctx = nullptr;
AVFrame *hw_frame = nullptr;
};
1 change: 1 addition & 0 deletions selfdrive/ui/replay/main.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -102,6 +102,7 @@ int main(int argc, char *argv[]) {
{"no-cache", REPLAY_FLAG_NO_FILE_CACHE, "turn off local cache"},
{"qcam", REPLAY_FLAG_QCAMERA, "load qcamera"},
{"yuv", REPLAY_FLAG_SEND_YUV, "send yuv frame"},
{"cuda", REPLAY_FLAG_CUDA, "enable CUDA accelerated decoding"},
};

QCommandLineParser parser;
Expand Down
1 change: 1 addition & 0 deletions selfdrive/ui/replay/replay.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,7 @@ enum REPLAY_FLAGS {
REPLAY_FLAG_NO_FILE_CACHE = 0x0020,
REPLAY_FLAG_QCAMERA = 0x0040,
REPLAY_FLAG_SEND_YUV = 0x0080,
REPLAY_FLAG_CUDA = 0x0100,
};

class Replay : public QObject {
Expand Down
10 changes: 7 additions & 3 deletions selfdrive/ui/replay/route.cc
Original file line number | Diff line number | Diff line change
Expand Up @@ -91,7 +91,7 @@ void Route::addFileToSegment(int n, const QString &file) {

// class Segment

Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n) {
Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n), flags(flags) {
// [RoadCam, DriverCam, WideRoadCam, log]. fallback to qcamera/qlog
const QString file_list[] = {
(flags & REPLAY_FLAG_QCAMERA) || files.road_cam.isEmpty() ? files.qcamera : files.road_cam,
Expand All @@ -102,7 +102,7 @@ Segment::Segment(int n, const SegmentFile &files, uint32_t flags) : seg_num(n) {
for (int i = 0; i < std::size(file_list); i++) {
if (!file_list[i].isEmpty()) {
loading_++;
synchronizer_.addFuture(QtConcurrent::run([=] { loadFile(i, file_list[i].toStdString(), !(flags & REPLAY_FLAG_NO_FILE_CACHE)); }));
synchronizer_.addFuture(QtConcurrent::run([=] { loadFile(i, file_list[i].toStdString()); }));
}
}
}
Expand All @@ -114,10 +114,14 @@ Segment::~Segment() {
synchronizer_.waitForFinished();
}

void Segment::loadFile(int id, const std::string file, bool local_cache) {
void Segment::loadFile(int id, const std::string file) {
const bool local_cache = !(flags & REPLAY_FLAG_NO_FILE_CACHE);
bool success = false;
if (id < MAX_CAMERAS) {
frames[id] = std::make_unique<FrameReader>(local_cache, 20 * 1024 * 1024, 3);
if (flags & REPLAY_FLAG_CUDA) {
frames[id]->setHardwareDevice(AV_HWDEVICE_TYPE_CUDA);
}
success = frames[id]->load(file, &abort_);
} else {
log = std::make_unique<LogReader>(local_cache, -1, 3);
Expand Down
3 changes: 2 additions & 1 deletion selfdrive/ui/replay/route.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -58,9 +58,10 @@ class Segment : public QObject {
void loadFinished(bool success);

protected:
void loadFile(int id, const std::string file, bool local_cache);
void loadFile(int id, const std::string file);

std::atomic<bool> abort_ = false;
std::atomic<int> loading_ = 0;
QFutureSynchronizer<void> synchronizer_;
uint32_t flags;
};

0 comments on commit 5f9ae48

Please sign in to comment.