Skip to content

Commit

Permalink
ffmpeg: Persist filtergraph in between transcodes.
Browse files Browse the repository at this point in the history
Re-initializing the filtergraph turns out to be expensive,
especially if GPUs are in the picture. We get around 30% more
performance by persisting the filtergraph.

In order to flush the filter, we cache the most recent audio/video
frame and feed those into the filter repeatedly with a sentinel
value set (`AVFrame.opaque`). Once we receive a frame from the
filtergraph with the sentinel set, we know the filter has been
completely flushed.
  • Loading branch information
j0sh committed Mar 23, 2020
1 parent 97c48b3 commit a03f838
Showing 1 changed file with 52 additions and 21 deletions.
73 changes: 52 additions & 21 deletions ffmpeg/lpms_ffmpeg.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@
#include <libavutil/opt.h>
#include <libavutil/pixdesc.h>

#include <pthread.h>

// Not great to appropriate internal API like this...
const int lpms_ERR_INPUT_PIXFMT = FFERRTAG('I','N','P','X');
const int lpms_ERR_FILTERS = FFERRTAG('F','L','T','R');
Expand All @@ -32,11 +30,12 @@ struct input_ctx {
enum AVHWDeviceType hw_type;
char *device;

int64_t next_pts_a, next_pts_v;

// Decoder flush
AVPacket *first_pkt;
int flushed;

// Filter flush
AVFrame *last_frame_v, *last_frame_a;
};

struct filter_ctx {
Expand All @@ -47,6 +46,8 @@ struct filter_ctx {
AVFilterContext *src_ctx;

uint8_t *hwframes; // GPU frame pool data
int64_t flush_offset;
int flushed, flush_count;
};

struct output_ctx {
Expand Down Expand Up @@ -214,13 +215,14 @@ static void close_output(struct output_ctx *octx)
}
if (octx->vc && AV_HWDEVICE_TYPE_NONE == octx->hw_type) avcodec_free_context(&octx->vc);
if (octx->ac) avcodec_free_context(&octx->ac);
free_filter(&octx->vf);
free_filter(&octx->af);
octx->af.flushed = octx->vf.flushed = 0;
}

// Fully tear down an output context: first close per-segment encoder state
// via close_output(), then release resources that persist across segments.
static void free_output(struct output_ctx *octx) {
close_output(octx);
// close_output() skips freeing a HW-backed video codec context
// (kept open between segments); release it unconditionally here.
if (octx->vc) avcodec_free_context(&octx->vc);
// Filtergraphs are persisted between transcodes for performance
// (see commit message); only free them on final teardown.
free_filter(&octx->vf);
free_filter(&octx->af);
}


Expand Down Expand Up @@ -736,6 +738,8 @@ static void free_input(struct input_ctx *inctx)
}
if (inctx->ac) avcodec_free_context(&inctx->ac);
if (inctx->hw_device_ctx) av_buffer_unref(&inctx->hw_device_ctx);
if (inctx->last_frame_v) av_frame_free(&inctx->last_frame_v);
if (inctx->last_frame_a) av_frame_free(&inctx->last_frame_a);
}

static int open_video_decoder(input_params *params, struct input_ctx *ctx)
Expand Down Expand Up @@ -855,6 +859,10 @@ static int open_input(input_params *params, struct input_ctx *ctx)
if (ret < 0) dd_err("Unable to open video decoder\n")
ret = open_audio_decoder(params, ctx);
if (ret < 0) dd_err("Unable to open audio decoder\n")
ctx->last_frame_v = av_frame_alloc();
if (!ctx->last_frame_v) dd_err("Unable to alloc last_frame_v");
ctx->last_frame_a = av_frame_alloc();
if (!ctx->last_frame_a) dd_err("Unable to alloc last_frame_a");

return 0;

Expand Down Expand Up @@ -1025,6 +1033,7 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext
goto proc_cleanup; \
}
int ret = 0;
int is_flushing = 0;

if (!encoder) proc_err("Trying to transmux; not supported")

Expand All @@ -1043,15 +1052,24 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext
ret = init_video_filters(ictx, octx);
if (ret < 0) return lpms_ERR_FILTERS;
}
// Start filter flushing process if necessary
if (!inf && !filter->flushed) {
// Set input frame to the last frame
// And increment pts offset by pkt_duration
// TODO It may make sense to use the expected output packet duration instead
int is_video = AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type;
AVFrame *frame = is_video ? ictx->last_frame_v : ictx->last_frame_a;
filter->flush_offset += frame->pkt_duration;
inf = frame;
inf->opaque = (void*)inf->pts; // value doesn't matter; just needs to be set
is_flushing = 1;
}
if (inf) {
// Apply the offset from filter flushing, then reset for the next output
inf->pts += filter->flush_offset;
ret = av_buffersrc_write_frame(filter->src_ctx, inf);
inf->pts -= filter->flush_offset;
if (ret < 0) proc_err("Error feeding the filtergraph");
} else {
// We need to set the pts at EOF to the *end* of the last packet
// in order to avoid discarding any queued packets
int64_t next_pts = AVMEDIA_TYPE_VIDEO == ost->codecpar->codec_type ?
ictx->next_pts_v : ictx->next_pts_a;
av_buffersrc_close(filter->src_ctx, next_pts, AV_BUFFERSRC_FLAG_PUSH);
}

while (1) {
Expand All @@ -1066,6 +1084,15 @@ int process_out(struct input_ctx *ictx, struct output_ctx *octx, AVCodecContext
if (inf) return ret;
frame = NULL;
} else if (ret < 0) proc_err("Error consuming the filtergraph\n");
if (frame && frame->opaque) {
// opaque being set means it's a flush packet
filter->flush_count++;
// don't set flushed flag in case this is a flush from a previous segment
if (is_flushing) filter->flushed = 1;
frame->opaque = NULL; // reset just to be sure
continue;
}
if (frame) frame->pts -= filter->flush_count;
ret = encode(encoder, frame, octx, ost);
av_frame_unref(frame);
// For HW we keep the encoder open so will only get EAGAIN.
Expand Down Expand Up @@ -1167,8 +1194,6 @@ int transcode(struct transcode_thread *h,
if (octx->vc) {
ret = add_video_stream(octx, ictx);
if (ret < 0) main_err("Unable to re-add video stream\n");
ret = init_video_filters(ictx, octx);
if (ret < 0) main_err("Unable to re-open video filter\n")
} else fprintf(stderr, "no video stream\n");

// re-attach audio encoder
Expand All @@ -1190,6 +1215,7 @@ int transcode(struct transcode_thread *h,
while (1) {
int has_frame = 0;
AVStream *ist = NULL;
AVFrame *last_frame = NULL;
av_frame_unref(dframe);
ret = process_in(ictx, dframe, &ipkt);
if (ret == AVERROR_EOF) break;
Expand All @@ -1204,20 +1230,25 @@ int transcode(struct transcode_thread *h,
// width / height will be zero for pure streamcopy (no decoding)
decoded_results->frames += dframe->width && dframe->height;
decoded_results->pixels += dframe->width * dframe->height;
has_frame = has_frame && dframe->width && dframe->height;
if (has_frame) last_frame = ictx->last_frame_v;
} else if (AVMEDIA_TYPE_AUDIO == ist->codecpar->codec_type) {
has_frame = has_frame && dframe->nb_samples;
if (has_frame) last_frame = ictx->last_frame_a;
}
if (has_frame) {
int64_t dur = 0;
if (dframe->pkt_duration) dur = dframe->pkt_duration;
else if (ist->avg_frame_rate.den) {
dur = av_rescale_q(1, av_inv_q(ist->avg_frame_rate), ist->time_base);
else if (ist->r_frame_rate.den) {
dur = av_rescale_q(1, av_inv_q(ist->r_frame_rate), ist->time_base);
} else {
// TODO use better heuristics for this; look at how ffmpeg does it
//fprintf(stderr, "Could not determine next pts; filter might drop\n");
fprintf(stderr, "Could not determine next pts; filter might drop\n");
}
ictx->next_pts_v = dframe->pts + dur;
dframe->pkt_duration = dur;
av_frame_unref(last_frame);
av_frame_ref(last_frame, dframe);
}
} else if (AVMEDIA_TYPE_AUDIO == ist->codecpar->codec_type) {
if (has_frame) ictx->next_pts_a = dframe->pts + dframe->pkt_duration;
}

for (i = 0; i < nb_outputs; i++) {
struct output_ctx *octx = &outputs[i];
Expand Down

0 comments on commit a03f838

Please sign in to comment.