From 0828d1fab44537947a2b3d261bdc07081445fdd8 Mon Sep 17 00:00:00 2001 From: Zen <46526140+master-of-zen@users.noreply.github.com> Date: Mon, 3 May 2021 21:49:18 +0300 Subject: [PATCH] wip --- src/api/internal.rs | 2 + src/scenechange/mod.rs | 170 ++++++++++++++++++++++++++--------------- 2 files changed, 111 insertions(+), 61 deletions(-) diff --git a/src/api/internal.rs b/src/api/internal.rs index 6e3eb4bc44..ed7c446563 100644 --- a/src/api/internal.rs +++ b/src/api/internal.rs @@ -271,6 +271,7 @@ impl ContextInner { let seq = Arc::new(Sequence::new(enc)); let inter_cfg = InterConfig::new(enc); + let lookahead_distance = inter_cfg.keyframe_lookahead_distance() as usize; ContextInner { frame_count: 0, @@ -288,6 +289,7 @@ impl ContextInner { keyframe_detector: SceneChangeDetector::new( *enc, CpuFeatureLevel::default(), + lookahead_distance, seq.clone(), ), config: Arc::new(*enc), diff --git a/src/scenechange/mod.rs b/src/scenechange/mod.rs index 39ca9464f3..a92a2972c9 100644 --- a/src/scenechange/mod.rs +++ b/src/scenechange/mod.rs @@ -27,8 +27,12 @@ pub struct SceneChangeDetector { scale_factor: usize, // Frame buffer for scaled frames frame_buffer: Vec>, + // Deque offset for current + lookahead_offset: usize, + // Start deque offset based on lookahead + deque_offset: usize, // Scenechange results for adaptive threshold - score_deque: Vec, + score_deque: Vec<(f64, f64)>, /// Number of pixels in scaled frame for fast mode pixels: usize, /// The bit depth of the video. @@ -42,7 +46,7 @@ pub struct SceneChangeDetector { impl SceneChangeDetector { pub fn new( encoder_config: EncoderConfig, cpu_feature_level: CpuFeatureLevel, - sequence: Arc, + lookahead_distance: usize, sequence: Arc, ) -> Self { // This implementation is based on a Python implementation at // https://pyscenedetect.readthedocs.io/en/latest/reference/detection-methods/. @@ -55,6 +59,9 @@ impl SceneChangeDetector { // This may be adjusted later. // // This threshold is only used for the fast scenecut implementation. + // + // Testing shown that default threshold of 12 overallocates keyframes by almost double, + // compared to other scene change implementations const BASE_THRESHOLD: usize = 25; let bit_depth = encoder_config.bit_depth; let fast_mode = encoder_config.speed_settings.fast_scene_detection @@ -64,9 +71,13 @@ impl SceneChangeDetector { let scale_factor = if fast_mode { detect_scale_factor(&sequence) } else { 1_usize }; - let score_deque = Vec::with_capacity(5); - // Pixel count for fast scenedetect + // Set lookahead offset to 5 if normal lookahead available + let lookahead_offset = if lookahead_distance >= 5 { 5 } else { 0 }; + let deque_offset = lookahead_offset; + + let score_deque = Vec::with_capacity(5 + lookahead_distance); + // Pixel count for fast scenedetect let pixels = if fast_mode { (sequence.max_frame_height as usize / scale_factor) * (sequence.max_frame_width as usize / scale_factor) @@ -74,13 +85,16 @@ impl SceneChangeDetector { 1 }; - let frame_buffer = Vec::with_capacity(2); + let frame_buffer = + if fast_mode { Vec::with_capacity(2) } else { Vec::new() }; Self { threshold: BASE_THRESHOLD * bit_depth / 8, fast_mode, scale_factor, frame_buffer, + lookahead_offset, + deque_offset, score_deque, pixels, bit_depth, @@ -103,6 +117,9 @@ impl SceneChangeDetector { &mut self, frame_set: &[Arc>], input_frameno: u64, previous_keyframe: u64, ) -> bool { + // Use score deque for adaptive threshold for scene cut + // Declare score_deque offset based on lookahead for scene change scores + // Find the distance to the previous keyframe. let distance = input_frameno - previous_keyframe; @@ -118,48 +135,103 @@ impl SceneChangeDetector { return false; } - // Set our scenecut method + // Decrease deque offset if there is no more frames + if self.deque_offset > frame_set.len() && self.lookahead_offset > 0 { + self.deque_offset = frame_set.len(); + } + + // Initially fill score deque with forward frames + if self.deque_offset > 0 && self.score_deque.len() == 0 { + for x in 0..self.lookahead_offset { + // Filling score deque with forward frames + let result = if self.fast_mode { + self.fast_scenecut(frame_set[x].clone(), frame_set[x + 1].clone()) + } else { + self.cost_scenecut( + frame_set[x].clone(), + frame_set[x + 1].clone(), + input_frameno, + previous_keyframe, + ) + }; + self.score_deque.push((result.inter_cost, result.intra_cost)); + } + debug!("{:.0?}", self.score_deque) + } + + // Running single frame comparison and adding it to deque let result = if self.fast_mode { - self.fast_scenecut(frame_set[0].clone(), frame_set[1].clone()) + self.fast_scenecut( + frame_set[0 + self.deque_offset].clone(), + frame_set[1 + self.deque_offset].clone(), + ) } else { self.cost_scenecut( - frame_set[0].clone(), - frame_set[1].clone(), + frame_set[0 + self.deque_offset].clone(), + frame_set[1 + self.deque_offset].clone(), input_frameno, previous_keyframe, ) }; + self + .score_deque + .push((result.inter_cost as f64, result.intra_cost as f64)); + + // Adaptive scenecut check; + let scenecut = self.adaptive_scenecut(); debug!( - "[SC-Detect] Frame {}: T={:.1} P={:.1} {}", + "[SC-Detect] Frame {}: Cost={:.0} Threshold= {:.0} {}", input_frameno, - result.threshold, - result.inter_cost, - if result.has_scenecut { "Scenecut" } else { "No cut" } + self.score_deque[self.score_deque.len() - self.deque_offset].0, + self.score_deque[self.score_deque.len() - self.deque_offset].1, + if scenecut { "Scenecut" } else { "No cut" } ); - result.has_scenecut + + if scenecut { + // Reset lookahead offset + self.deque_offset = self.lookahead_offset; + + // Clear buffers and deque + self.frame_buffer.clear(); + self.score_deque.clear(); + } else { + // Keep score deque 5 + lookahead_size frames + self + .score_deque + .push((result.inter_cost as f64, result.intra_cost as f64)); + if self.score_deque.len() > 5 + self.deque_offset { + self.score_deque.remove(0); + } + } + + scenecut } /// Compares current scene score to adapted threshold based on previous scores + /// Value of current frame is offset by lookahead, if lookahead >=5 /// Returns true if current scene score is higher than adapted threshold - fn adaptive_scenecut(&mut self, scene_score: f64) -> bool { - if self.score_deque.is_empty() { - true // we skip high delta on first frame comparision as it's probably inside flashing or high motion scene + fn adaptive_scenecut(&mut self) -> bool { + // Max cost of all available frames + let max_of_deque: f64 = self + .score_deque + .iter() + .cloned() + .map(|(_, b)| b) + .fold(-1. / 0. /* -inf */, f64::max); + + // Scenecut check + let threshold = if self.fast_mode { + self.threshold as f64 + max_of_deque } else { - let max_of_deque: f64 = self - .score_deque - .iter() - .cloned() - .fold(-1. / 0. /* -inf */, f64::max); // max of last n(5) frames - - // - let scenecut = scene_score > self.threshold as f64 + max_of_deque; - debug!( - "[SC-Detect] P: {:.1} {:.1?} Cut: {}", - scene_score, self.score_deque, scenecut - ); - scenecut - } + max_of_deque + }; + + let scene_score = + self.score_deque[self.score_deque.len() - self.deque_offset].0; + + let scenecut = scene_score >= threshold; + scenecut } /// The fast algorithm detects fast cuts using a raw difference @@ -167,7 +239,7 @@ impl SceneChangeDetector { #[hawktracer(fast_scenecut)] fn fast_scenecut( &mut self, frame1: Arc>, frame2: Arc>, - ) -> ScenecutResult { + ) -> ScenecutData { // Downscaling both frames for comparison // Moving scaled frames to buffer if self.frame_buffer.is_empty() { @@ -184,27 +256,10 @@ impl SceneChangeDetector { let delta = self.delta_in_planes(&self.frame_buffer[0], &self.frame_buffer[1]); - // Adaptive scenecut check; - let scenecut = - delta >= self.threshold as f64 && self.adaptive_scenecut(delta); - - if scenecut { - // Clear buffers - self.frame_buffer.clear(); - self.score_deque.clear(); - } else { - // Keep score deque 5 frames - self.score_deque.push(delta as f64); - if self.score_deque.len() > 5 { - self.score_deque.remove(0); - } - } - - ScenecutResult { + ScenecutData { intra_cost: self.threshold as f64, threshold: self.threshold as f64, inter_cost: delta as f64, - has_scenecut: scenecut, } } @@ -217,7 +272,7 @@ impl SceneChangeDetector { fn cost_scenecut( &self, frame1: Arc>, frame2: Arc>, frameno: u64, previous_keyframe: u64, - ) -> ScenecutResult { + ) -> ScenecutData { let frame2_ref2 = Arc::clone(&frame2); let (intra_cost, inter_cost) = crate::rayon::join( move || { @@ -269,12 +324,7 @@ impl SceneChangeDetector { }; let threshold = intra_cost * (1.0 - bias); - ScenecutResult { - intra_cost, - threshold, - inter_cost, - has_scenecut: inter_cost > threshold, - } + ScenecutData { intra_cost, inter_cost, threshold } } /// Calculates delta beetween 2 planes @@ -299,7 +349,7 @@ impl SceneChangeDetector { } } -/// Scaling factor for frame in scenedetection +/// Scaling factor for frame in scene detection fn detect_scale_factor(sequence: &Arc) -> usize { let small_edge = cmp::min(sequence.max_frame_height, sequence.max_frame_width) as usize; @@ -324,11 +374,9 @@ fn detect_scale_factor(sequence: &Arc) -> usize { } /// This struct primarily exists for returning metrics to the caller -/// for logging debug information. #[derive(Debug, Clone, Copy)] -struct ScenecutResult { +struct ScenecutData { intra_cost: f64, inter_cost: f64, threshold: f64, - has_scenecut: bool, }