From 9b3af3055cfde6158e7491085aae04ecfef0365d Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Mon, 11 Nov 2024 10:49:58 +0100 Subject: [PATCH] Fix video backward seeking / stepping back sometimes getting stuck (in the presence of b-frames) (#8053) ### What * Fixes https://github.com/rerun-io/rerun/issues/7956 Our PTS -> sample search algorithm wasn't quite right. Fixed this here and added a unit test. Unfortunately, I had to introduce a small auxiliary data structure to keep this snappy / not O(n) for n samples, but it's very simple and rather contained. Tested on native & web. Web still suffers from * https://github.com/rerun-io/rerun/issues/7961 but on Chrome this works fine now! https://github.com/user-attachments/assets/7a20467d-4d66-456f-be99-debba4ff7a31 ### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using examples from latest `main` build: [rerun.io/viewer](https://rerun.io/viewer/pr/8053?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [rerun.io/viewer](https://rerun.io/viewer/pr/8053?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! * [x] If have noted any breaking changes to the log API in `CHANGELOG.md` and the migration guide - [PR Build Summary](https://build.rerun.io/pr/8053) - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html) To run all checks from `main`, comment on the PR with `@rerun-bot full-check`. --- Cargo.lock | 1 + crates/store/re_video/Cargo.toml | 1 + .../re_video/src/decode/ffmpeg_h264/ffmpeg.rs | 6 - crates/store/re_video/src/demux/mod.rs | 201 ++++++++++++++++-- crates/viewer/re_data_ui/src/video.rs | 18 ++ crates/viewer/re_renderer/src/video/player.rs | 10 +- 6 files changed, 214 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 68f5b97064f3..73dcb03e2b1e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6477,6 +6477,7 @@ dependencies = [ name = "re_video" version = "0.20.0-alpha.1+dev" dependencies = [ + "bit-vec", "cfg_aliases 0.2.1", "criterion", "crossbeam", diff --git a/crates/store/re_video/Cargo.toml b/crates/store/re_video/Cargo.toml index 458d68b4403c..bfad392b3be1 100644 --- a/crates/store/re_video/Cargo.toml +++ b/crates/store/re_video/Cargo.toml @@ -47,6 +47,7 @@ re_build_info.workspace = true re_log.workspace = true re_tracing.workspace = true +bit-vec.workspace = true crossbeam.workspace = true econtext.workspace = true itertools.workspace = true diff --git a/crates/store/re_video/src/decode/ffmpeg_h264/ffmpeg.rs b/crates/store/re_video/src/decode/ffmpeg_h264/ffmpeg.rs index 743936de625c..9829e05df12d 100644 --- a/crates/store/re_video/src/decode/ffmpeg_h264/ffmpeg.rs +++ b/crates/store/re_video/src/decode/ffmpeg_h264/ffmpeg.rs @@ -516,12 +516,6 @@ fn read_ffmpeg_output( // How do we know how large this buffer needs to be? // Whenever the highest known DTS is behind the PTS, we need to wait until the DTS catches up. // Otherwise, we'd assign the wrong PTS to the frame that just came in. - // - // Example how presentation timestamps and decode timestamps - // can play out in the presence of B-frames to illustrate this: - // PTS: 1 4 2 3 - // DTS: 1 2 3 4 - // Stream: I P B B let frame_info = loop { let oldest_pts_in_buffer = pending_frame_infos.first_key_value().map(|(pts, _)| *pts); diff --git a/crates/store/re_video/src/demux/mod.rs b/crates/store/re_video/src/demux/mod.rs index b821d77cb345..2dbc898ccb52 100644 --- a/crates/store/re_video/src/demux/mod.rs +++ b/crates/store/re_video/src/demux/mod.rs @@ -9,6 +9,7 @@ pub mod mp4; use std::{collections::BTreeMap, ops::Range}; +use bit_vec::BitVec; use itertools::Itertools as _; use super::{Time, Timescale}; @@ -89,6 +90,11 @@ pub struct SamplesStatistics { /// /// If true, the video typically has no B-frames as those require frame reordering. pub dts_always_equal_pts: bool, + + /// If `dts_always_equal_pts` is false, then this gives for each sample whether its PTS is the highest seen so far. + /// If `dts_always_equal_pts` is true, then this is left as `None`. + /// This is used for optimizing PTS search. + pub has_sample_highest_pts_so_far: Option, } impl SamplesStatistics { @@ -104,9 +110,25 @@ impl SamplesStatistics { .iter() .all(|s| s.decode_timestamp == s.presentation_timestamp); + let mut biggest_pts_so_far = Time::MIN; + let has_sample_highest_pts_so_far = (!dts_always_equal_pts).then(|| { + samples + .iter() + .map(move |sample| { + if sample.presentation_timestamp > biggest_pts_so_far { + biggest_pts_so_far = sample.presentation_timestamp; + true + } else { + false + } + }) + .collect() + }); + Self { minimum_presentation_timestamp, dts_always_equal_pts, + has_sample_highest_pts_so_far, } } } @@ -296,34 +318,83 @@ impl VideoData { }) } - /// For a given decode (!) timestamp, returns the index of the latest sample whose + /// For a given decode (!) timestamp, returns the index of the first sample whose /// decode timestamp is lesser than or equal to the given timestamp. - pub fn latest_sample_index_at_decode_timestamp(&self, decode_time: Time) -> Option { - latest_at_idx( - &self.samples, - |sample| sample.decode_timestamp, - &decode_time, - ) + fn latest_sample_index_at_decode_timestamp( + samples: &[Sample], + decode_time: Time, + ) -> Option { + latest_at_idx(samples, |sample| sample.decode_timestamp, &decode_time) } - /// For a given presentation timestamp, return the index of the latest sample - /// whose presentation timestamp is lesser than or equal to the given timestamp. - pub fn latest_sample_index_at_presentation_timestamp( - &self, + /// See [`Self::latest_sample_index_at_presentation_timestamp`], split out for testing purposes. + fn latest_sample_index_at_presentation_timestamp_internal( + samples: &[Sample], + sample_statistics: &SamplesStatistics, presentation_timestamp: Time, ) -> Option { // Find the latest sample where `decode_timestamp <= presentation_timestamp`. // Because `decode <= presentation`, we never have to look further backwards in the // video than this. let decode_sample_idx = - self.latest_sample_index_at_decode_timestamp(presentation_timestamp)?; + Self::latest_sample_index_at_decode_timestamp(samples, presentation_timestamp)?; + + // It's very common that dts==pts in which case we're done! + let Some(has_sample_highest_pts_so_far) = + sample_statistics.has_sample_highest_pts_so_far.as_ref() + else { + debug_assert!(!sample_statistics.dts_always_equal_pts); + return Some(decode_sample_idx); + }; + debug_assert!(has_sample_highest_pts_so_far.len() == samples.len()); // Search backwards, starting at `decode_sample_idx`, looking for // the first sample where `sample.presentation_timestamp <= presentation_timestamp`. - // this is the sample which when decoded will be presented at the timestamp the user requested. - self.samples[..=decode_sample_idx] - .iter() - .rposition(|sample| sample.presentation_timestamp <= presentation_timestamp) + // I.e. the sample with the biggest PTS that is smaller or equal to the requested PTS. + // + // The tricky part is that we can't just take the first sample with a presentation timestamp that matches + // since smaller presentation timestamps may still show up further back! + let mut best_index = usize::MAX; + let mut best_pts = Time::MIN; + for sample_idx in (0..=decode_sample_idx).rev() { + let sample = &samples[sample_idx]; + + if sample.presentation_timestamp == presentation_timestamp { + // Clean hit. Take this one, no questions asked :) + // (assuming that each PTS is unique!) + return Some(sample_idx); + } + + if sample.presentation_timestamp < presentation_timestamp + && sample.presentation_timestamp > best_pts + { + best_pts = sample.presentation_timestamp; + best_index = sample_idx; + } + + if best_pts != Time::MIN && has_sample_highest_pts_so_far[sample_idx] { + // We won't see any bigger PTS values anymore, meaning we're as close as we can get to the requested PTS! + return Some(best_index); + } + } + + None + } + + /// For a given presentation timestamp, return the index of the first sample + /// whose presentation timestamp is lesser than or equal to the given timestamp. + /// + /// Remember that samples after (i.e. with higher index) may have a *lower* presentation time + /// if the stream has sample reordering! + pub fn latest_sample_index_at_presentation_timestamp( + &self, + presentation_timestamp: Time, + ) -> Option { + Self::latest_sample_index_at_presentation_timestamp_internal( + &self.samples, + &self.samples_statistics, + presentation_timestamp, + ) } /// For a given decode (!) timestamp, return the index of the group of pictures (GOP) index containing the given timestamp. @@ -553,4 +624,102 @@ mod tests { assert_eq!(latest_at_idx(&v, |v| *v, &11), Some(9)); assert_eq!(latest_at_idx(&v, |v| *v, &1000), Some(9)); } + + #[test] + fn test_latest_sample_index_at_presentation_timestamp() { + // This is a snippet of real world data! + let pts = [ + 512, 1536, 1024, 768, 1280, 2560, 2048, 1792, 2304, 3584, 3072, 2816, 3328, 4608, 4096, + 3840, 4352, 5376, 4864, 5120, 6400, 5888, 5632, 6144, 7424, 6912, 6656, 7168, 8448, + 7936, 7680, 8192, 9472, 8960, 8704, 9216, 10496, 9984, 9728, 10240, 11520, 11008, + 10752, 11264, 12544, 12032, 11776, 12288, 13568, 13056, + ]; + let dts = [ + 0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, + 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, 6912, 7168, + 7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984, 10240, 10496, 10752, + 11008, 11264, 11520, 11776, 12032, 12288, 12544, + ]; + + // Checking our basic assumptions about this data: + assert_eq!(pts.len(), dts.len()); + assert!(pts.iter().zip(dts.iter()).all(|(pts, dts)| dts <= pts)); + + // Create fake samples from this. + let samples = pts + .into_iter() + .zip(dts) + .map(|(pts, dts)| Sample { + is_sync: false, + decode_timestamp: Time(dts), + presentation_timestamp: Time(pts), + duration: Time(1), + byte_offset: 0, + byte_length: 0, + }) + .collect::>(); + + let sample_statistics = SamplesStatistics::new(&samples); + assert_eq!(sample_statistics.minimum_presentation_timestamp, Time(512)); + assert!(!sample_statistics.dts_always_equal_pts); + + // Test queries on the samples. + let query_pts = |pts| { + VideoData::latest_sample_index_at_presentation_timestamp_internal( + &samples, + &sample_statistics, + pts, + ) + }; + + // Check that query for all exact positions works as expected using brute force search as the reference. + for (idx, sample) in samples.iter().enumerate() { + assert_eq!(Some(idx), query_pts(sample.presentation_timestamp)); + } + + // Check that for slightly offsetted positions the query is still correct. + // This works because for this dataset we know the minimum presentation timesetampe distance is always 256. + for (idx, sample) in samples.iter().enumerate() { + assert_eq!( + Some(idx), + query_pts(sample.presentation_timestamp + Time(1)) + ); + assert_eq!( + Some(idx), + query_pts(sample.presentation_timestamp + Time(255)) + ); + } + + // A few hardcoded cases - both for illustrative purposes and to make sure the generic tests above are correct. + + // Querying before the first sample. + assert_eq!(None, query_pts(Time(0))); + assert_eq!(None, query_pts(Time(123))); + + // Querying for the first sample + assert_eq!(Some(0), query_pts(Time(512))); + assert_eq!(Some(0), query_pts(Time(513))); + assert_eq!(Some(0), query_pts(Time(600))); + assert_eq!(Some(0), query_pts(Time(767))); + + // The next sample is a jump in index! + assert_eq!(Some(3), query_pts(Time(768))); + assert_eq!(Some(3), query_pts(Time(769))); + assert_eq!(Some(3), query_pts(Time(800))); + assert_eq!(Some(3), query_pts(Time(1023))); + + // And the one after that should jump back again. + assert_eq!(Some(2), query_pts(Time(1024))); + assert_eq!(Some(2), query_pts(Time(1025))); + assert_eq!(Some(2), query_pts(Time(1100))); + assert_eq!(Some(2), query_pts(Time(1279))); + + // And another one! + assert_eq!(Some(4), query_pts(Time(1280))); + assert_eq!(Some(4), query_pts(Time(1281))); + + // Test way outside of the range. + // (this is not the last element in the list since that one doesn't have the highest PTS) + assert_eq!(Some(48), query_pts(Time(123123123123123123))); + } } diff --git a/crates/viewer/re_data_ui/src/video.rs b/crates/viewer/re_data_ui/src/video.rs index 19cdd8b0cce5..e9d2d93dd02f 100644 --- a/crates/viewer/re_data_ui/src/video.rs +++ b/crates/viewer/re_data_ui/src/video.rs @@ -268,6 +268,24 @@ fn frame_info_ui(ui: &mut egui::Ui, frame_info: &FrameInfo, video_data: &re_vide .on_hover_text("Raw presentation timestamp prior to applying the timescale.\n\ This specifies the time at which the frame should be shown relative to the start of a video stream."); + // Judging the following to be a bit too obscure to be of relevance outside of debugging Rerun itself. + #[cfg(debug_assertions)] + { + if let Some(has_sample_highest_pts_so_far) = video_data + .samples_statistics + .has_sample_highest_pts_so_far + .as_ref() + { + if let Some(sample_idx) = video_data + .latest_sample_index_at_presentation_timestamp(frame_info.presentation_timestamp) + { + ui.list_item_flat_noninteractive( + PropertyContent::new("Highest PTS so far").value_text(has_sample_highest_pts_so_far[sample_idx].to_string()) + ).on_hover_text("Whether the presentation timestamp (PTS) at the this frame is the highest encountered so far. If false there are lower PTS values prior in the list."); + } + } + } + // Information about the current group of pictures this frame is part of. // Lookup via decode timestamp is faster, but it may not always be available. if let Some(gop_index) = diff --git a/crates/viewer/re_renderer/src/video/player.rs b/crates/viewer/re_renderer/src/video/player.rs index c30f1b053d30..935d20003ce6 100644 --- a/crates/viewer/re_renderer/src/video/player.rs +++ b/crates/viewer/re_renderer/src/video/player.rs @@ -258,7 +258,15 @@ impl VideoPlayer { // special case: handle seeking backwards within a single GOP // this is super inefficient, but it's the only way to handle it // while maintaining a buffer of only 2 GOPs - if requested_sample_idx < self.current_sample_idx { + // + // Note that due to sample reordering (in the presence of b-frames), if can happen + // that `self.current_sample_idx` is *behind* the `requested_sample_idx` even if we're + // seeking backwards! + // Therefore, it's important to compare presentation timestamps instead of sample indices. + // (comparing decode timestamps should be equivalent to comparing sample indices) + let current_pts = self.data.samples[self.current_sample_idx].presentation_timestamp; + let requested_pts = self.data.samples[requested_sample_idx].presentation_timestamp; + if requested_pts < current_pts { self.reset()?; self.enqueue_gop(requested_gop_idx, video_data)?; self.enqueue_gop(requested_gop_idx + 1, video_data)?;