From d12a6fbcf7d1151a2c1097fb4706bf56b03f0e6f Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Wed, 13 Nov 2024 13:02:37 +0100 Subject: [PATCH] Fix issues with seeking in some H.264 videos on native & web (#8111) ### What * Fixes #8098 Updates re_mp4 to 0.3.0, details of fixes see https://github.com/rerun-io/re_mp4/releases/tag/0.3.0 ### Checklist * [x] I have read and agree to [Contributor Guide](https://github.com/rerun-io/rerun/blob/main/CONTRIBUTING.md) and the [Code of Conduct](https://github.com/rerun-io/rerun/blob/main/CODE_OF_CONDUCT.md) * [x] I've included a screenshot or gif (if applicable) * [x] I have tested the web demo (if applicable): * Using examples from latest `main` build: [rerun.io/viewer](https://rerun.io/viewer/pr/8111?manifest_url=https://app.rerun.io/version/main/examples_manifest.json) * Using full set of examples from `nightly` build: [rerun.io/viewer](https://rerun.io/viewer/pr/8111?manifest_url=https://app.rerun.io/version/nightly/examples_manifest.json) * [x] The PR title and labels are set such as to maximize their usefulness for the next release's CHANGELOG * [x] If applicable, add a new check to the [release checklist](https://github.com/rerun-io/rerun/blob/main/tests/python/release_checklist)! * [x] If have noted any breaking changes to the log API in `CHANGELOG.md` and the migration guide - [PR Build Summary](https://build.rerun.io/pr/8111) - [Recent benchmark results](https://build.rerun.io/graphs/crates.html) - [Wasm size tracking](https://build.rerun.io/graphs/sizes.html) To run all checks from `main`, comment on the PR with `@rerun-bot full-check`. --------- Co-authored-by: Emil Ernerfeldt --- Cargo.lock | 4 +- Cargo.toml | 2 +- crates/store/re_video/src/decode/webcodecs.rs | 30 ++------ crates/store/re_video/src/demux/mod.rs | 77 ++++++------------- crates/store/re_video/src/demux/mp4.rs | 24 +++++- crates/store/re_video/src/time.rs | 51 ++++-------- crates/viewer/re_data_ui/src/video.rs | 36 +++------ crates/viewer/re_renderer/src/video/mod.rs | 2 - crates/viewer/re_renderer/src/video/player.rs | 10 +-- 9 files changed, 88 insertions(+), 148 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 78a56900b789..dea4885f2ad4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5823,9 +5823,9 @@ dependencies = [ [[package]] name = "re_mp4" -version = "0.2.1" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb588af9f28ae35e59681c71c0d6379884e1209a074ffb1b3b18291abac2585a" +checksum = "751650322999417b64a5a89b034b4e34e4596826e5dfee9327856db77ca511e3" dependencies = [ "byteorder", "bytes", diff --git a/Cargo.toml b/Cargo.toml index 752caed4bd72..65143aeb32a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -206,7 +206,7 @@ memory-stats = "1.1" mimalloc = "0.1.43" mime_guess2 = "2.0" # infer MIME type by file extension, and map mime to file extension mint = "0.5.9" -re_mp4 = "0.2.1" +re_mp4 = "0.3.0" natord = "1.0.9" ndarray = "0.16" ndarray-rand = "0.15" diff --git a/crates/store/re_video/src/decode/webcodecs.rs b/crates/store/re_video/src/decode/webcodecs.rs index e015f2c26256..5f54376c10a7 100644 --- a/crates/store/re_video/src/decode/webcodecs.rs +++ b/crates/store/re_video/src/decode/webcodecs.rs @@ -34,7 +34,6 @@ impl std::ops::Deref for WebVideoFrame { pub struct WebVideoDecoder { video_config: Config, timescale: Timescale, - minimum_presentation_timestamp: Time, decoder: web_sys::VideoDecoder, hw_acceleration: DecodeHardwareAcceleration, on_output: Arc, @@ -108,16 +107,11 @@ impl WebVideoDecoder { on_output: impl Fn(Result) + Send + Sync + 'static, ) -> Result { let on_output = Arc::new(on_output); - let decoder = init_video_decoder( - on_output.clone(), - video.timescale, - video.samples_statistics.minimum_presentation_timestamp, - )?; + let decoder = init_video_decoder(on_output.clone(), video.timescale)?; Ok(Self { video_config: video.config.clone(), timescale: video.timescale, - minimum_presentation_timestamp: video.samples_statistics.minimum_presentation_timestamp, decoder, hw_acceleration, on_output, @@ -138,7 +132,7 @@ impl AsyncDecoder for WebVideoDecoder { &data, video_chunk .presentation_timestamp - .into_micros_since_start(self.timescale, self.minimum_presentation_timestamp), + .into_micros(self.timescale), type_, ); @@ -162,11 +156,7 @@ impl AsyncDecoder for WebVideoDecoder { // At least on Firefox, it can happen that reset on a previous error fails. // In that case, start over completely and try again! re_log::debug!("Video decoder reset failed, recreating decoder."); - self.decoder = init_video_decoder( - self.on_output.clone(), - self.timescale, - self.minimum_presentation_timestamp, - )?; + self.decoder = init_video_decoder(self.on_output.clone(), self.timescale)?; }; self.decoder @@ -205,23 +195,15 @@ impl AsyncDecoder for WebVideoDecoder { fn init_video_decoder( on_output_callback: Arc, timescale: Timescale, - minimum_presentation_timestamp: Time, ) -> Result { let on_output = { let on_output = on_output_callback.clone(); Closure::wrap(Box::new(move |frame: web_sys::VideoFrame| { // We assume that the timestamp returned by the decoder is in time since start, // and does not represent demuxed "raw" presentation timestamps. - let presentation_timestamp = Time::from_micros_since_start( - frame.timestamp().unwrap_or(0.0), - timescale, - minimum_presentation_timestamp, - ); - let duration = Time::from_micros_since_start( - frame.duration().unwrap_or(0.0), - timescale, - minimum_presentation_timestamp, - ); + let presentation_timestamp = + Time::from_micros(frame.timestamp().unwrap_or(0.0), timescale); + let duration = Time::from_micros(frame.duration().unwrap_or(0.0), timescale); on_output(Ok(Frame { content: WebVideoFrame(frame), diff --git a/crates/store/re_video/src/demux/mod.rs b/crates/store/re_video/src/demux/mod.rs index be4a04448bf9..733111ad83e8 100644 --- a/crates/store/re_video/src/demux/mod.rs +++ b/crates/store/re_video/src/demux/mod.rs @@ -75,19 +75,6 @@ pub struct VideoData { /// Meta informationa about the video samples. #[derive(Clone, Debug)] pub struct SamplesStatistics { - /// The smallest presentation timestamp observed in this video. - /// - /// This is typically 0, but in the presence of B-frames, it may be non-zero. - /// In fact, many formats don't require this to be zero, but video players typically - /// normalize the shown time to start at zero. - /// Note that timestamps in the [`Sample`]s are *not* automatically adjusted with this value. - // This is roughly equivalent to FFmpeg's internal `min_corrected_pts` - // https://github.com/FFmpeg/FFmpeg/blob/4047b887fc44b110bccb1da09bcb79d6e454b88b/libavformat/isom.h#L202 - // (unlike us, this handles a bunch more edge cases but it fulfills the same role) - // To learn more about this I recommend reading the patch that introduced this in FFmpeg: - // https://patchwork.ffmpeg.org/project/ffmpeg/patch/20170606181601.25187-1-isasi@google.com/#12592 - pub minimum_presentation_timestamp: Time, - /// Whether all decode timestamps are equal to presentation timestamps. /// /// If true, the video typically has no B-frames as those require frame reordering. @@ -103,11 +90,6 @@ impl SamplesStatistics { pub fn new(samples: &[Sample]) -> Self { re_tracing::profile_function!(); - let minimum_presentation_timestamp = samples - .iter() - .map(|s| s.presentation_timestamp) - .min() - .unwrap_or_default(); let dts_always_equal_pts = samples .iter() .all(|s| s.decode_timestamp == s.presentation_timestamp); @@ -128,7 +110,6 @@ impl SamplesStatistics { }); Self { - minimum_presentation_timestamp, dts_always_equal_pts, has_sample_highest_pts_so_far, } @@ -301,8 +282,6 @@ impl VideoData { /// Determines the video timestamps of all frames inside a video, returning raw time values. /// /// Returned timestamps are in nanoseconds since start and are guaranteed to be monotonically increasing. - /// These are *not* necessarily the same as the presentation timestamps, as the returned timestamps are - /// normalized respect to the start of the video, see [`SamplesStatistics::minimum_presentation_timestamp`]. pub fn frame_timestamps_ns(&self) -> impl Iterator + '_ { // Segments are guaranteed to be sorted among each other, but within a segment, // presentation timestamps may not be sorted since this is sorted by decode timestamps. @@ -311,12 +290,7 @@ impl VideoData { .iter() .map(|sample| sample.presentation_timestamp) .sorted() - .map(|pts| { - pts.into_nanos_since_start( - self.timescale, - self.samples_statistics.minimum_presentation_timestamp, - ) - }) + .map(|pts| pts.into_nanos(self.timescale)) }) } @@ -650,16 +624,16 @@ mod tests { fn test_latest_sample_index_at_presentation_timestamp() { // This is a snippet of real world data! let pts = [ - 512, 1536, 1024, 768, 1280, 2560, 2048, 1792, 2304, 3584, 3072, 2816, 3328, 4608, 4096, - 3840, 4352, 5376, 4864, 5120, 6400, 5888, 5632, 6144, 7424, 6912, 6656, 7168, 8448, - 7936, 7680, 8192, 9472, 8960, 8704, 9216, 10496, 9984, 9728, 10240, 11520, 11008, - 10752, 11264, 12544, 12032, 11776, 12288, 13568, 13056, + 0, 1024, 512, 256, 768, 2048, 1536, 1280, 1792, 3072, 2560, 2304, 2816, 4096, 3584, + 3328, 3840, 4864, 4352, 4608, 5888, 5376, 5120, 5632, 6912, 6400, 6144, 6656, 7936, + 7424, 7168, 7680, 8960, 8448, 8192, 8704, 9984, 9472, 9216, 9728, 11008, 10496, 10240, + 10752, 12032, 11520, 11264, 11776, 13056, 12544, ]; let dts = [ - 0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584, - 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, 6912, 7168, - 7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984, 10240, 10496, 10752, - 11008, 11264, 11520, 11776, 12032, 12288, 12544, + -512, -256, 0, 256, 512, 768, 1024, 1280, 1536, 1792, 2048, 2304, 2560, 2816, 3072, + 3328, 3584, 3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632, 5888, 6144, 6400, 6656, + 6912, 7168, 7424, 7680, 7936, 8192, 8448, 8704, 8960, 9216, 9472, 9728, 9984, 10240, + 10496, 10752, 11008, 11264, 11520, 11776, 12032, ]; // Checking our basic assumptions about this data: @@ -684,7 +658,6 @@ mod tests { .collect::>(); let sample_statistics = SamplesStatistics::new(&samples); - assert_eq!(sample_statistics.minimum_presentation_timestamp, Time(512)); assert!(!sample_statistics.dts_always_equal_pts); // Test queries on the samples. @@ -717,30 +690,30 @@ mod tests { // A few hardcoded cases - both for illustrative purposes and to make sure the generic tests above are correct. // Querying before the first sample. - assert_eq!(None, query_pts(Time(0))); - assert_eq!(None, query_pts(Time(123))); + assert_eq!(None, query_pts(Time(-1))); + assert_eq!(None, query_pts(Time(-123))); // Querying for the first sample - assert_eq!(Some(0), query_pts(Time(512))); - assert_eq!(Some(0), query_pts(Time(513))); - assert_eq!(Some(0), query_pts(Time(600))); - assert_eq!(Some(0), query_pts(Time(767))); + assert_eq!(Some(0), query_pts(Time(0))); + assert_eq!(Some(0), query_pts(Time(1))); + assert_eq!(Some(0), query_pts(Time(88))); + assert_eq!(Some(0), query_pts(Time(255))); // The next sample is a jump in index! - assert_eq!(Some(3), query_pts(Time(768))); - assert_eq!(Some(3), query_pts(Time(769))); - assert_eq!(Some(3), query_pts(Time(800))); - assert_eq!(Some(3), query_pts(Time(1023))); + assert_eq!(Some(3), query_pts(Time(256))); + assert_eq!(Some(3), query_pts(Time(257))); + assert_eq!(Some(3), query_pts(Time(400))); + assert_eq!(Some(3), query_pts(Time(511))); // And the one after that should jump back again. - assert_eq!(Some(2), query_pts(Time(1024))); - assert_eq!(Some(2), query_pts(Time(1025))); - assert_eq!(Some(2), query_pts(Time(1100))); - assert_eq!(Some(2), query_pts(Time(1279))); + assert_eq!(Some(2), query_pts(Time(512))); + assert_eq!(Some(2), query_pts(Time(513))); + assert_eq!(Some(2), query_pts(Time(600))); + assert_eq!(Some(2), query_pts(Time(767))); // And another one! - assert_eq!(Some(4), query_pts(Time(1280))); - assert_eq!(Some(4), query_pts(Time(1281))); + assert_eq!(Some(4), query_pts(Time(768))); + assert_eq!(Some(4), query_pts(Time(1023))); // Test way outside of the range. // (this is not the last element in the list since that one doesn't have the highest PTS) diff --git a/crates/store/re_video/src/demux/mp4.rs b/crates/store/re_video/src/demux/mp4.rs index ee44ff0d9775..4984b8cc1298 100644 --- a/crates/store/re_video/src/demux/mp4.rs +++ b/crates/store/re_video/src/demux/mp4.rs @@ -56,8 +56,8 @@ impl VideoData { gop_sample_start_index = samples.len(); } - let decode_timestamp = Time::new(sample.decode_timestamp as i64); - let presentation_timestamp = Time::new(sample.composition_timestamp as i64); + let decode_timestamp = Time::new(sample.decode_timestamp); + let presentation_timestamp = Time::new(sample.composition_timestamp); let duration = Time::new(sample.duration as i64); let byte_offset = sample.offset as u32; @@ -76,6 +76,26 @@ impl VideoData { } } + // Generate data for `test_latest_sample_index_at_presentation_timestamp` test. + if false { + re_log::info!( + "pts: {:?}", + samples + .iter() + .take(50) + .map(|s| s.presentation_timestamp.0) + .collect::>() + ); + re_log::info!( + "dts: {:?}", + samples + .iter() + .take(50) + .map(|s| s.decode_timestamp.0) + .collect::>() + ); + } + // Append the last GOP if there are any samples left: if !samples.is_empty() { let start = samples[gop_sample_start_index].decode_timestamp; diff --git a/crates/store/re_video/src/time.rs b/crates/store/re_video/src/time.rs index f6025d698c0d..1961067c15e1 100644 --- a/crates/store/re_video/src/time.rs +++ b/crates/store/re_video/src/time.rs @@ -28,67 +28,50 @@ impl Time { Self(v) } - /// `time_base` specifies the #[inline] - pub fn from_secs_since_start( - secs_since_start: f64, - timescale: Timescale, - start_time: Self, - ) -> Self { - Self((secs_since_start * timescale.0 as f64).round() as i64 + start_time.0) + pub fn from_secs(secs_since_start: f64, timescale: Timescale) -> Self { + Self((secs_since_start * timescale.0 as f64).round() as i64) } #[inline] - pub fn from_millis_since_start( - millis_since_start: f64, - timescale: Timescale, - start_time: Self, - ) -> Self { - Self::from_secs_since_start(millis_since_start / 1e3, timescale, start_time) + pub fn from_millis(millis_since_start: f64, timescale: Timescale) -> Self { + Self::from_secs(millis_since_start / 1e3, timescale) } #[inline] - pub fn from_micros_since_start( - micros_since_start: f64, - timescale: Timescale, - start_time: Self, - ) -> Self { - Self::from_secs_since_start(micros_since_start / 1e6, timescale, start_time) + pub fn from_micros(micros_since_start: f64, timescale: Timescale) -> Self { + Self::from_secs(micros_since_start / 1e6, timescale) } #[inline] - pub fn from_nanos_since_start( - nanos_since_start: i64, - timescale: Timescale, - start_time: Self, - ) -> Self { - Self::from_secs_since_start(nanos_since_start as f64 / 1e9, timescale, start_time) + pub fn from_nanos(nanos_since_start: i64, timescale: Timescale) -> Self { + Self::from_secs(nanos_since_start as f64 / 1e9, timescale) } /// Convert to a duration #[inline] pub fn duration(self, timescale: Timescale) -> std::time::Duration { - std::time::Duration::from_nanos(self.into_nanos_since_start(timescale, Self(0)) as _) + std::time::Duration::from_nanos(self.into_nanos(timescale) as _) } #[inline] - pub fn into_secs_since_start(self, timescale: Timescale, start_time: Self) -> f64 { - (self.0 - start_time.0) as f64 / timescale.0 as f64 + pub fn into_secs(self, timescale: Timescale) -> f64 { + self.0 as f64 / timescale.0 as f64 } #[inline] - pub fn into_millis_since_start(self, timescale: Timescale, start_time: Self) -> f64 { - self.into_secs_since_start(timescale, start_time) * 1e3 + pub fn into_millis(self, timescale: Timescale) -> f64 { + self.into_secs(timescale) * 1e3 } #[inline] - pub fn into_micros_since_start(self, timescale: Timescale, start_time: Self) -> f64 { - self.into_secs_since_start(timescale, start_time) * 1e6 + pub fn into_micros(self, timescale: Timescale) -> f64 { + self.into_secs(timescale) * 1e6 } #[inline] - pub fn into_nanos_since_start(self, timescale: Timescale, start_time: Self) -> i64 { - (self.into_secs_since_start(timescale, start_time) * 1e9).round() as i64 + pub fn into_nanos(self, timescale: Timescale) -> i64 { + (self.into_secs(timescale) * 1e9).round() as i64 } } diff --git a/crates/viewer/re_data_ui/src/video.rs b/crates/viewer/re_data_ui/src/video.rs index 326805ec6f86..2a88b37ba722 100644 --- a/crates/viewer/re_data_ui/src/video.rs +++ b/crates/viewer/re_data_ui/src/video.rs @@ -8,7 +8,7 @@ use re_ui::{ list_item::{self, PropertyContent}, DesignTokens, UiExt, }; -use re_video::{decode::FrameInfo, demux::SamplesStatistics, VideoData}; +use re_video::{decode::FrameInfo, VideoData}; use re_viewer_context::UiLayout; pub fn video_result_ui( @@ -116,7 +116,12 @@ fn video_data_ui(ui: &mut egui::Ui, ui_layout: UiLayout, video_data: &VideoData) .value_text(video_data.gops.len().to_string()), ) .on_hover_text("The total number of Group of Pictures (GOPs) in the video."); - samples_statistics_ui(ui, &video_data.samples_statistics); + + let re_video::SamplesStatistics {dts_always_equal_pts, has_sample_highest_pts_so_far: _} = &video_data.samples_statistics; + + ui.list_item_flat_noninteractive( + PropertyContent::new("All PTS equal DTS").value_bool(*dts_always_equal_pts) + ).on_hover_text("Whether all decode timestamps are equal to presentation timestamps. If true, the video typically has no B-frames."); }); ui.list_item_collapsible_noninteractive_label("Video samples", false, |ui| { @@ -231,10 +236,7 @@ fn timestamp_ui(ui: &mut egui::Ui, video_data: &VideoData, timestamp: re_video:: ui.monospace(re_format::format_int(timestamp.0)) .on_hover_ui(|ui| { ui.monospace(re_format::format_timestamp_seconds( - timestamp.into_secs_since_start( - video_data.timescale, - video_data.samples_statistics.minimum_presentation_timestamp, - ), + timestamp.into_secs(video_data.timescale), )); }); } @@ -349,18 +351,6 @@ pub fn show_decoded_frame_info( } } -fn samples_statistics_ui(ui: &mut egui::Ui, samples_statistics: &SamplesStatistics) { - ui.list_item_flat_noninteractive( - PropertyContent::new("Minimum PTS").value_text(samples_statistics.minimum_presentation_timestamp.0.to_string()) - ).on_hover_text("The smallest presentation timestamp (PTS) observed in this video.\n\ - A non-zero value indicates that there are B-frames in the video.\n\ - Rerun will place the 0:00:00 time at this timestamp."); - ui.list_item_flat_noninteractive( - // `value_bool` doesn't look great for static values. - PropertyContent::new("All PTS equal DTS").value_text(samples_statistics.dts_always_equal_pts.to_string()) - ).on_hover_text("Whether all decode timestamps are equal to presentation timestamps. If true, the video typically has no B-frames."); -} - fn frame_info_ui(ui: &mut egui::Ui, frame_info: &FrameInfo, video_data: &re_video::VideoData) { let FrameInfo { is_sync, @@ -382,13 +372,11 @@ fn frame_info_ui(ui: &mut egui::Ui, frame_info: &FrameInfo, video_data: &re_vide let presentation_time_range = presentation_timestamp..presentation_timestamp + duration; ui.list_item_flat_noninteractive(PropertyContent::new("Time range").value_text(format!( "{} - {}", - re_format::format_timestamp_seconds(presentation_time_range.start.into_secs_since_start( - video_data.timescale, - video_data.samples_statistics.minimum_presentation_timestamp + re_format::format_timestamp_seconds(presentation_time_range.start.into_secs( + video_data.timescale )), - re_format::format_timestamp_seconds(presentation_time_range.end.into_secs_since_start( - video_data.timescale, - video_data.samples_statistics.minimum_presentation_timestamp + re_format::format_timestamp_seconds(presentation_time_range.end.into_secs( + video_data.timescale )), ))) .on_hover_text("Time range in which this frame is valid."); diff --git a/crates/viewer/re_renderer/src/video/mod.rs b/crates/viewer/re_renderer/src/video/mod.rs index 15bad868326a..ca3df90ec1a3 100644 --- a/crates/viewer/re_renderer/src/video/mod.rs +++ b/crates/viewer/re_renderer/src/video/mod.rs @@ -137,8 +137,6 @@ impl Video { /// empty. /// /// The time is specified in seconds since the start of the video. - /// This may not be equal to the presentation timestamp as it may have an offset applied, - /// see [`re_video::SamplesStatistics::minimum_presentation_timestamp`]. pub fn frame_at( &self, render_context: &RenderContext, diff --git a/crates/viewer/re_renderer/src/video/player.rs b/crates/viewer/re_renderer/src/video/player.rs index cb4f4c96c20b..8e2d63575973 100644 --- a/crates/viewer/re_renderer/src/video/player.rs +++ b/crates/viewer/re_renderer/src/video/player.rs @@ -127,13 +127,9 @@ impl VideoPlayer { if time_since_video_start_in_seconds < 0.0 { return Err(VideoPlayerError::NegativeTimestamp); } - let presentation_timestamp = Time::from_secs_since_start( - time_since_video_start_in_seconds, - self.data.timescale, - self.data.samples_statistics.minimum_presentation_timestamp, - ); - let presentation_timestamp = presentation_timestamp - .min(self.data.duration + self.data.samples_statistics.minimum_presentation_timestamp); // Don't seek past the end of the video. + let presentation_timestamp = + Time::from_secs(time_since_video_start_in_seconds, self.data.timescale); + let presentation_timestamp = presentation_timestamp.min(self.data.duration); // Don't seek past the end of the video. let error_on_last_frame_at = self.last_error.is_some(); self.frame_at_internal(render_ctx, presentation_timestamp, video_data)?;