some promising results

rerun-io · Sep 25, 2024 · 779fbcd · 779fbcd
1 parent 199e4b1
commit 779fbcd
Show file tree

Hide file tree

Showing 3 changed files with 137 additions and 26 deletions.
diff --git a/crates/viewer/re_renderer/src/video/decoder/native.rs b/crates/viewer/re_renderer/src/video/decoder/native.rs
@@ -41,7 +41,7 @@ impl VideoDecoder {
     pub fn frame_at(
         &mut self,
         _render_ctx: &RenderContext,
-        _timestamp_s: f64,
+        _presentation_timestamp_s: f64,
     ) -> FrameDecodingResult {
         FrameDecodingResult::Error(DecodingError::NoNativeSupport)
     }

diff --git a/crates/viewer/re_renderer/src/video/decoder/web.rs b/crates/viewer/re_renderer/src/video/decoder/web.rs
@@ -107,6 +107,8 @@ impl VideoDecoder {
         render_context: &RenderContext,
         data: Arc<re_video::VideoData>,
     ) -> Result<Self, DecodingError> {
+        re_log::debug!("{:?}", data.samples);
+
         let frames = Arc::new(Mutex::new(Vec::with_capacity(16)));
         let decode_error = Arc::new(Mutex::new(None));
 
@@ -116,6 +118,7 @@ impl VideoDecoder {
             let frames = frames.clone();
             let decode_error = decode_error.clone();
             move |frame: web_sys::VideoFrame| {
+                web_sys::console::log_1(&frame);
                 let composition_timestamp =
                     Time::from_micros(frame.timestamp().unwrap_or(0.0), timescale);
                 let duration = Time::from_micros(frame.duration().unwrap_or(0.0), timescale);
@@ -167,7 +170,7 @@ impl VideoDecoder {
     pub fn frame_at(
         &mut self,
         render_ctx: &RenderContext,
-        timestamp_s: f64,
+        presentation_timestamp_s: f64,
     ) -> FrameDecodingResult {
         if let Some(error) = self.decode_error.lock().clone() {
             // TODO(emilk): if there is a decoding error in one segment or sample,
@@ -177,7 +180,7 @@ impl VideoDecoder {
             return FrameDecodingResult::Error(error);
         }
 
-        let result = self.frame_at_internal(timestamp_s);
+        let result = self.frame_at_internal(presentation_timestamp_s);
         match &result {
             FrameDecodingResult::Ready(_) => {
                 self.error_on_last_frame_at = false;
@@ -200,27 +203,127 @@ impl VideoDecoder {
         result
     }
 
-    fn frame_at_internal(&mut self, timestamp_s: f64) -> FrameDecodingResult {
-        if timestamp_s < 0.0 {
+    fn frame_at_internal(&mut self, presentation_timestamp_s: f64) -> FrameDecodingResult {
+        if presentation_timestamp_s < 0.0 {
             return FrameDecodingResult::Error(DecodingError::NegativeTimestamp);
         }
-        let timescale = self.data.timescale;
-        let timestamp = Time::from_secs(timestamp_s, timescale);
+        let presentation_timestamp = Time::from_secs(presentation_timestamp_s, self.data.timescale);
+
+        if let Err(err) = self.enqueue_requested_segments2(presentation_timestamp) {
+            return FrameDecodingResult::Error(err);
+        }
 
-        let Some(requested_segment_idx) =
-            latest_at_idx(&self.data.segments, |segment| segment.start, &timestamp)
+        self.try_present_frame(presentation_timestamp)
+    }
+
+    fn enqueue_requested_segments2(
+        &mut self,
+        presentation_timestamp: Time,
+    ) -> Result<(), DecodingError> {
+        // Some terminology:
+        //   - presentation timestamp = composition timestamp
+        //     = the time at which the frame should be shown
+        //   - decode timestamp
+        //     = determines the decoding order of samples
+        //
+        // Note: `composition >= decode` for any given sample.
+        //       For some codecs, the two timestamps are the same.
+        // We must enqueue samples in decode order, but show them in composition order.
+
+        // 1. Find the latest sample where `decode_timestamp <= presentation_timestamp`.
+        //    Because `composition >= decode`, we never have to look further ahead in the
+        //    video than this.
+        let Some(decode_sample_idx) = latest_at_idx(
+            &self.data.samples,
+            |sample| sample.decode_timestamp,
+            &presentation_timestamp,
+        ) else {
+            return Err(DecodingError::EmptyVideo);
+        };
+
+        // 2. Search _backwards_, starting at `decode_sample_idx`, and stop at the first match, i.e.
+        //    the last sample at or before that index where `sample.composition_timestamp <= presentation_timestamp`.
+        //    This is the sample the user requested.
+        let Some(requested_sample_idx) = self.data.samples[..=decode_sample_idx]
+            .iter()
+            .rposition(|sample| sample.composition_timestamp <= presentation_timestamp)
         else {
-            return FrameDecodingResult::Error(DecodingError::EmptyVideo);
+            return Err(DecodingError::EmptyVideo);
+        };
+
+        // 3. Do a binary search through segments by the decode timestamp of the found sample
+        //    to find the segment that contains the sample.
+        let Some(requested_segment_idx) = latest_at_idx(
+            &self.data.segments,
+            |segment| segment.start,
+            &self.data.samples[requested_sample_idx].decode_timestamp,
+        ) else {
+            return Err(DecodingError::EmptyVideo);
+        };
+
+        re_log::debug!("decode={decode_sample_idx} segment={requested_segment_idx} sample={requested_sample_idx}");
+
+        // 4. Enqueue segments as needed.
+        //
+        // We maintain a buffer of 2 segments, so we can always smoothly transition to the next segment.
+        // We can always start decoding from any segment, because segments always begin with a keyframe.
+        //
+        // Backward seeks or seeks across many segments trigger a reset of the decoder,
+        // because decoding all the samples between the previous sample and the requested
+        // one would mean decoding and immediately discarding more frames than we need.
+        if requested_segment_idx != self.current_segment_idx {
+            let segment_distance = requested_segment_idx.checked_sub(self.current_segment_idx);
+            if segment_distance == Some(1) {
+                // forward seek to next segment - queue up the one _after_ requested
+                self.enqueue_segment(requested_segment_idx + 1);
+            } else {
+                // Startup, forward seek by N>1, or backward seek across segments -> reset decoder
+                self.reset()?;
+                self.enqueue_segment(requested_segment_idx);
+                self.enqueue_segment(requested_segment_idx + 1);
+            }
+        } else if requested_sample_idx != self.current_sample_idx {
+            // Special case: the request stays within the current segment but targets a different sample.
+            // Seeking backwards here is super inefficient, but resetting is the only way to handle it
+            // while maintaining a buffer of 2 segments.
+            let sample_distance = requested_sample_idx as isize - self.current_sample_idx as isize;
+            if sample_distance < 0 {
+                self.reset()?;
+                self.enqueue_segment(requested_segment_idx);
+                self.enqueue_segment(requested_segment_idx + 1);
+            }
+        }
+
+        // At this point, we have the requested segments enqueued. They will be output
+        // in _composition timestamp_ order, so presenting the frame is a binary search
+        // through the frame buffer as usual.
+
+        self.current_segment_idx = requested_segment_idx;
+        self.current_sample_idx = requested_sample_idx;
+
+        Ok(())
+    }
+
+    fn enqueue_requested_segments(
+        &mut self,
+        presentation_timestamp: Time,
+    ) -> Result<(), DecodingError> {
+        let Some(requested_segment_idx) = latest_at_idx(
+            &self.data.segments,
+            |segment| segment.start,
+            &presentation_timestamp,
+        ) else {
+            return Err(DecodingError::EmptyVideo);
         };
         let requested_segment = &self.data.segments[requested_segment_idx];
 
         let Some(requested_sample_idx) = latest_at_idx(
             &self.data.samples[requested_segment.range()],
             |sample| sample.decode_timestamp,
-            &timestamp,
+            &presentation_timestamp,
         ) else {
             // This should never happen, because segments are never empty.
-            return FrameDecodingResult::Error(DecodingError::EmptySegment);
+            return Err(DecodingError::EmptySegment);
         };
 
         // Enqueue segments as needed. We maintain a buffer of 2 segments, so we can
@@ -238,9 +341,7 @@ impl VideoDecoder {
                 self.enqueue_segment(requested_segment_idx + 1);
             } else {
                 // Startup, forward seek by N>1, or backward seek across segments -> reset decoder
-                if let Err(err) = self.reset() {
-                    return FrameDecodingResult::Error(err);
-                }
+                self.reset()?;
                 self.enqueue_segment(requested_segment_idx);
                 self.enqueue_segment(requested_segment_idx + 1);
             }
@@ -250,9 +351,7 @@ impl VideoDecoder {
             // while maintaining a buffer of 2 segments
             let sample_distance = requested_sample_idx as isize - self.current_sample_idx as isize;
             if sample_distance < 0 {
-                if let Err(err) = self.reset() {
-                    return FrameDecodingResult::Error(err);
-                }
+                self.reset()?;
                 self.enqueue_segment(requested_segment_idx);
                 self.enqueue_segment(requested_segment_idx + 1);
             }
@@ -261,11 +360,19 @@ impl VideoDecoder {
         self.current_segment_idx = requested_segment_idx;
         self.current_sample_idx = requested_sample_idx;
 
+        Ok(())
+    }
+
+    fn try_present_frame(&mut self, presentation_timestamp: Time) -> FrameDecodingResult {
+        let timescale = self.data.timescale;
+
         let mut frames = self.frames.lock();
 
-        let Some(frame_idx) =
-            latest_at_idx(&frames, |frame| frame.composition_timestamp, &timestamp)
-        else {
+        let Some(frame_idx) = latest_at_idx(
+            &frames,
+            |frame| frame.composition_timestamp,
+            &presentation_timestamp,
+        ) else {
             // no buffered frames - texture will be blank
             // Don't return a zeroed texture, because we may just be behind on decoding
             // and showing an old frame is better than showing a blank frame,
@@ -287,7 +394,7 @@ impl VideoDecoder {
         // This handles the case when we have a buffered frame that's older than the requested timestamp.
         // We don't want to show this frame to the user, because it's not actually the one they requested,
         // so instead return the last decoded frame.
-        if timestamp.into_millis(timescale) - frame_timestamp_ms > frame_duration_ms {
+        if presentation_timestamp.into_millis(timescale) - frame_timestamp_ms > frame_duration_ms {
             return FrameDecodingResult::Pending(self.texture.clone());
         }
 
@@ -351,10 +458,11 @@ impl VideoDecoder {
         } else {
             EncodedVideoChunkType::Delta
         };
-        // TODO(jan): use `composition_timestamp` instead
         let chunk = EncodedVideoChunkInit::new(
             &data,
-            sample.decode_timestamp.into_micros(self.data.timescale),
+            sample
+                .composition_timestamp
+                .into_micros(self.data.timescale),
             type_,
         );
         chunk.set_duration(sample.duration.into_micros(self.data.timescale));
@@ -368,6 +476,7 @@ impl VideoDecoder {
             return;
         };
 
+        web_sys::console::log_1(&chunk);
         if let Err(err) = self.decoder.decode(&chunk) {
             *self.decode_error.lock() = Some(DecodingError::DecodeChunk(js_error_to_string(&err)));
         }

diff --git a/crates/viewer/re_renderer/src/video/mod.rs b/crates/viewer/re_renderer/src/video/mod.rs
@@ -122,7 +122,7 @@ impl Video {
         &self,
         render_context: &RenderContext,
         decoder_stream_id: VideoDecodingStreamId,
-        timestamp_s: f64,
+        presentation_timestamp_s: f64,
     ) -> FrameDecodingResult {
         re_tracing::profile_function!();
 
@@ -152,7 +152,9 @@ impl Video {
         };
 
         decoder_entry.frame_index = render_context.active_frame_idx();
-        decoder_entry.decoder.frame_at(render_context, timestamp_s)
+        decoder_entry
+            .decoder
+            .frame_at(render_context, presentation_timestamp_s)
     }
 
     /// Removes all decoders that have been unused in the last frame.