Skip to content

Commit

Permalink
Make mp4 parsing faster & lower memory overhead (#7860)
Browse files Browse the repository at this point in the history
  • Loading branch information
jprochazk authored Oct 22, 2024
1 parent c23e81e commit 613a35b
Show file tree
Hide file tree
Showing 12 changed files with 177 additions and 118 deletions.
14 changes: 13 additions & 1 deletion .github/workflows/reusable_bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,18 +75,30 @@ jobs:
workload_identity_provider: ${{ secrets.GOOGLE_WORKLOAD_IDENTITY_PROVIDER }}
service_account: ${{ secrets.GOOGLE_SERVICE_ACCOUNT }}

- uses: prefix-dev/setup-pixi@v0.8.1
with:
pixi-version: v0.25.0
# Only has the deps for round-trips. Not all examples.
environments: wheel-test-min

- name: Download test assets
run: pixi run -e wheel-test-min python ./tests/assets/download_test_assets.py

- name: Add SHORT_SHA env property with commit short sha
run: echo "SHORT_SHA=`echo ${{github.sha}} | cut -c1-7`" >> $GITHUB_ENV

- name: Run benchmark
# Use bash shell so we get pipefail behavior with tee
# Running under `pixi` so we get `nasm`
run: |
cargo bench \
pixi run -e wheel-test-min \
cargo bench \
--all-features \
-p re_entity_db \
-p re_log_encoding \
-p re_query \
-p re_tuid \
-p re_video \
-- --output-format=bencher | tee /tmp/${{ env.SHORT_SHA }}
- name: "Set up Cloud SDK"
Expand Down
4 changes: 2 additions & 2 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -5555,8 +5555,7 @@ dependencies = [
[[package]]
name = "re_mp4"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d1e30657b1ae7f0dd3428a59dc8140732b74a22cc07763606c9ec4054138731"
source = "git+https://github.com/rerun-io/re_mp4?rev=7d38361ee5b05f5a2b83a8029057c8a24d2e9023#7d38361ee5b05f5a2b83a8029057c8a24d2e9023"
dependencies = [
"byteorder",
"bytes",
Expand Down Expand Up @@ -6182,6 +6181,7 @@ name = "re_video"
version = "0.20.0-alpha.1+dev"
dependencies = [
"cfg_aliases 0.2.1",
"criterion",
"crossbeam",
"econtext",
"indicatif",
Expand Down
3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -565,3 +565,6 @@ missing_errors_doc = "allow"
re_arrow2 = { git = "https://github.com/rerun-io/re_arrow2", rev = "e4717d6debc6d4474ec10db8f629f823f57bad07" }

# dav1d = { path = "/home/cmc/dev/rerun-io/rav1d", package = "re_rav1d", version = "0.1.1" }

# Commit on `main` branch of `re_mp4`
re_mp4 = { git = "https://github.com/rerun-io/re_mp4", rev = "7d38361ee5b05f5a2b83a8029057c8a24d2e9023" }
7 changes: 6 additions & 1 deletion crates/store/re_video/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ dav1d = { workspace = true, optional = true, default-features = false, features

[dev-dependencies]
indicatif.workspace = true

criterion.workspace = true

# For build.rs:
[build-dependencies]
Expand All @@ -71,3 +71,8 @@ cfg_aliases.workspace = true

[[example]]
name = "frames"


[[bench]]
name = "video_load_bench"
harness = false
24 changes: 24 additions & 0 deletions crates/store/re_video/benches/video_load_bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#![allow(clippy::unwrap_used)] // acceptable in benchmarks

use std::path::Path;

use criterion::{criterion_group, criterion_main, Criterion};

/// Benchmarks loading an MP4 file into [`re_video::VideoData`].
///
/// The test asset is read from disk once, before the benchmark starts,
/// so the benchmarked routine only contains the `load_from_bytes` call.
fn video_load(c: &mut Criterion) {
    // The crate lives at `crates/store/re_video`, so three levels up from the
    // manifest directory is the directory that contains `tests/assets`.
    let manifest_dir = Path::new(env!("CARGO_MANIFEST_DIR"));
    let root_dir = manifest_dir.ancestors().nth(3).unwrap();
    let asset_path = root_dir.join("tests/assets/video/Big_Buck_Bunny_1080_10s_av1.mp4");
    let bytes = std::fs::read(asset_path).unwrap();

    c.bench_function("video_load", |bencher| {
        bencher.iter_batched(
            // Nothing to prepare per batch: every iteration borrows the same bytes.
            || (),
            |()| re_video::VideoData::load_from_bytes(&bytes, "video/mp4"),
            criterion::BatchSize::LargeInput,
        );
    });
}

criterion_group!(benches, video_load);
criterion_main!(benches);
9 changes: 5 additions & 4 deletions crates/store/re_video/examples/frames.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ fn main() {

println!("Decoding {video_path}");

let video = std::fs::read(video_path).expect("failed to read video");
let video = re_video::VideoData::load_mp4(&video).expect("failed to load video");
let video_blob = std::fs::read(video_path).expect("failed to read video");
let video = re_video::VideoData::load_mp4(&video_blob).expect("failed to load video");

println!(
"{} {}x{}",
Expand All @@ -37,11 +37,12 @@ fn main() {
let mut decoder = re_video::decode::new_decoder(video_path.to_string(), &video)
.expect("Failed to create decoder");

write_video_frames(&video, decoder.as_mut(), &output_dir);
write_video_frames(&video, &video_blob, decoder.as_mut(), &output_dir);
}

fn write_video_frames(
video: &re_video::VideoData,
video_blob: &[u8],
decoder: &mut dyn re_video::decode::SyncDecoder,
output_dir: &PathBuf,
) {
Expand All @@ -61,7 +62,7 @@ fn write_video_frames(
let start = Instant::now();
for sample in &video.samples {
let should_stop = std::sync::atomic::AtomicBool::new(false);
let chunk = video.get(sample).unwrap();
let chunk = sample.get(video_blob).unwrap();
decoder.submit_chunk(&should_stop, chunk, &on_output);
}

Expand Down
46 changes: 22 additions & 24 deletions crates/store/re_video/src/demux/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,6 @@ pub struct VideoData {
/// and should be presented in composition-timestamp order.
pub samples: Vec<Sample>,

/// This array stores all data used by samples.
pub data: Vec<u8>,

/// All the tracks in the mp4; not just the video track.
///
/// Can be nice to show in a UI.
Expand Down Expand Up @@ -245,25 +242,6 @@ impl VideoData {
.sorted()
})
}

/// Returns `None` if the sample is invalid/out-of-range.
pub fn get(&self, sample: &Sample) -> Option<Chunk> {
let byte_offset = sample.byte_offset as usize;
let byte_length = sample.byte_length as usize;

if self.data.len() < byte_offset + byte_length {
None
} else {
let data = &self.data[byte_offset..byte_offset + byte_length];

Some(Chunk {
data: data.to_vec(),
composition_timestamp: sample.composition_timestamp,
duration: sample.duration,
is_sync: sample.is_sync,
})
}
}
}

/// A Group of Pictures (GOP) always starts with an I-frame, followed by delta-frames.
Expand Down Expand Up @@ -311,13 +289,34 @@ pub struct Sample {
/// Duration of the sample, in time units.
pub duration: Time,

/// Offset into [`VideoData::data`]
/// Offset into the video data.
pub byte_offset: u32,

/// Length of sample starting at [`Sample::byte_offset`].
pub byte_length: u32,
}

impl Sample {
    /// Read the sample from the video data.
    ///
    /// Note that `data` _must_ be a reference to the original MP4 file
    /// from which the [`VideoData`] was loaded.
    ///
    /// Returns `None` if the sample is out of bounds, which can only happen
    /// if `data` is not the original video data. This includes the case where
    /// `byte_offset + byte_length` is not representable: the range is then
    /// treated as out of bounds rather than panicking or wrapping around.
    pub fn get(&self, data: &[u8]) -> Option<Chunk> {
        // Widen to `usize` *before* adding. Summing the two `u32` fields directly
        // overflows for large offsets (panic in debug builds, silent wrap-around
        // in release builds, which could select the wrong byte range).
        let start = usize::try_from(self.byte_offset).ok()?;
        let length = usize::try_from(self.byte_length).ok()?;
        let end = start.checked_add(length)?;

        // The returned chunk owns a copy of the sample's bytes.
        let data = data.get(start..end)?.to_vec();

        Some(Chunk {
            data,
            composition_timestamp: self.composition_timestamp,
            duration: self.duration,
            is_sync: self.is_sync,
        })
    }
}

/// Configuration of a video.
#[derive(Debug, Clone)]
pub struct Config {
Expand Down Expand Up @@ -385,7 +384,6 @@ impl std::fmt::Debug for VideoData {
"samples",
&self.samples.iter().enumerate().collect::<Vec<_>>(),
)
.field("data", &self.data.len())
.finish()
}
}
2 changes: 0 additions & 2 deletions crates/store/re_video/src/demux/mp4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ impl VideoData {
let mut samples = Vec::<Sample>::new();
let mut gops = Vec::<GroupOfPictures>::new();
let mut gop_sample_start_index = 0;
let data = track.data.clone();

for sample in &track.samples {
if sample.is_sync && !samples.is_empty() {
Expand Down Expand Up @@ -86,7 +85,6 @@ impl VideoData {
duration,
gops,
samples,
data,
mp4_tracks,
})
}
Expand Down
9 changes: 8 additions & 1 deletion crates/viewer/re_data_ui/src/blob.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ pub fn blob_preview_and_save_ui(
ui_layout,
&video_result,
video_timestamp,
blob,
);
}

Expand Down Expand Up @@ -175,6 +176,7 @@ fn show_video_blob_info(
ui_layout: UiLayout,
video_result: &Result<re_renderer::video::Video, VideoLoadError>,
video_timestamp: Option<VideoTimestamp>,
blob: &re_types::datatypes::Blob,
) {
#[allow(clippy::match_same_arms)]
match video_result {
Expand Down Expand Up @@ -262,7 +264,12 @@ fn show_video_blob_info(
ui.id().with("video_player").value(),
);

match video.frame_at(render_ctx, decode_stream_id, timestamp_in_seconds) {
match video.frame_at(
render_ctx,
decode_stream_id,
timestamp_in_seconds,
blob.as_slice(),
) {
Ok(VideoFrameTexture {
texture,
time_range,
Expand Down
18 changes: 10 additions & 8 deletions crates/viewer/re_renderer/src/video/decoder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ impl VideoDecoder {
&mut self,
render_ctx: &RenderContext,
presentation_timestamp_s: f64,
video_data: &[u8],
) -> Result<VideoFrameTexture, DecodingError> {
if presentation_timestamp_s < 0.0 {
return Err(DecodingError::NegativeTimestamp);
Expand All @@ -197,7 +198,7 @@ impl VideoDecoder {
let presentation_timestamp = presentation_timestamp.min(self.data.duration); // Don't seek past the end of the video.

let error_on_last_frame_at = self.last_error.is_some();
let result = self.frame_at_internal(render_ctx, presentation_timestamp);
let result = self.frame_at_internal(render_ctx, presentation_timestamp, video_data);

match result {
Ok(()) => {
Expand Down Expand Up @@ -248,6 +249,7 @@ impl VideoDecoder {
&mut self,
render_ctx: &RenderContext,
presentation_timestamp: Time,
video_data: &[u8],
) -> Result<(), DecodingError> {
re_tracing::profile_function!();

Expand Down Expand Up @@ -322,21 +324,21 @@ impl VideoDecoder {
if requested_gop_idx != self.current_gop_idx {
if self.current_gop_idx.saturating_add(1) == requested_gop_idx {
// forward seek to next GOP - queue up the one _after_ requested
self.enqueue_gop(requested_gop_idx + 1)?;
self.enqueue_gop(requested_gop_idx + 1, video_data)?;
} else {
// forward seek by N>1 OR backward seek across GOPs - reset
self.reset()?;
self.enqueue_gop(requested_gop_idx)?;
self.enqueue_gop(requested_gop_idx + 1)?;
self.enqueue_gop(requested_gop_idx, video_data)?;
self.enqueue_gop(requested_gop_idx + 1, video_data)?;
}
} else if requested_sample_idx != self.current_sample_idx {
// special case: handle seeking backwards within a single GOP
// this is super inefficient, but it's the only way to handle it
// while maintaining a buffer of only 2 GOPs
if requested_sample_idx < self.current_sample_idx {
self.reset()?;
self.enqueue_gop(requested_gop_idx)?;
self.enqueue_gop(requested_gop_idx + 1)?;
self.enqueue_gop(requested_gop_idx, video_data)?;
self.enqueue_gop(requested_gop_idx + 1, video_data)?;
}
}

Expand Down Expand Up @@ -384,15 +386,15 @@ impl VideoDecoder {
/// Enqueue all samples in the given GOP.
///
/// Does nothing if the index is out of bounds.
fn enqueue_gop(&mut self, gop_idx: usize) -> Result<(), DecodingError> {
fn enqueue_gop(&mut self, gop_idx: usize, video_data: &[u8]) -> Result<(), DecodingError> {
let Some(gop) = self.data.gops.get(gop_idx) else {
return Ok(());
};

let samples = &self.data.samples[gop.range()];

for (i, sample) in samples.iter().enumerate() {
let chunk = self.data.get(sample).ok_or(DecodingError::BadData)?;
let chunk = sample.get(video_data).ok_or(DecodingError::BadData)?;
let is_keyframe = i == 0;
self.chunk_decoder.decode(chunk, is_keyframe)?;
}
Expand Down
3 changes: 2 additions & 1 deletion crates/viewer/re_renderer/src/video/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ impl Video {
render_context: &RenderContext,
decoder_stream_id: VideoDecodingStreamId,
presentation_timestamp_s: f64,
video_data: &[u8],
) -> FrameDecodingResult {
re_tracing::profile_function!();

Expand Down Expand Up @@ -233,7 +234,7 @@ impl Video {
decoder_entry.frame_index = render_context.active_frame_idx();
decoder_entry
.decoder
.frame_at(render_context, presentation_timestamp_s)
.frame_at(render_context, presentation_timestamp_s, video_data)
}

/// Removes all decoders that have been unused in the last frame.
Expand Down
Loading

0 comments on commit 613a35b

Please sign in to comment.