diff --git a/src/encoder.rs b/src/encoder.rs index 380e5d76ea..25ddd88eff 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -47,6 +47,7 @@ use std::collections::VecDeque; use std::io::Write; use std::mem::MaybeUninit; use std::sync::Arc; +use std::sync::Mutex; use std::{fmt, io, mem}; use crate::rayon::iter::*; @@ -438,6 +439,7 @@ pub struct FrameState { // these are stored per-tile for easier access. pub frame_me_stats: Arc<[FrameMEStats; REF_FRAMES as usize]>, pub enc_stats: EncoderStats, + pub active_tiles: Arc>>, } impl FrameState { @@ -479,6 +481,7 @@ impl FrameState { restoration: rs, frame_me_stats: me_stats, enc_stats: Default::default(), + active_tiles: Default::default(), } } @@ -510,6 +513,7 @@ impl FrameState { restoration: rs, frame_me_stats: FrameMEStats::new_arc_array(fi.w_in_b, fi.h_in_b), enc_stats: Default::default(), + active_tiles: Default::default(), } } @@ -992,13 +996,16 @@ impl FrameInvariants { #[cfg(feature = "unstable")] if fi.show_frame || fi.showable_frame { let cur_frame_time = fi.frame_timestamp(); - // Increment the film grain seed for the next frame - if let Some(params) = - Arc::make_mut(&mut fi.config).get_film_grain_mut_at(cur_frame_time) - { - params.random_seed = params.random_seed.wrapping_add(3248); - if params.random_seed == 0 { - params.random_seed = DEFAULT_GS_SEED; + // SAFETY: We're not inside a tile, there's nothing else writing to the encoder config here. + unsafe { + // Increment the film grain seed for the next frame + if let Some(params) = arc_get_mut_unsafe(&mut fi.config) + .get_film_grain_mut_at(cur_frame_time) + { + params.random_seed = params.random_seed.wrapping_add(3248); + if params.random_seed == 0 { + params.random_seed = DEFAULT_GS_SEED; + } } } } @@ -3192,8 +3199,15 @@ fn encode_tile_group( }) .unzip(); - let stats = - tile_states.into_iter().map(|ts| ts.enc_stats).collect::>(); + let stats = tile_states + .into_iter() + .map(|ts| { + // We have to clone here because `TileStateMut` is `Drop`. + // However, `EncoderStats` is a small struct, and the compiler is likely to + // optimize away this clone anyway. + ts.enc_stats.clone() + }) + .collect::>(); for tile_stats in stats { fs.enc_stats += &tile_stats; } @@ -3242,12 +3256,16 @@ fn encode_tile_group( let rec = &mut ts.rec; cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec); } - /* TODO: Don't apply if lossless */ - fs.restoration.lrf_filter_frame( - Arc::get_mut(&mut fs.rec).unwrap(), - &deblocked_frame, - fi, - ); + // SAFETY: We know no other threads are accessing this because we are done with tiles. + // The only references would be in ref frames, which are not being touched at the moment. + unsafe { + /* TODO: Don't apply if lossless */ + fs.restoration.lrf_filter_frame( + arc_get_mut_unsafe(&mut fs.rec), + &deblocked_frame, + fi, + ); + } } else { /* TODO: Don't apply if lossless */ if fi.sequence.enable_cdef { diff --git a/src/tiling/tile_state.rs b/src/tiling/tile_state.rs index b9507b23b0..c3366b4838 100644 --- a/src/tiling/tile_state.rs +++ b/src/tiling/tile_state.rs @@ -7,6 +7,8 @@ // Media Patent License 1.0 was not distributed with this source code in the // PATENTS file, you can obtain it at www.aomedia.org/license/patent. +use itertools::Itertools; + use super::*; use crate::context::*; @@ -14,14 +16,18 @@ use crate::encoder::*; use crate::frame::*; use crate::lrf::{IntegralImageBuffer, SOLVE_IMAGE_SIZE}; use crate::mc::MotionVector; +use crate::me::FrameMEStats; use crate::partition::{RefType, REF_FRAMES}; use crate::predict::{InterCompoundBuffers, PredictionMode}; use crate::quantize::*; use crate::rdo::*; use crate::stats::EncoderStats; use crate::util::*; +use std::ops::Deref; +use std::ops::DerefMut; use std::ops::{Index, IndexMut}; use std::sync::Arc; +use std::sync::Mutex; /// Tiled view of `FrameState` /// @@ -67,6 +73,9 @@ pub struct TileStateMut<'a, T: Pixel> { pub integral_buffer: IntegralImageBuffer, pub inter_compound_buffers: InterCompoundBuffers, pub enc_stats: EncoderStats, + /// Keeps a reference to the frame's active tile list + /// so that we can remove this tile from the list on drop. + frame_tile_states: Arc>>, } /// Contains information for a coded block that is @@ -122,6 +131,29 @@ impl IndexMut for MiTileState { } } +fn overlaps_any_active_tile( + luma_rect: TileRect, tile_states: &[TileRect], +) -> bool { + let this_start_x = luma_rect.x; + let this_end_x = this_start_x + luma_rect.width; + let this_start_y = luma_rect.y; + let this_end_y = this_start_y + luma_rect.height; + for state in tile_states { + let locked_start_x = state.x; + let locked_end_x = locked_start_x + state.width; + let locked_start_y = state.y; + let locked_end_y = locked_start_y + state.height; + if ((locked_start_x..locked_end_x).contains(&this_start_x) + || (locked_start_x..locked_end_x).contains(&this_end_x)) + && ((locked_start_y..locked_end_y).contains(&this_start_y) + || (locked_start_y..locked_end_y).contains(&this_end_y)) + { + return true; + } + } + false +} + impl<'a, T: Pixel> TileStateMut<'a, T> { pub fn new( fs: &'a mut FrameState, sbo: PlaneSuperBlockOffset, @@ -148,48 +180,63 @@ impl<'a, T: Pixel> TileStateMut<'a, T> { let sb_width = width.align_power_of_two_and_shift(sb_size_log2); let sb_height = height.align_power_of_two_and_shift(sb_size_log2); - Self { - sbo, - sb_size_log2, - sb_width, - sb_height, - mi_width: width >> MI_SIZE_LOG2, - mi_height: height >> MI_SIZE_LOG2, - width, - height, - input: &fs.input, - input_tile: Tile::new(&fs.input, luma_rect), - input_hres: &fs.input_hres, - input_qres: &fs.input_qres, - deblock: &fs.deblock, - rec: TileMut::new(Arc::make_mut(&mut fs.rec), luma_rect), - qc: Default::default(), - segmentation: &fs.segmentation, - restoration: TileRestorationStateMut::new( - &mut fs.restoration, + { + let mut tile_states = fs.active_tiles.lock().unwrap(); + assert!( + !overlaps_any_active_tile(luma_rect, &tile_states), + "Overlapping tiles were initialized, this is not thread safe" + ); + tile_states.push(luma_rect); + } + + // SAFETY: We use `arc_get_mut_unsafe` to make mutable references + // to the rec and ME stats contents. + // The above assert ensures that safety conditions are not violated. + unsafe { + Self { sbo, + sb_size_log2, sb_width, sb_height, - ), - me_stats: Arc::make_mut(&mut fs.frame_me_stats) - .iter_mut() - .map(|fmvs| { - TileMEStatsMut::new( - fmvs, - sbo.0.x << (sb_size_log2 - MI_SIZE_LOG2), - sbo.0.y << (sb_size_log2 - MI_SIZE_LOG2), - width >> MI_SIZE_LOG2, - height >> MI_SIZE_LOG2, - ) - }) - .collect(), - coded_block_info: MiTileState::new( - width >> MI_SIZE_LOG2, - height >> MI_SIZE_LOG2, - ), - integral_buffer: IntegralImageBuffer::zeroed(SOLVE_IMAGE_SIZE), - inter_compound_buffers: InterCompoundBuffers::default(), - enc_stats: EncoderStats::default(), + mi_width: width >> MI_SIZE_LOG2, + mi_height: height >> MI_SIZE_LOG2, + width, + height, + input: &fs.input, + input_tile: Tile::new(&fs.input, luma_rect), + input_hres: &fs.input_hres, + input_qres: &fs.input_qres, + deblock: &fs.deblock, + rec: TileMut::new(arc_get_mut_unsafe(&mut fs.rec), luma_rect), + qc: Default::default(), + segmentation: &fs.segmentation, + restoration: TileRestorationStateMut::new( + &mut fs.restoration, + sbo, + sb_width, + sb_height, + ), + me_stats: arc_get_mut_unsafe(&mut fs.frame_me_stats) + .iter_mut() + .map(|fmvs| { + TileMEStatsMut::new( + fmvs, + sbo.0.x << (sb_size_log2 - MI_SIZE_LOG2), + sbo.0.y << (sb_size_log2 - MI_SIZE_LOG2), + width >> MI_SIZE_LOG2, + height >> MI_SIZE_LOG2, + ) + }) + .collect(), + coded_block_info: MiTileState::new( + width >> MI_SIZE_LOG2, + height >> MI_SIZE_LOG2, + ), + integral_buffer: IntegralImageBuffer::zeroed(SOLVE_IMAGE_SIZE), + inter_compound_buffers: InterCompoundBuffers::default(), + enc_stats: EncoderStats::default(), + frame_tile_states: Arc::clone(&fs.active_tiles), + } } } @@ -262,3 +309,17 @@ impl<'a, T: Pixel> TileStateMut<'a, T> { } } } + +impl<'a, T: Pixel> Drop for TileStateMut<'a, T> { + fn drop(&mut self) { + // Remove this tile from the list of active tiles + let rect = self.tile_rect(); + let mut tile_states = self.frame_tile_states.lock().unwrap(); + let pos = tile_states + .iter() + .find_position(|ts_rect| ts_rect.x == rect.x && ts_rect.y == rect.y) + .unwrap() + .0; + tile_states.swap_remove(pos); + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index ca475b0039..02a1d68af5 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -12,8 +12,36 @@ mod align; mod cdf; mod uninit; +use std::sync::Arc; + pub use v_frame::math::*; pub use v_frame::pixel::*; pub use align::*; pub use uninit::*; + +// There does exist `Arc::get_mut_unchecked` to do this, +// but it is currently nightly only. +// And because the Arc fields are private, we have to do something much more annoying. +// +// Once `get_mut_unchecked` is stable in stdlib, we should use that instead. +// +// Why does it matter so much that this exists? +// `Arc::make_mut` may clone the inner data without our awareness. +// That may (although has not, somehow) cause issues with data consistency. +// But the issue we have encountered is that these clones make rav1e slower. +// If we know that we are not writing to the same part of this Arc as another thread, +// such as when tiling, we can avoid the clone. +pub(crate) unsafe fn arc_get_mut_unsafe(this: &mut Arc) -> &mut T { + let count = Arc::strong_count(this); + let raw = Arc::into_raw(Arc::clone(this)); + for _ in 0..count { + Arc::decrement_strong_count(raw); + } + let inner = Arc::get_mut(this).unwrap(); + for _ in 0..count { + Arc::increment_strong_count(raw); + } + Arc::from_raw(raw); + inner +}