Skip to content

Commit

Permalink
Remove uses of Arc::make_mut to avoid clones
Browse files Browse the repository at this point in the history
  • Loading branch information
shssoichiro committed May 10, 2022
1 parent c23559d commit 13dc931
Show file tree
Hide file tree
Showing 3 changed files with 161 additions and 54 deletions.
48 changes: 33 additions & 15 deletions src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ use std::collections::VecDeque;
use std::io::Write;
use std::mem::MaybeUninit;
use std::sync::Arc;
use std::sync::Mutex;
use std::{fmt, io, mem};

use crate::rayon::iter::*;
Expand Down Expand Up @@ -438,6 +439,7 @@ pub struct FrameState<T: Pixel> {
// these are stored per-tile for easier access.
pub frame_me_stats: Arc<[FrameMEStats; REF_FRAMES as usize]>,
pub enc_stats: EncoderStats,
pub active_tiles: Arc<Mutex<Vec<TileRect>>>,
}

impl<T: Pixel> FrameState<T> {
Expand Down Expand Up @@ -479,6 +481,7 @@ impl<T: Pixel> FrameState<T> {
restoration: rs,
frame_me_stats: me_stats,
enc_stats: Default::default(),
active_tiles: Default::default(),
}
}

Expand Down Expand Up @@ -510,6 +513,7 @@ impl<T: Pixel> FrameState<T> {
restoration: rs,
frame_me_stats: FrameMEStats::new_arc_array(fi.w_in_b, fi.h_in_b),
enc_stats: Default::default(),
active_tiles: Default::default(),
}
}

Expand Down Expand Up @@ -992,13 +996,16 @@ impl<T: Pixel> FrameInvariants<T> {
#[cfg(feature = "unstable")]
if fi.show_frame || fi.showable_frame {
let cur_frame_time = fi.frame_timestamp();
// Increment the film grain seed for the next frame
if let Some(params) =
Arc::make_mut(&mut fi.config).get_film_grain_mut_at(cur_frame_time)
{
params.random_seed = params.random_seed.wrapping_add(3248);
if params.random_seed == 0 {
params.random_seed = DEFAULT_GS_SEED;
// SAFETY: We're not inside a tile, there's nothing else writing to the encoder config here.
unsafe {
// Increment the film grain seed for the next frame
if let Some(params) = arc_get_mut_unsafe(&mut fi.config)
.get_film_grain_mut_at(cur_frame_time)
{
params.random_seed = params.random_seed.wrapping_add(3248);
if params.random_seed == 0 {
params.random_seed = DEFAULT_GS_SEED;
}
}
}
}
Expand Down Expand Up @@ -3192,8 +3199,15 @@ fn encode_tile_group<T: Pixel>(
})
.unzip();

let stats =
tile_states.into_iter().map(|ts| ts.enc_stats).collect::<Vec<_>>();
let stats = tile_states
.into_iter()
.map(|ts| {
// We have to clone here because `TileStateMut` is `Drop`.
// However, `EncoderStats` is a small struct, and the compiler is likely to
// optimize away this clone anyway.
ts.enc_stats.clone()
})
.collect::<Vec<_>>();
for tile_stats in stats {
fs.enc_stats += &tile_stats;
}
Expand Down Expand Up @@ -3242,12 +3256,16 @@ fn encode_tile_group<T: Pixel>(
let rec = &mut ts.rec;
cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec);
}
/* TODO: Don't apply if lossless */
fs.restoration.lrf_filter_frame(
Arc::get_mut(&mut fs.rec).unwrap(),
&deblocked_frame,
fi,
);
// SAFETY: We know no other threads are accessing this because we are done with tiles.
// The only references would be in ref frames, which are not being touched at the moment.
unsafe {
/* TODO: Don't apply if lossless */
fs.restoration.lrf_filter_frame(
arc_get_mut_unsafe(&mut fs.rec),
&deblocked_frame,
fi,
);
}
} else {
/* TODO: Don't apply if lossless */
if fi.sequence.enable_cdef {
Expand Down
139 changes: 100 additions & 39 deletions src/tiling/tile_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,27 @@
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use itertools::Itertools;

use super::*;

use crate::context::*;
use crate::encoder::*;
use crate::frame::*;
use crate::lrf::{IntegralImageBuffer, SOLVE_IMAGE_SIZE};
use crate::mc::MotionVector;
use crate::me::FrameMEStats;
use crate::partition::{RefType, REF_FRAMES};
use crate::predict::{InterCompoundBuffers, PredictionMode};
use crate::quantize::*;
use crate::rdo::*;
use crate::stats::EncoderStats;
use crate::util::*;
use std::ops::Deref;
use std::ops::DerefMut;
use std::ops::{Index, IndexMut};
use std::sync::Arc;
use std::sync::Mutex;

/// Tiled view of `FrameState`
///
Expand Down Expand Up @@ -67,6 +73,9 @@ pub struct TileStateMut<'a, T: Pixel> {
pub integral_buffer: IntegralImageBuffer,
pub inter_compound_buffers: InterCompoundBuffers,
pub enc_stats: EncoderStats,
/// Keeps a reference to the frame's active tile list
/// so that we can remove this tile from the list on drop.
frame_tile_states: Arc<Mutex<Vec<TileRect>>>,
}

/// Contains information for a coded block that is
Expand Down Expand Up @@ -122,6 +131,29 @@ impl IndexMut<usize> for MiTileState {
}
}

fn overlaps_any_active_tile(
luma_rect: TileRect, tile_states: &[TileRect],
) -> bool {
let this_start_x = luma_rect.x;
let this_end_x = this_start_x + luma_rect.width;
let this_start_y = luma_rect.y;
let this_end_y = this_start_y + luma_rect.height;
for state in tile_states {
let locked_start_x = state.x;
let locked_end_x = locked_start_x + state.width;
let locked_start_y = state.y;
let locked_end_y = locked_start_y + state.height;
if ((locked_start_x..locked_end_x).contains(&this_start_x)
|| (locked_start_x..locked_end_x).contains(&this_end_x))
&& ((locked_start_y..locked_end_y).contains(&this_start_y)
|| (locked_start_y..locked_end_y).contains(&this_end_y))
{
return true;
}
}
false
}

impl<'a, T: Pixel> TileStateMut<'a, T> {
pub fn new(
fs: &'a mut FrameState<T>, sbo: PlaneSuperBlockOffset,
Expand All @@ -148,48 +180,63 @@ impl<'a, T: Pixel> TileStateMut<'a, T> {
let sb_width = width.align_power_of_two_and_shift(sb_size_log2);
let sb_height = height.align_power_of_two_and_shift(sb_size_log2);

Self {
sbo,
sb_size_log2,
sb_width,
sb_height,
mi_width: width >> MI_SIZE_LOG2,
mi_height: height >> MI_SIZE_LOG2,
width,
height,
input: &fs.input,
input_tile: Tile::new(&fs.input, luma_rect),
input_hres: &fs.input_hres,
input_qres: &fs.input_qres,
deblock: &fs.deblock,
rec: TileMut::new(Arc::make_mut(&mut fs.rec), luma_rect),
qc: Default::default(),
segmentation: &fs.segmentation,
restoration: TileRestorationStateMut::new(
&mut fs.restoration,
{
let mut tile_states = fs.active_tiles.lock().unwrap();
assert!(
!overlaps_any_active_tile(luma_rect, &tile_states),
"Overlapping tiles were initialized, this is not thread safe"
);
tile_states.push(luma_rect);
}

// SAFETY: We use `arc_get_mut_unsafe` to make mutable references
// to the rec and ME stats contents.
// The above assert ensures that safety conditions are not violated.
unsafe {
Self {
sbo,
sb_size_log2,
sb_width,
sb_height,
),
me_stats: Arc::make_mut(&mut fs.frame_me_stats)
.iter_mut()
.map(|fmvs| {
TileMEStatsMut::new(
fmvs,
sbo.0.x << (sb_size_log2 - MI_SIZE_LOG2),
sbo.0.y << (sb_size_log2 - MI_SIZE_LOG2),
width >> MI_SIZE_LOG2,
height >> MI_SIZE_LOG2,
)
})
.collect(),
coded_block_info: MiTileState::new(
width >> MI_SIZE_LOG2,
height >> MI_SIZE_LOG2,
),
integral_buffer: IntegralImageBuffer::zeroed(SOLVE_IMAGE_SIZE),
inter_compound_buffers: InterCompoundBuffers::default(),
enc_stats: EncoderStats::default(),
mi_width: width >> MI_SIZE_LOG2,
mi_height: height >> MI_SIZE_LOG2,
width,
height,
input: &fs.input,
input_tile: Tile::new(&fs.input, luma_rect),
input_hres: &fs.input_hres,
input_qres: &fs.input_qres,
deblock: &fs.deblock,
rec: TileMut::new(arc_get_mut_unsafe(&mut fs.rec), luma_rect),
qc: Default::default(),
segmentation: &fs.segmentation,
restoration: TileRestorationStateMut::new(
&mut fs.restoration,
sbo,
sb_width,
sb_height,
),
me_stats: arc_get_mut_unsafe(&mut fs.frame_me_stats)
.iter_mut()
.map(|fmvs| {
TileMEStatsMut::new(
fmvs,
sbo.0.x << (sb_size_log2 - MI_SIZE_LOG2),
sbo.0.y << (sb_size_log2 - MI_SIZE_LOG2),
width >> MI_SIZE_LOG2,
height >> MI_SIZE_LOG2,
)
})
.collect(),
coded_block_info: MiTileState::new(
width >> MI_SIZE_LOG2,
height >> MI_SIZE_LOG2,
),
integral_buffer: IntegralImageBuffer::zeroed(SOLVE_IMAGE_SIZE),
inter_compound_buffers: InterCompoundBuffers::default(),
enc_stats: EncoderStats::default(),
frame_tile_states: Arc::clone(&fs.active_tiles),
}
}
}

Expand Down Expand Up @@ -262,3 +309,17 @@ impl<'a, T: Pixel> TileStateMut<'a, T> {
}
}
}

impl<'a, T: Pixel> Drop for TileStateMut<'a, T> {
fn drop(&mut self) {
// Remove this tile from the list of active tiles
let rect = self.tile_rect();
let mut tile_states = self.frame_tile_states.lock().unwrap();
let pos = tile_states
.iter()
.find_position(|ts_rect| ts_rect.x == rect.x && ts_rect.y == rect.y)
.unwrap()
.0;
tile_states.swap_remove(pos);
}
}
28 changes: 28 additions & 0 deletions src/util/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,36 @@ mod align;
mod cdf;
mod uninit;

use std::sync::Arc;

pub use v_frame::math::*;
pub use v_frame::pixel::*;

pub use align::*;
pub use uninit::*;

// There does exist `Arc::get_mut_unchecked` to do this,
// but it is currently nightly only.
// And because the Arc fields are private, we have to do something much more annoying.
//
// Once `get_mut_unchecked` is stable in stdlib, we should use that instead.
//
// Why does it matter so much that this exists?
// `Arc::make_mut` may clone the inner data without our awareness.
// That may (although has not, somehow) cause issues with data consistency.
// But the issue we have encountered is that these clones make rav1e slower.
// If we know that we are not writing to the same part of this Arc as another thread,
// such as when tiling, we can avoid the clone.
pub(crate) unsafe fn arc_get_mut_unsafe<T>(this: &mut Arc<T>) -> &mut T {
let count = Arc::strong_count(this);
let raw = Arc::into_raw(Arc::clone(this));
for _ in 0..count {
Arc::decrement_strong_count(raw);
}
let inner = Arc::get_mut(this).unwrap();
for _ in 0..count {
Arc::increment_strong_count(raw);
}
Arc::from_raw(raw);
inner
}

0 comments on commit 13dc931

Please sign in to comment.