Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove uses of Arc::make_mut to avoid clones #2934

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 33 additions & 15 deletions src/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ use std::collections::VecDeque;
use std::io::Write;
use std::mem::MaybeUninit;
use std::sync::Arc;
use std::sync::Mutex;
use std::{fmt, io, mem};

use crate::rayon::iter::*;
Expand Down Expand Up @@ -438,6 +439,7 @@ pub struct FrameState<T: Pixel> {
// these are stored per-tile for easier access.
pub frame_me_stats: Arc<[FrameMEStats; REF_FRAMES as usize]>,
pub enc_stats: EncoderStats,
pub active_tiles: Arc<Mutex<Vec<TileRect>>>,
}

impl<T: Pixel> FrameState<T> {
Expand Down Expand Up @@ -479,6 +481,7 @@ impl<T: Pixel> FrameState<T> {
restoration: rs,
frame_me_stats: me_stats,
enc_stats: Default::default(),
active_tiles: Default::default(),
}
}

Expand Down Expand Up @@ -510,6 +513,7 @@ impl<T: Pixel> FrameState<T> {
restoration: rs,
frame_me_stats: FrameMEStats::new_arc_array(fi.w_in_b, fi.h_in_b),
enc_stats: Default::default(),
active_tiles: Default::default(),
}
}

Expand Down Expand Up @@ -992,13 +996,16 @@ impl<T: Pixel> FrameInvariants<T> {
#[cfg(feature = "unstable")]
if fi.show_frame || fi.showable_frame {
let cur_frame_time = fi.frame_timestamp();
// Increment the film grain seed for the next frame
if let Some(params) =
Arc::make_mut(&mut fi.config).get_film_grain_mut_at(cur_frame_time)
{
params.random_seed = params.random_seed.wrapping_add(3248);
if params.random_seed == 0 {
params.random_seed = DEFAULT_GS_SEED;
// SAFETY: We're not inside a tile, there's nothing else writing to the encoder config here.
unsafe {
// Increment the film grain seed for the next frame
if let Some(params) = arc_get_mut_unsafe(&mut fi.config)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't be better to move the film grain information to the FrameInvariants? Also who is accessing the config here?

.get_film_grain_mut_at(cur_frame_time)
{
params.random_seed = params.random_seed.wrapping_add(3248);
if params.random_seed == 0 {
params.random_seed = DEFAULT_GS_SEED;
}
}
}
}
Expand Down Expand Up @@ -3192,8 +3199,15 @@ fn encode_tile_group<T: Pixel>(
})
.unzip();

let stats =
tile_states.into_iter().map(|ts| ts.enc_stats).collect::<Vec<_>>();
let stats = tile_states
.into_iter()
.map(|ts| {
// We have to clone here because `TileStateMut` is `Drop`.
// However, `EncoderStats` is a small struct, and the compiler is likely to
// optimize away this clone anyway.
ts.enc_stats.clone()
})
.collect::<Vec<_>>();
for tile_stats in stats {
fs.enc_stats += &tile_stats;
}
Expand Down Expand Up @@ -3242,12 +3256,16 @@ fn encode_tile_group<T: Pixel>(
let rec = &mut ts.rec;
cdef_filter_tile(fi, &deblocked_frame, &blocks.as_tile_blocks(), rec);
}
/* TODO: Don't apply if lossless */
fs.restoration.lrf_filter_frame(
Arc::get_mut(&mut fs.rec).unwrap(),
&deblocked_frame,
fi,
);
// SAFETY: We know no other threads are accessing this because we are done with tiles.
// The only references would be in ref frames, which are not being touched at the moment.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the gain from doing this? Maybe it should be moved to a different patch.

unsafe {
/* TODO: Don't apply if lossless */
fs.restoration.lrf_filter_frame(
arc_get_mut_unsafe(&mut fs.rec),
&deblocked_frame,
fi,
);
}
} else {
/* TODO: Don't apply if lossless */
if fi.sequence.enable_cdef {
Expand Down
139 changes: 100 additions & 39 deletions src/tiling/tile_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,27 @@
// Media Patent License 1.0 was not distributed with this source code in the
// PATENTS file, you can obtain it at www.aomedia.org/license/patent.

use itertools::Itertools;

use super::*;

use crate::context::*;
use crate::encoder::*;
use crate::frame::*;
use crate::lrf::{IntegralImageBuffer, SOLVE_IMAGE_SIZE};
use crate::mc::MotionVector;
use crate::me::FrameMEStats;
use crate::partition::{RefType, REF_FRAMES};
use crate::predict::{InterCompoundBuffers, PredictionMode};
use crate::quantize::*;
use crate::rdo::*;
use crate::stats::EncoderStats;
use crate::util::*;
use std::ops::Deref;
use std::ops::DerefMut;
use std::ops::{Index, IndexMut};
use std::sync::Arc;
use std::sync::Mutex;

/// Tiled view of `FrameState`
///
Expand Down Expand Up @@ -67,6 +73,9 @@ pub struct TileStateMut<'a, T: Pixel> {
pub integral_buffer: IntegralImageBuffer,
pub inter_compound_buffers: InterCompoundBuffers,
pub enc_stats: EncoderStats,
/// Keeps a reference to the frame's active tile list
/// so that we can remove this tile from the list on drop.
frame_tile_states: Arc<Mutex<Vec<TileRect>>>,
}

/// Contains information for a coded block that is
Expand Down Expand Up @@ -122,6 +131,29 @@ impl IndexMut<usize> for MiTileState {
}
}

fn overlaps_any_active_tile(
luma_rect: TileRect, tile_states: &[TileRect],
) -> bool {
let this_start_x = luma_rect.x;
let this_end_x = this_start_x + luma_rect.width;
let this_start_y = luma_rect.y;
let this_end_y = this_start_y + luma_rect.height;
for state in tile_states {
let locked_start_x = state.x;
let locked_end_x = locked_start_x + state.width;
let locked_start_y = state.y;
let locked_end_y = locked_start_y + state.height;
if ((locked_start_x..locked_end_x).contains(&this_start_x)
|| (locked_start_x..locked_end_x).contains(&this_end_x))
&& ((locked_start_y..locked_end_y).contains(&this_start_y)
|| (locked_start_y..locked_end_y).contains(&this_end_y))
{
return true;
}
}
false
}

impl<'a, T: Pixel> TileStateMut<'a, T> {
pub fn new(
fs: &'a mut FrameState<T>, sbo: PlaneSuperBlockOffset,
Expand All @@ -148,48 +180,63 @@ impl<'a, T: Pixel> TileStateMut<'a, T> {
let sb_width = width.align_power_of_two_and_shift(sb_size_log2);
let sb_height = height.align_power_of_two_and_shift(sb_size_log2);

Self {
sbo,
sb_size_log2,
sb_width,
sb_height,
mi_width: width >> MI_SIZE_LOG2,
mi_height: height >> MI_SIZE_LOG2,
width,
height,
input: &fs.input,
input_tile: Tile::new(&fs.input, luma_rect),
input_hres: &fs.input_hres,
input_qres: &fs.input_qres,
deblock: &fs.deblock,
rec: TileMut::new(Arc::make_mut(&mut fs.rec), luma_rect),
qc: Default::default(),
segmentation: &fs.segmentation,
restoration: TileRestorationStateMut::new(
&mut fs.restoration,
{
let mut tile_states = fs.active_tiles.lock().unwrap();
assert!(
!overlaps_any_active_tile(luma_rect, &tile_states),
"Overlapping tiles were initialized, this is not thread safe"
);
tile_states.push(luma_rect);
}

// SAFETY: We use `arc_get_mut_unsafe` to make mutable references
// to the rec and ME stats contents.
// The above assert ensures that safety conditions are not violated.
unsafe {
Self {
sbo,
sb_size_log2,
sb_width,
sb_height,
),
me_stats: Arc::make_mut(&mut fs.frame_me_stats)
.iter_mut()
.map(|fmvs| {
TileMEStatsMut::new(
fmvs,
sbo.0.x << (sb_size_log2 - MI_SIZE_LOG2),
sbo.0.y << (sb_size_log2 - MI_SIZE_LOG2),
width >> MI_SIZE_LOG2,
height >> MI_SIZE_LOG2,
)
})
.collect(),
coded_block_info: MiTileState::new(
width >> MI_SIZE_LOG2,
height >> MI_SIZE_LOG2,
),
integral_buffer: IntegralImageBuffer::zeroed(SOLVE_IMAGE_SIZE),
inter_compound_buffers: InterCompoundBuffers::default(),
enc_stats: EncoderStats::default(),
mi_width: width >> MI_SIZE_LOG2,
mi_height: height >> MI_SIZE_LOG2,
width,
height,
input: &fs.input,
input_tile: Tile::new(&fs.input, luma_rect),
input_hres: &fs.input_hres,
input_qres: &fs.input_qres,
deblock: &fs.deblock,
rec: TileMut::new(arc_get_mut_unsafe(&mut fs.rec), luma_rect),
qc: Default::default(),
segmentation: &fs.segmentation,
restoration: TileRestorationStateMut::new(
&mut fs.restoration,
sbo,
sb_width,
sb_height,
),
me_stats: arc_get_mut_unsafe(&mut fs.frame_me_stats)
.iter_mut()
.map(|fmvs| {
TileMEStatsMut::new(
fmvs,
sbo.0.x << (sb_size_log2 - MI_SIZE_LOG2),
sbo.0.y << (sb_size_log2 - MI_SIZE_LOG2),
width >> MI_SIZE_LOG2,
height >> MI_SIZE_LOG2,
)
})
.collect(),
coded_block_info: MiTileState::new(
width >> MI_SIZE_LOG2,
height >> MI_SIZE_LOG2,
),
integral_buffer: IntegralImageBuffer::zeroed(SOLVE_IMAGE_SIZE),
inter_compound_buffers: InterCompoundBuffers::default(),
enc_stats: EncoderStats::default(),
frame_tile_states: Arc::clone(&fs.active_tiles),
}
}
}

Expand Down Expand Up @@ -262,3 +309,17 @@ impl<'a, T: Pixel> TileStateMut<'a, T> {
}
}
}

impl<'a, T: Pixel> Drop for TileStateMut<'a, T> {
fn drop(&mut self) {
// Remove this tile from the list of active tiles
let rect = self.tile_rect();
let mut tile_states = self.frame_tile_states.lock().unwrap();
let pos = tile_states
.iter()
.find_position(|ts_rect| ts_rect.x == rect.x && ts_rect.y == rect.y)
.unwrap()
.0;
tile_states.swap_remove(pos);
}
}
28 changes: 28 additions & 0 deletions src/util/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,36 @@ mod align;
mod cdf;
mod uninit;

use std::sync::Arc;

pub use v_frame::math::*;
pub use v_frame::pixel::*;

pub use align::*;
pub use uninit::*;

// There does exist `Arc::get_mut_unchecked` to do this,
// but it is currently nightly only.
// And because the Arc fields are private, we have to do something much more annoying.
//
// Once `get_mut_unchecked` is stable in stdlib, we should use that instead.
//
// Why does it matter so much that this exists?
// `Arc::make_mut` may clone the inner data without our awareness.
// That may (although has not, somehow) cause issues with data consistency.
// But the issue we have encountered is that these clones make rav1e slower.
// If we know that we are not writing to the same part of this Arc as another thread,
// such as when tiling, we can avoid the clone.
pub(crate) unsafe fn arc_get_mut_unsafe<T>(this: &mut Arc<T>) -> &mut T {
let count = Arc::strong_count(this);
let raw = Arc::into_raw(Arc::clone(this));
for _ in 0..count {
Arc::decrement_strong_count(raw);
}
let inner = Arc::get_mut(this).unwrap();
for _ in 0..count {
Arc::increment_strong_count(raw);
}
Arc::from_raw(raw);
inner
}