Skip to content

Commit

Permalink
CDEF parallelization wip/mvp
Browse files Browse the repository at this point in the history
  • Loading branch information
master-of-zen committed Apr 7, 2021
1 parent 7bb810f commit 54bddec
Showing 1 changed file with 30 additions and 10 deletions.
40 changes: 30 additions & 10 deletions src/cdef.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,16 @@ use crate::encoder::FrameInvariants;
use crate::frame::*;
use crate::tiling::*;
use crate::util::{clamp, msb, CastFromPrimitive, Pixel};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use rayon::iter::ParallelIterator;
use rayon::prelude::*;
use rust_hawktracer::*;

use crate::cpu_features::CpuFeatureLevel;
use std::cmp;
use std::{
cmp,
ops::DerefMut,
sync::{Arc, Mutex},
};

cfg_if::cfg_if! {
if #[cfg(nasm_x86_64)] {
Expand Down Expand Up @@ -600,31 +605,46 @@ pub fn cdef_filter_tile<T: Pixel>(
let fb_width = (output.planes[0].rect().width + 63) / 64;
let fb_height = (output.planes[0].rect().height + 63) / 64;

// should parallelize this
let mut queue: Vec<(usize, usize, TileMut<'_, T>)> = Vec::new();
let mut queue: Vec<(usize, usize, Arc<Mutex<&mut TileMut<'_, T>>>)> =
Vec::new();
let shared_output = Arc::new(Mutex::new(output));

for fby in 0..fb_height {
for fbx in 0..fb_width {
queue.push((fbx, fby, output));
queue.push((fbx, fby, shared_output.clone()));
}
}

queue.par_iter().for_each(|tpl| filter_tile(tpl, fi, input, tb, output));
queue.into_par_iter().for_each(|tpl| filter_tile(tpl, fi, input, tb));
}

/// Applies CDEF to one superblock of a tile: looks up the superblock's CDEF
/// index, analyzes its directions, then filters it into the shared output.
///
/// NOTE(review): this span is a rendered diff hunk without `+`/`-` markers,
/// so both the removed and the added version of the signature, of the
/// `tile_sbo` binding and of the `cdef_filter_superblock` arguments appear
/// below, one after the other.
///
/// In the added version, `tpl` is `(fbx, fby, shared_output)`: the superblock
/// coordinates inside the tile plus an `Arc<Mutex<..>>` handle to the output
/// tile. `fi`, `input` and `tb` are only read and forwarded to the helpers.
///
/// Panics if the output mutex is poisoned (`lock().unwrap()`).
///
/// NOTE(review): the analysis step runs before the lock is taken, but the
/// guard is then held for the whole `cdef_filter_superblock` call. If every
/// work item shares the same mutex (as the caller visible in this hunk sets
/// up), the filtering itself is serialized.
#[hawktracer(filter_tile)]
pub fn filter_tile<T: Pixel>(
// Removed version of the parameter list (next two lines).
tpl: &(usize, usize, &mut TileMut<'_, T>), fi: &FrameInvariants<T>,
input: &Frame<T>, tb: &TileBlocks, output: &mut TileMut<'_, T>,
// Added version of the parameter list (next two lines): the output tile now
// travels inside `tpl` behind an `Arc<Mutex<..>>` instead of as `output`.
tpl: (usize, usize, Arc<Mutex<&mut TileMut<'_, T>>>),
fi: &FrameInvariants<T>, input: &Frame<T>, tb: &TileBlocks,
) {
// tile_sbo is treated as an offset into the Tiles' plane
// regions, not as an absolute offset in the visible frame. The
// Tile's own offset is added to this in order to address into
// the input Frame.
// Removed version of the binding (next line); the two lines after it are the
// added version, which destructures `tpl` first.
let tile_sbo = TileSuperBlockOffset(SuperBlockOffset { x: tpl.0, y: tpl.1 });
let (fbx, fby, shared_output) = tpl;
let tile_sbo = TileSuperBlockOffset(SuperBlockOffset { x: fbx, y: fby });
let cdef_index = tb.get_cdef(tile_sbo);
// Direction analysis takes no output argument, so it runs before the lock.
let cdef_dirs = cdef_analyze_superblock(fi, input, tb, tile_sbo);
// NOTE(review): busy-wait. The guard returned by a successful `try_lock()` is
// a temporary that is dropped as soon as the `if` condition has been
// evaluated, so the lock is released again before `break`. The blocking
// `lock()` right after the loop already waits for the mutex; this loop only
// burns CPU and can be removed.
loop {
if shared_output.try_lock().is_ok() {
break;
}
}
// Blocks until the mutex is free; the guard lives until the end of the
// function, i.e. across the whole filter call below.
let mut output = shared_output.lock().unwrap();

cdef_filter_superblock(
// Removed version of the argument list (next line), followed by the added
// version, which reborrows the tile out of the mutex guard via `deref_mut()`.
fi, input, output, tb, tile_sbo, cdef_index, &cdef_dirs,
fi,
input,
output.deref_mut(),
tb,
tile_sbo,
cdef_index,
&cdef_dirs,
);
}

0 comments on commit 54bddec

Please sign in to comment.