From de3b7c653ed419d28254297eb76252804ac6f8ec Mon Sep 17 00:00:00 2001 From: David Michael Barr Date: Thu, 26 Oct 2023 23:13:58 +0900 Subject: [PATCH] Plumb new get_intra_edges pattern --- benches/predict.rs | 17 +++++-------- src/api/lookahead.rs | 8 +++++-- src/asm/aarch64/predict.rs | 19 +++++++-------- src/asm/shared/predict.rs | 9 +++---- src/asm/x86/predict.rs | 16 +++++-------- src/encoder.rs | 6 +++-- src/predict.rs | 49 ++++++++++++++++++-------------------- src/rdo.rs | 12 ++++++---- 8 files changed, 66 insertions(+), 70 deletions(-) diff --git a/benches/predict.rs b/benches/predict.rs index 3d2b597645..550e91655c 100644 --- a/benches/predict.rs +++ b/benches/predict.rs @@ -12,25 +12,19 @@ use rand::{Rng, SeedableRng}; use rand_chacha::ChaChaRng; use rav1e::bench::cpu_features::CpuFeatureLevel; use rav1e::bench::frame::*; -use rav1e::bench::partition::BlockSize; +use rav1e::bench::partition::{BlockSize, IntraEdge}; use rav1e::bench::predict::*; use rav1e::bench::transform::TxSize; use rav1e::bench::util::*; pub const BLOCK_SIZE: BlockSize = BlockSize::BLOCK_32X32; -pub fn generate_block( - rng: &mut ChaChaRng, edge_buf: &mut Aligned<[T; 257]>, -) -> (Plane, Vec) { +pub fn generate_block(rng: &mut ChaChaRng) -> (Plane, Vec) { let block = Plane::from_slice( &vec![T::cast_from(0); BLOCK_SIZE.width() * BLOCK_SIZE.height()], BLOCK_SIZE.width(), ); let ac: Vec = (0..(32 * 32)).map(|_| rng.gen()).collect(); - for v in edge_buf.data.iter_mut() { - *v = T::cast_from(rng.gen::()); - } - (block, ac) } @@ -132,8 +126,9 @@ pub fn intra_bench( b: &mut Bencher, mode: PredictionMode, variant: PredictionVariant, ) { let mut rng = ChaChaRng::from_seed([0; 32]); - let mut edge_buf = unsafe { Aligned::uninitialized() }; - let (mut block, ac) = generate_block::(&mut rng, &mut edge_buf); + let mut edge_buf = Aligned::from_fn(|_| T::cast_from(rng.gen::())); + let intra_edge = IntraEdge::mock(&mut edge_buf); + let (mut block, ac) = generate_block::(&mut rng); let cpu = CpuFeatureLevel::default(); let bitdepth = match T::type_enum() { PixelType::U8 => 8, @@ -154,7 +149,7 @@ pub fn intra_bench( &ac, angle, None, - &edge_buf, + &intra_edge, cpu, ); }) diff --git a/src/api/lookahead.rs b/src/api/lookahead.rs index 3b711883ff..95d6e4d2dc 100644 --- a/src/api/lookahead.rs +++ b/src/api/lookahead.rs @@ -12,6 +12,7 @@ use crate::partition::{get_intra_edges, BlockSize}; use crate::predict::{IntraParam, PredictionMode}; use crate::tiling::{Area, PlaneRegion, TileRect}; use crate::transform::TxSize; +use crate::util::Aligned; use crate::Pixel; use rayon::iter::*; use rust_hawktracer::*; @@ -44,6 +45,8 @@ pub(crate) fn estimate_intra_costs( let w_in_imp_b = plane.cfg.width / IMPORTANCE_BLOCK_SIZE; let mut intra_costs = Vec::with_capacity(h_in_imp_b * w_in_imp_b); + let mut edge_buf = unsafe { Aligned::uninitialized() }; + for y in 0..h_in_imp_b { for x in 0..w_in_imp_b { let plane_org = plane.region(Area::Rect { @@ -54,7 +57,8 @@ pub(crate) fn estimate_intra_costs( }); // TODO: other intra prediction modes. - let edge_buf = get_intra_edges( + let intra_edge = get_intra_edges( + &mut edge_buf, &plane.as_region(), TileBlockOffset(BlockOffset { x, y }), 0, @@ -92,7 +96,7 @@ pub(crate) fn estimate_intra_costs( &[], // Not used by DC_PRED IntraParam::None, None, // Not used by DC_PRED - &edge_buf, + &intra_edge, cpu_feature_level, ); diff --git a/src/asm/aarch64/predict.rs b/src/asm/aarch64/predict.rs index 218199a617..98bee0a6e3 100644 --- a/src/asm/aarch64/predict.rs +++ b/src/asm/aarch64/predict.rs @@ -9,7 +9,7 @@ use crate::context::MAX_TX_SIZE; use crate::cpu_features::CpuFeatureLevel; -use crate::partition::BlockSize; +use crate::partition::{BlockSize, IntraEdge}; use crate::predict::rust::{ dr_intra_derivative, select_ief_strength, select_ief_upsample, }; @@ -18,7 +18,6 @@ use crate::predict::{ }; use crate::tiling::{PlaneRegion, PlaneRegionMut}; use crate::transform::TxSize; -use crate::util::Aligned; use crate::{Pixel, PixelType}; use libc; use libc::{c_int, ptrdiff_t}; @@ -487,12 +486,12 @@ pub fn dispatch_predict_intra( mode: PredictionMode, variant: PredictionVariant, dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize, ac: &[i16], angle: isize, ief_params: Option, - edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel, + intra_edge: &IntraEdge, cpu: CpuFeatureLevel, ) { let call_rust = |dst: &mut PlaneRegionMut<'_, T>| { rust::dispatch_predict_intra( - mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, edge_buf, - cpu, + mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, + intra_edge, cpu, ); }; @@ -504,10 +503,8 @@ pub fn dispatch_predict_intra( let dst_ptr = dst.data_ptr_mut() as *mut _; let dst_u16 = dst.data_ptr_mut() as *mut u16; let stride = T::to_asm_stride(dst.plane_cfg.stride) as libc::ptrdiff_t; - let edge_ptr = - edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _; - let edge_u16 = - edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const u16; + let edge_ptr = intra_edge.top_left_ptr() as *const _; + let edge_u16 = intra_edge.top_left_ptr() as *const u16; let w = tx_size.width() as libc::c_int; let h = tx_size.height() as libc::c_int; let angle = angle as libc::c_int; @@ -600,7 +597,7 @@ pub fn dispatch_predict_intra( return ipred_z2( dst.data_ptr_mut(), stride, - edge_buf.data.as_ptr().add(2 * MAX_TX_SIZE), + intra_edge.top_left_ptr(), angle as isize, w, h, @@ -614,7 +611,7 @@ pub fn dispatch_predict_intra( (if angle < 90 { ipred_z1 } else { ipred_z3 })( dst.data_ptr_mut(), stride, - edge_buf.data.as_ptr().add(2 * MAX_TX_SIZE), + intra_edge.top_left_ptr(), angle as isize, w, h, diff --git a/src/asm/shared/predict.rs b/src/asm/shared/predict.rs index 0431134541..71ff6b1390 100644 --- a/src/asm/shared/predict.rs +++ b/src/asm/shared/predict.rs @@ -16,7 +16,7 @@ mod test { use crate::context::MAX_TX_SIZE; use crate::cpu_features::CpuFeatureLevel; use crate::frame::{AsRegion, Plane}; - use crate::partition::BlockSize; + use crate::partition::{BlockSize, IntraEdge}; use crate::predict::dispatch_predict_intra; use crate::predict::pred_cfl_ac; use crate::predict::rust; @@ -41,9 +41,10 @@ mod test { fn pred_matches_inner(cpu: CpuFeatureLevel, bit_depth: usize) { let tx_size = TxSize::TX_4X4; let ac: Aligned<[i16; 32 * 32]> = Aligned::from_fn(|i| i as i16 - 16 * 32); - let edge_buf: Aligned<[T; 4 * MAX_TX_SIZE + 1]> = Aligned::from_fn(|i| { + let mut edge_buf = Aligned::from_fn(|i| { T::cast_from(((i ^ 1) + 32).saturating_sub(2 * MAX_TX_SIZE)) }); + let intra_edge = IntraEdge::mock(&mut edge_buf); let ief_params_all = [ None, @@ -128,7 +129,7 @@ mod test { &ac.data, *angle, *ief_params, - &edge_buf, + &intra_edge, cpu, ); let mut data = [T::zero(); 4 * 4]; @@ -148,7 +149,7 @@ mod test { &ac.data, *angle, *ief_params, - &edge_buf, + &intra_edge, cpu, ); assert_eq!( diff --git a/src/asm/x86/predict.rs b/src/asm/x86/predict.rs index 227fe08859..f42032d30d 100644 --- a/src/asm/x86/predict.rs +++ b/src/asm/x86/predict.rs @@ -7,15 +7,13 @@ // Media Patent License 1.0 was not distributed with this source code in the // PATENTS file, you can obtain it at www.aomedia.org/license/patent. -use crate::context::MAX_TX_SIZE; use crate::cpu_features::CpuFeatureLevel; -use crate::partition::BlockSize; +use crate::partition::{BlockSize, IntraEdge}; use crate::predict::{ rust, IntraEdgeFilterParameters, PredictionMode, PredictionVariant, }; use crate::tiling::{PlaneRegion, PlaneRegionMut}; use crate::transform::TxSize; -use crate::util::Aligned; use crate::Pixel; use std::mem::MaybeUninit; use v_frame::pixel::PixelType; @@ -242,12 +240,12 @@ pub fn dispatch_predict_intra( mode: PredictionMode, variant: PredictionVariant, dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize, ac: &[i16], angle: isize, ief_params: Option, - edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel, + intra_edge: &IntraEdge, cpu: CpuFeatureLevel, ) { let call_rust = |dst: &mut PlaneRegionMut<'_, T>| { rust::dispatch_predict_intra( - mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, edge_buf, - cpu, + mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, + intra_edge, cpu, ); }; @@ -261,8 +259,7 @@ pub fn dispatch_predict_intra( match T::type_enum() { PixelType::U8 => { let dst_ptr = dst.data_ptr_mut() as *mut _; - let edge_ptr = - edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _; + let edge_ptr = intra_edge.top_left_ptr() as *const _; if cpu >= CpuFeatureLevel::AVX512ICL { match mode { PredictionMode::DC_PRED => { @@ -555,8 +552,7 @@ pub fn dispatch_predict_intra( } PixelType::U16 => { let dst_ptr = dst.data_ptr_mut() as *mut _; - let edge_ptr = - edge_buf.data.as_ptr().offset(2 * MAX_TX_SIZE as isize) as *const _; + let edge_ptr = intra_edge.top_left_ptr() as *const _; let bd_max = (1 << bit_depth) - 1; if cpu >= CpuFeatureLevel::AVX512ICL { match mode { diff --git a/src/encoder.rs b/src/encoder.rs index 8b2d96e9e9..af522002d8 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -1474,8 +1474,10 @@ pub fn encode_tx_block( let rec = &mut ts.rec.planes[p]; if mode.is_intra() { + let mut edge_buf = unsafe { Aligned::uninitialized() }; let bit_depth = fi.sequence.bit_depth; - let edge_buf = get_intra_edges( + let intra_edge = get_intra_edges( + &mut edge_buf, &rec.as_const(), tile_partition_bo, bx, @@ -1497,7 +1499,7 @@ pub fn encode_tx_block( ac, pred_intra_param, ief_params, - &edge_buf, + &intra_edge, fi.cpu_feature_level, ); } diff --git a/src/predict.rs b/src/predict.rs index 4705cf8ad3..30b37b322e 100644 --- a/src/predict.rs +++ b/src/predict.rs @@ -204,8 +204,8 @@ impl PredictionMode { pub fn predict_intra( self, tile_rect: TileRect, dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize, ac: &[i16], intra_param: IntraParam, - ief_params: Option, - edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, cpu: CpuFeatureLevel, + ief_params: Option, intra_edge: &IntraEdge, + cpu: CpuFeatureLevel, ) { assert!(self.is_intra()); let &Rect { x: frame_x, y: frame_y, .. } = dst.rect(); @@ -242,8 +242,8 @@ impl PredictionMode { }; dispatch_predict_intra::( - mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, edge_buf, - cpu, + mode, variant, dst, tx_size, bit_depth, ac, angle, ief_params, + intra_edge, cpu, ); } @@ -702,7 +702,7 @@ pub(crate) mod rust { use crate::cpu_features::CpuFeatureLevel; use crate::tiling::PlaneRegionMut; use crate::transform::TxSize; - use crate::util::{round_shift, Aligned}; + use crate::util::round_shift; use crate::Pixel; use std::mem::{size_of, MaybeUninit}; @@ -711,18 +711,18 @@ pub(crate) mod rust { mode: PredictionMode, variant: PredictionVariant, dst: &mut PlaneRegionMut<'_, T>, tx_size: TxSize, bit_depth: usize, ac: &[i16], angle: isize, ief_params: Option, - edge_buf: &Aligned<[T; 4 * MAX_TX_SIZE + 1]>, _cpu: CpuFeatureLevel, + intra_edge: &IntraEdge, _cpu: CpuFeatureLevel, ) { let width = tx_size.width(); let height = tx_size.height(); // left pixels are ordered from bottom to top and right-aligned - let (left, not_left) = edge_buf.data.split_at(2 * MAX_TX_SIZE); - let (top_left, above) = not_left.split_at(1); + let (left, top_left, above) = intra_edge.as_slices(); - let above_slice = &above[..width + height]; - let left_slice = &left[2 * MAX_TX_SIZE - height..]; - let left_and_left_below_slice = &left[2 * MAX_TX_SIZE - height - width..]; + let above_slice = above; + let left_slice = &left[left.len().saturating_sub(height)..]; + let left_and_left_below_slice = + &left[left.len().saturating_sub(width + height)..]; match mode { PredictionMode::DC_PRED => { @@ -1336,8 +1336,10 @@ pub(crate) mod rust { ); if enable_edge_filter { - above_filtered[1..=above.len()].clone_from_slice(above); - for i in 1..=left.len() { + let above_len = above.len().min(above_filtered.len() - 1); + let left_len = left.len().min(left_filtered.len() - 1); + above_filtered[1..=above_len].clone_from_slice(&above[..above_len]); + for i in 1..=left_len { left_filtered[i] = left[left.len() - i]; } @@ -1512,19 +1514,16 @@ pub(crate) mod rust { mod test { use super::*; use crate::predict::rust::*; + use crate::util::Aligned; use num_traits::*; #[test] fn pred_matches_u8() { - // SAFETY: We write to the array below before reading from it. - let mut edge_buf: Aligned<[u8; 2 * MAX_TX_SIZE + 1]> = - unsafe { Aligned::uninitialized() }; - for i in 0..edge_buf.data.len() { - edge_buf.data[i] = (i + 32).saturating_sub(MAX_TX_SIZE).as_(); - } - let left = &edge_buf.data[MAX_TX_SIZE - 4..MAX_TX_SIZE]; - let above = &edge_buf.data[MAX_TX_SIZE + 1..MAX_TX_SIZE + 5]; - let top_left = edge_buf.data[MAX_TX_SIZE]; + let mut edge_buf = + Aligned::from_fn(|i| (i + 32).saturating_sub(MAX_TX_SIZE * 2).as_()); + let (all_left, top_left, above) = + IntraEdge::mock(&mut edge_buf).as_slices(); + let left = &all_left[all_left.len() - 4..]; let mut output = Plane::from_slice(&[0u8; 4 * 4], 4); @@ -1552,7 +1551,7 @@ mod test { [31, 31, 31, 31, 30, 30, 30, 30, 29, 29, 29, 29, 28, 28, 28, 28] ); - pred_paeth(&mut output.as_region_mut(), above, left, top_left, 4, 4); + pred_paeth(&mut output.as_region_mut(), above, left, top_left[0], 4, 4); assert_eq!( &output.data[..], [32, 34, 35, 36, 30, 32, 32, 36, 29, 32, 32, 32, 28, 28, 32, 32] @@ -1576,9 +1575,7 @@ mod test { [33, 34, 35, 36, 31, 31, 32, 33, 30, 30, 30, 31, 29, 30, 30, 30] ); - let left = &edge_buf.data[MAX_TX_SIZE - 8..MAX_TX_SIZE]; - let above = &edge_buf.data[MAX_TX_SIZE + 1..MAX_TX_SIZE + 9]; - let top_left = &edge_buf.data[MAX_TX_SIZE..=MAX_TX_SIZE]; + let left = &all_left[all_left.len() - 8..]; let angles = [ 3, 6, 9, 14, 17, 20, 23, 26, 29, 32, 36, 39, 42, 45, 48, 51, 54, 58, 61, 64, 67, 70, 73, 76, 81, 84, 87, diff --git a/src/rdo.rs b/src/rdo.rs index 42146d5136..7aad802420 100644 --- a/src/rdo.rs +++ b/src/rdo.rs @@ -1433,11 +1433,13 @@ fn intra_frame_rdo_mode_decision( let satds = { // FIXME: If tx partition is used, this whole sads block should be fixed let tx_size = bsize.tx_size(); - let edge_buf = { + let mut edge_buf = unsafe { Aligned::uninitialized() }; + let intra_edge = { let rec = &ts.rec.planes[0].as_const(); let po = tile_bo.plane_offset(rec.plane_cfg); // FIXME: If tx partition is used, get_intra_edges() should be called for each tx block get_intra_edges( + &mut edge_buf, rec, tile_bo, 0, @@ -1479,7 +1481,7 @@ fn intra_frame_rdo_mode_decision( &[0i16; 2], IntraParam::None, if luma_mode.is_directional() { ief_params } else { None }, - &edge_buf, + &intra_edge, fi.cpu_feature_level, ); @@ -1612,6 +1614,7 @@ pub fn rdo_cfl_alpha( // SAFETY: We write to the array below before reading from it. let mut ac: Aligned<[MaybeUninit; 32 * 32]> = unsafe { Aligned::uninitialized() }; + let mut edge_buf = unsafe { Aligned::uninitialized() }; let ac = luma_ac(&mut ac.data, ts, tile_bo, bsize, luma_tx_size, fi); let best_alpha: ArrayVec = (1..3) .map(|p| { @@ -1620,7 +1623,8 @@ pub fn rdo_cfl_alpha( let rec = &mut ts.rec.planes[p]; let input = &ts.input_tile.planes[p]; let po = tile_bo.plane_offset(rec.plane_cfg); - let edge_buf = get_intra_edges( + let intra_edge = get_intra_edges( + &mut edge_buf, &rec.as_const(), tile_bo, 0, @@ -1644,7 +1648,7 @@ pub fn rdo_cfl_alpha( ac, IntraParam::Alpha(alpha), None, - &edge_buf, + &intra_edge, fi.cpu_feature_level, ); sse_wxh(