From e773dbed8ccd8eacc8314312d03b1519a54e559c Mon Sep 17 00:00:00 2001 From: maj160 <13800690+maj160@users.noreply.github.com> Date: Sun, 15 Jan 2023 20:48:01 -0500 Subject: [PATCH 1/2] Use region dimensions in SAD and ME This avoids having to re-check bounds every time we perform SAD, as the region knows its own size. It also may save 2 usize's being passed around during ME. To enforce this, we also remove the w and h parameters from everywhere. This is part of a series of commits authored by @maj160 to improve performance of rav1e. --- benches/dist.rs | 12 ++-- src/api/internal.rs | 4 +- src/api/lookahead.rs | 17 +---- src/asm/aarch64/dist.rs | 26 ++++---- src/asm/x86/dist/mod.rs | 25 +++---- src/dist.rs | 31 ++++----- src/me.rs | 142 ++++++++++++++++++++++------------------ src/rdo.rs | 18 ++--- 8 files changed, 137 insertions(+), 138 deletions(-) diff --git a/benches/dist.rs b/benches/dist.rs index d86a7743e0..75fe05a84c 100644 --- a/benches/dist.rs +++ b/benches/dist.rs @@ -92,8 +92,6 @@ fn new_plane( type DistFn = fn( plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, - w: usize, - h: usize, bit_depth: usize, cpu: CpuFeatureLevel, ) -> u32; @@ -108,15 +106,15 @@ fn run_dist_bench( let input_plane = new_plane::(&mut ra, w, h); let rec_plane = new_plane::(&mut ra, w, h); - let plane_org = input_plane.as_region(); - let plane_ref = rec_plane.as_region(); - let blk_w = bs.width(); let blk_h = bs.height(); + let plane_org = + input_plane.region(Area::Rect { x: 0, y: 0, width: blk_w, height: blk_h }); + let plane_ref = + rec_plane.region(Area::Rect { x: 0, y: 0, width: blk_w, height: blk_h }); b.iter(|| { - let _ = - black_box(func(&plane_org, &plane_ref, blk_w, blk_h, bit_depth, cpu)); + let _ = black_box(func(&plane_org, &plane_ref, bit_depth, cpu)); }) } diff --git a/src/api/internal.rs b/src/api/internal.rs index 20ef57e328..bb9318735e 100644 --- a/src/api/internal.rs +++ b/src/api/internal.rs @@ -902,6 +902,8 @@ impl ContextInner { bsize: 
BlockSize, len: usize, reference_frame_block_importances: &mut [f32], ) { + debug_assert!(bsize.width() == IMPORTANCE_BLOCK_SIZE); + debug_assert!(bsize.height() == IMPORTANCE_BLOCK_SIZE); let coded_data = fi.coded_frame_data.as_ref().unwrap(); let plane_org = &frame.planes[0]; let plane_ref = &reference_frame.planes[0]; @@ -950,8 +952,6 @@ impl ContextInner { let inter_cost = get_satd( &region_org, &region_ref, - bsize.width(), - bsize.height(), bit_depth, fi.cpu_feature_level, ) as f32; diff --git a/src/api/lookahead.rs b/src/api/lookahead.rs index 2758d5920b..ca4f12c8cf 100644 --- a/src/api/lookahead.rs +++ b/src/api/lookahead.rs @@ -107,8 +107,6 @@ pub(crate) fn estimate_intra_costs( let intra_cost = get_satd( &plane_org, &plane_after_prediction_region, - bsize.width(), - bsize.height(), bit_depth, cpu_feature_level, ); @@ -223,10 +221,6 @@ pub(crate) fn estimate_inter_costs( let h_in_imp_b = plane_org.cfg.height / IMPORTANCE_BLOCK_SIZE; let w_in_imp_b = plane_org.cfg.width / IMPORTANCE_BLOCK_SIZE; let stats = &fs.frame_me_stats.read().expect("poisoned lock")[0]; - let bsize = BlockSize::from_width_and_height( - IMPORTANCE_BLOCK_SIZE, - IMPORTANCE_BLOCK_SIZE, - ); let mut inter_costs = 0; (0..h_in_imp_b).for_each(|y| { @@ -252,14 +246,9 @@ pub(crate) fn estimate_inter_costs( height: IMPORTANCE_BLOCK_SIZE, }); - inter_costs += get_satd( - &region_org, - &region_ref, - bsize.width(), - bsize.height(), - bit_depth, - fi.cpu_feature_level, - ) as u64; + inter_costs += + get_satd(&region_org, &region_ref, bit_depth, fi.cpu_feature_level) + as u64; }); }); inter_costs as f64 / (w_in_imp_b * h_in_imp_b) as f64 diff --git a/src/asm/aarch64/dist.rs b/src/asm/aarch64/dist.rs index 40d20b702f..c8d01de197 100644 --- a/src/asm/aarch64/dist.rs +++ b/src/asm/aarch64/dist.rs @@ -74,12 +74,13 @@ const fn to_index(bsize: BlockSize) -> usize { #[inline(always)] #[allow(clippy::let_and_return)] pub fn get_sad( - src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize, - bit_depth: 
usize, cpu: CpuFeatureLevel, + src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize, + cpu: CpuFeatureLevel, ) -> u32 { - let bsize_opt = BlockSize::from_width_and_height_opt(w, h); + let bsize_opt = + BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height); - let call_rust = || -> u32 { rust::get_sad(dst, src, w, h, bit_depth, cpu) }; + let call_rust = || -> u32 { rust::get_sad(dst, src, bit_depth, cpu) }; #[cfg(feature = "check_asm")] let ref_dist = call_rust(); @@ -110,12 +111,13 @@ pub fn get_sad( #[inline(always)] pub fn get_satd( - src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize, - bit_depth: usize, cpu: CpuFeatureLevel, + src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize, + cpu: CpuFeatureLevel, ) -> u32 { - let bsize_opt = BlockSize::from_width_and_height_opt(w, h); + let bsize_opt = + BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height); - let call_rust = || -> u32 { rust::get_satd(dst, src, w, h, bit_depth, cpu) }; + let call_rust = || -> u32 { rust::get_satd(dst, src, bit_depth, cpu) }; #[cfg(feature = "check_asm")] let ref_dist = call_rust(); @@ -229,8 +231,8 @@ mod test { *s = random::() as u16 * $BD / 8; *d = random::() as u16 * $BD / 8; } - let result = [](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap()); - let rust_result = [](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST); + let result = [](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap()); + let rust_result = [](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST); assert_eq!(rust_result, result); } else { @@ -242,8 +244,8 @@ mod test { *s = random::(); *d = random::(); } - let result = [](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap()); - let rust_result = [](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST); + let 
result = [](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap()); + let rust_result = [](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST); assert_eq!(rust_result, result); } diff --git a/src/asm/x86/dist/mod.rs b/src/asm/x86/dist/mod.rs index a1200c2a39..246b985ca2 100644 --- a/src/asm/x86/dist/mod.rs +++ b/src/asm/x86/dist/mod.rs @@ -166,12 +166,13 @@ pub(crate) const fn to_index(bsize: BlockSize) -> usize { #[inline(always)] #[allow(clippy::let_and_return)] pub fn get_sad( - src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize, - bit_depth: usize, cpu: CpuFeatureLevel, + src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize, + cpu: CpuFeatureLevel, ) -> u32 { - let bsize_opt = BlockSize::from_width_and_height_opt(w, h); + let bsize_opt = + BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height); - let call_rust = || -> u32 { rust::get_sad(dst, src, w, h, bit_depth, cpu) }; + let call_rust = || -> u32 { rust::get_sad(dst, src, bit_depth, cpu) }; #[cfg(feature = "check_asm")] let ref_dist = call_rust(); @@ -220,12 +221,14 @@ pub fn get_sad( #[inline(always)] #[allow(clippy::let_and_return)] pub fn get_satd( - src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, w: usize, h: usize, - bit_depth: usize, cpu: CpuFeatureLevel, + src: &PlaneRegion<'_, T>, dst: &PlaneRegion<'_, T>, bit_depth: usize, + cpu: CpuFeatureLevel, ) -> u32 { + let w = src.rect().width; + let h = src.rect().height; let bsize_opt = BlockSize::from_width_and_height_opt(w, h); - let call_rust = || -> u32 { rust::get_satd(dst, src, w, h, bit_depth, cpu) }; + let call_rust = || -> u32 { rust::get_satd(dst, src, bit_depth, cpu) }; #[cfg(feature = "check_asm")] let ref_dist = call_rust(); @@ -565,8 +568,8 @@ mod test { *s = random::() as u16 * $BD / 8; *d = random::() as u16 * $BD / 8; } - let result = [](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap()); - let 
rust_result = [](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST); + let result = [](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap()); + let rust_result = [](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST); assert_eq!(rust_result, result); } else { @@ -578,8 +581,8 @@ mod test { *s = random::(); *d = random::(); } - let result = [](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap()); - let rust_result = [](&src.as_region(), &dst.as_region(), $W, $H, $BD, CpuFeatureLevel::RUST); + let result = [](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::from_str($OPTLIT).unwrap()); + let rust_result = [](&src.as_region(), &dst.as_region(), $BD, CpuFeatureLevel::RUST); assert_eq!(rust_result, result); } diff --git a/src/dist.rs b/src/dist.rs index f8a4d6c715..79dc10471b 100644 --- a/src/dist.rs +++ b/src/dist.rs @@ -31,14 +31,12 @@ pub(crate) mod rust { /// Compute the sum of absolute differences over a block. /// w and h can be at most 128, the size of the largest block. pub fn get_sad( - plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize, - h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel, + plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, + _bit_depth: usize, _cpu: CpuFeatureLevel, ) -> u32 { - debug_assert!(w <= 128 && h <= 128); - let plane_org = - plane_org.subregion(Area::Rect { x: 0, y: 0, width: w, height: h }); - let plane_ref = - plane_ref.subregion(Area::Rect { x: 0, y: 0, width: w, height: h }); + debug_assert!( + plane_org.rect().width <= 128 && plane_org.rect().height <= 128 + ); plane_org .rows_iter() @@ -156,11 +154,12 @@ pub(crate) mod rust { /// revert to sad on edges when these transforms do not fit into w and h. /// 4x4 transforms instead of 8x8 transforms when width or height < 8. 
pub fn get_satd( - plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, w: usize, - h: usize, _bit_depth: usize, _cpu: CpuFeatureLevel, + plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, + _bit_depth: usize, _cpu: CpuFeatureLevel, ) -> u32 { + let w = plane_org.rect().width; + let h = plane_org.rect().height; assert!(w <= 128 && h <= 128); - assert!(plane_org.rect().width >= w && plane_org.rect().height >= h); assert!(plane_ref.rect().width >= w && plane_ref.rect().height >= h); // Size of hadamard transform should be 4x4 or 8x8 @@ -186,9 +185,7 @@ pub(crate) mod rust { // Revert to sad on edge blocks (frame edges) if chunk_w != size || chunk_h != size { - sum += get_sad( - &chunk_org, &chunk_ref, chunk_w, chunk_h, _bit_depth, _cpu, - ) as u64; + sum += get_sad(&chunk_org, &chunk_ref, _bit_depth, _cpu) as u64; continue; } @@ -443,7 +440,7 @@ pub mod test { let (input_plane, rec_plane) = setup_planes::(); for (w, h, distortion) in blocks { - let area = Area::StartingAt { x: 32, y: 40 }; + let area = Area::Rect { x: 32, y: 40, width: w, height: h }; let input_region = input_plane.region(area); let rec_region = rec_plane.region(area); @@ -453,8 +450,6 @@ pub mod test { get_sad( &input_region, &rec_region, - w, - h, bit_depth, CpuFeatureLevel::default() ) @@ -502,7 +497,7 @@ pub mod test { let (input_plane, rec_plane) = setup_planes::(); for (w, h, distortion) in blocks { - let area = Area::StartingAt { x: 32, y: 40 }; + let area = Area::Rect { x: 32, y: 40, width: w, height: h }; let input_region = input_plane.region(area); let rec_region = rec_plane.region(area); @@ -512,8 +507,6 @@ pub mod test { get_satd( &input_region, &rec_region, - w, - h, bit_depth, CpuFeatureLevel::default() ) diff --git a/src/me.rs b/src/me.rs index a6b09e9f03..888f66b5de 100644 --- a/src/me.rs +++ b/src/me.rs @@ -568,10 +568,23 @@ pub fn estimate_motion( }; let org_region = &match ssdec { - 0 => ts.input_tile.planes[0] - .subregion(Area::BlockStartingAt { bo: 
tile_bo.0 }), - 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }), - 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }), + 0 => ts.input_tile.planes[0].subregion(Area::BlockRect { + bo: tile_bo.0, + width: w, + height: h, + }), + 1 => ts.input_hres.region(Area::Rect { + x: po.x, + y: po.y, + width: w, + height: h, + }), + 2 => ts.input_qres.region(Area::Rect { + x: po.x, + y: po.y, + width: w, + height: h, + }), _ => unimplemented!(), }; @@ -584,8 +597,6 @@ pub fn estimate_motion( po, lambda, pmv.unwrap_or(global_mv), - w, - h, mvx_min, mvx_max, mvy_min, @@ -612,15 +623,13 @@ pub fn estimate_motion( mvx_max, mvy_min, mvy_max, - w, - h, best.mv, ); } sub_pixel_me( fi, po, org_region, p_ref, lambda, pmv, mvx_min, mvx_max, mvy_min, - mvy_max, w, h, use_satd, &mut best, ref_frame, + mvy_max, use_satd, &mut best, ref_frame, ); } @@ -659,10 +668,23 @@ fn refine_subsampled_motion_estimate( }; let org_region = &match ssdec { - 0 => ts.input_tile.planes[0] - .subregion(Area::BlockStartingAt { bo: tile_bo.0 }), - 1 => ts.input_hres.region(Area::StartingAt { x: po.x, y: po.y }), - 2 => ts.input_qres.region(Area::StartingAt { x: po.x, y: po.y }), + 0 => ts.input_tile.planes[0].subregion(Area::BlockRect { + bo: tile_bo.0, + width: w, + height: h, + }), + 1 => ts.input_hres.region(Area::Rect { + x: po.x, + y: po.y, + width: w, + height: h, + }), + 2 => ts.input_qres.region(Area::Rect { + x: po.x, + y: po.y, + width: w, + height: h, + }), _ => unimplemented!(), }; @@ -680,7 +702,7 @@ fn refine_subsampled_motion_estimate( let y_lo = po.y + (mv.row as isize / 8 - 1).max(mvy_min / 8); let y_hi = po.y + (mv.row as isize / 8 + 2).min(mvy_max / 8); let mut results = full_search( - fi, x_lo, x_hi, y_lo, y_hi, w, h, org_region, p_ref, po, 1, lambda, pmv, + fi, x_lo, x_hi, y_lo, y_hi, org_region, p_ref, po, 1, lambda, pmv, ); // Scale motion vectors to full res size @@ -695,11 +717,13 @@ fn refine_subsampled_motion_estimate( fn full_pixel_me( fi: 
&FrameInvariants, ts: &TileStateMut<'_, T>, org_region: &PlaneRegion, p_ref: &Plane, tile_bo: TileBlockOffset, - po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], w: usize, h: usize, - mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, - ref_frame: RefType, corner: MVSamplingMode, extensive_search: bool, - ssdec: u8, + po: PlaneOffset, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize, + mvx_max: isize, mvy_min: isize, mvy_max: isize, ref_frame: RefType, + corner: MVSamplingMode, extensive_search: bool, ssdec: u8, ) -> MotionSearchResult { + let w = org_region.rect().width; + let h = org_region.rect().height; + let ref_frame_id = ref_frame.to_index(); let tile_me_stats = &ts.me_stats[ref_frame_id].as_const(); let frame_ref = fi.rec_buffer.frames[fi.ref_frames[0] as usize] @@ -735,8 +759,6 @@ fn full_pixel_me( mvx_max, mvy_min, mvy_max, - w, - h, ); fullpel_diamond_search( fi, @@ -751,8 +773,6 @@ fn full_pixel_me( mvx_max, mvy_min, mvy_max, - w, - h, ); if results.rd.cost < best.rd.cost { @@ -769,7 +789,6 @@ fn full_pixel_me( // Search the median, the best mvs of neighboring blocks, and motion vectors // from the previous frame. Stop once a candidate with a sad less than a // threshold is found. - let thresh = (subsets.min_sad as f32 * 1.2) as u32 + (((w * h) as u32) << (fi.sequence.bit_depth - 8)); @@ -808,8 +827,6 @@ fn full_pixel_me( mvx_max, mvy_min, mvy_max, - w, - h, // Use 24, since it is the largest range that x264 uses. 
24, ); @@ -835,8 +852,6 @@ fn full_pixel_me( x_hi, y_lo, y_hi, - w, - h, org_region, p_ref, po, @@ -860,8 +875,8 @@ fn full_pixel_me( fn sub_pixel_me( fi: &FrameInvariants, po: PlaneOffset, org_region: &PlaneRegion, p_ref: &Plane, lambda: u32, pmv: [MotionVector; 2], mvx_min: isize, - mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, h: usize, - use_satd: bool, best: &mut MotionSearchResult, ref_frame: RefType, + mvx_max: isize, mvy_min: isize, mvy_max: isize, use_satd: bool, + best: &mut MotionSearchResult, ref_frame: RefType, ) { subpel_diamond_search( fi, @@ -875,8 +890,6 @@ fn sub_pixel_me( mvx_max, mvy_min, mvy_max, - w, - h, use_satd, best, ref_frame, @@ -887,14 +900,14 @@ fn get_best_predictor( fi: &FrameInvariants, po: PlaneOffset, org_region: &PlaneRegion, p_ref: &Plane, predictors: &[MotionVector], bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize, - mvy_min: isize, mvy_max: isize, w: usize, h: usize, + mvy_min: isize, mvy_max: isize, ) -> MotionSearchResult { let mut best: MotionSearchResult = MotionSearchResult::empty(); for &init_mv in predictors.iter() { let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, init_mv, + mvx_max, mvy_min, mvy_max, init_mv, ); if rd.cost < best.rd.cost { @@ -957,7 +970,7 @@ fn fullpel_diamond_search( fi: &FrameInvariants, po: PlaneOffset, org_region: &PlaneRegion, p_ref: &Plane, current: &mut MotionSearchResult, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize, - mvy_min: isize, mvy_max: isize, w: usize, h: usize, + mvy_min: isize, mvy_max: isize, ) { // Define the initial and the final scale (log2) of the diamond. 
let (mut diamond_radius_log2, diamond_radius_end_log2) = (1u8, 0u8); @@ -969,7 +982,7 @@ fn fullpel_diamond_search( let cand_mv = current.mv + (offset << diamond_radius_log2); let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, cand_mv, + mvx_max, mvy_min, mvy_max, cand_mv, ); if rd.cost < best_cand.rd.cost { @@ -1056,7 +1069,7 @@ fn hexagon_search( fi: &FrameInvariants, po: PlaneOffset, org_region: &PlaneRegion, p_ref: &Plane, current: &mut MotionSearchResult, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize, - mvy_min: isize, mvy_max: isize, w: usize, h: usize, + mvy_min: isize, mvy_max: isize, ) { // The first iteration of hexagon search is implemented separate from // subsequent iterations, which overlap with previous iterations. @@ -1072,7 +1085,7 @@ fn hexagon_search( let cand_mv = current.mv + HEXAGON_PATTERN[i]; let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, cand_mv, + mvx_max, mvy_min, mvy_max, cand_mv, ); if rd.cost < best_cand.rd.cost { @@ -1104,7 +1117,7 @@ fn hexagon_search( let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, cand_mv, + mvx_max, mvy_min, mvy_max, cand_mv, ); if rd.cost < best_cand.rd.cost { @@ -1121,7 +1134,7 @@ fn hexagon_search( let cand_mv = current.mv + offset; let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, cand_mv, + mvx_max, mvy_min, mvy_max, cand_mv, ); if rd.cost < best_cand.rd.cost { @@ -1170,7 +1183,7 @@ fn uneven_multi_hex_search( fi: &FrameInvariants, po: PlaneOffset, org_region: &PlaneRegion, p_ref: &Plane, current: &mut MotionSearchResult, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, mvx_min: isize, mvx_max: isize, - mvy_min: isize, mvy_max: isize, w: usize, 
h: usize, me_range: i16, + mvy_min: isize, mvy_max: isize, me_range: i16, ) { assert!(!current.is_empty()); @@ -1201,7 +1214,7 @@ fn uneven_multi_hex_search( let cand_mv = center + offset * i; let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, cand_mv, + mvx_max, mvy_min, mvy_max, cand_mv, ); if rd.cost < current.rd.cost { @@ -1222,7 +1235,7 @@ fn uneven_multi_hex_search( let cand_mv = center + offset * i; let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, cand_mv, + mvx_max, mvy_min, mvy_max, cand_mv, ); if rd.cost < current.rd.cost { @@ -1242,7 +1255,7 @@ fn uneven_multi_hex_search( let cand_mv = center + MotionVector { row, col }; let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, cand_mv, + mvx_max, mvy_min, mvy_max, cand_mv, ); if rd.cost < current.rd.cost { @@ -1284,7 +1297,7 @@ fn uneven_multi_hex_search( let cand_mv = center + offset * i; let rd = get_fullpel_mv_rd( fi, po, org_region, p_ref, bit_depth, pmv, lambda, false, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, cand_mv, + mvx_max, mvy_min, mvy_max, cand_mv, ); if rd.cost < current.rd.cost { @@ -1297,7 +1310,7 @@ fn uneven_multi_hex_search( // Refine the search results using a 'normal' hexagon search. 
hexagon_search( fi, po, org_region, p_ref, current, bit_depth, pmv, lambda, mvx_min, - mvx_max, mvy_min, mvy_max, w, h, + mvx_max, mvy_min, mvy_max, ); } @@ -1309,13 +1322,14 @@ fn uneven_multi_hex_search( fn subpel_diamond_search( fi: &FrameInvariants, po: PlaneOffset, org_region: &PlaneRegion, _p_ref: &Plane, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, - mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, - h: usize, use_satd: bool, current: &mut MotionSearchResult, - ref_frame: RefType, + mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, + use_satd: bool, current: &mut MotionSearchResult, ref_frame: RefType, ) { use crate::util::Aligned; // Motion compensation assembly has special requirements for edges + let w = org_region.rect().width; + let h = org_region.rect().height; let mc_w = w.next_power_of_two(); let mc_h = (h + 1) & !1; @@ -1352,8 +1366,6 @@ fn subpel_diamond_search( mvx_max, mvy_min, mvy_max, - w, - h, cand_mv, &mut tmp_region, ref_frame, @@ -1385,7 +1397,7 @@ fn get_fullpel_mv_rd( fi: &FrameInvariants, po: PlaneOffset, org_region: &PlaneRegion, p_ref: &Plane, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool, mvx_min: isize, mvx_max: isize, mvy_min: isize, - mvy_max: isize, w: usize, h: usize, cand_mv: MotionVector, + mvy_max: isize, cand_mv: MotionVector, ) -> MVCandidateRD { if (cand_mv.col as isize) < mvx_min || (cand_mv.col as isize) > mvx_max @@ -1396,21 +1408,22 @@ fn get_fullpel_mv_rd( } // Convert the motion vector into an full pixel offset. 
- let plane_ref = p_ref.region(Area::StartingAt { + let plane_ref = p_ref.region(Area::Rect { x: po.x + (cand_mv.col / 8) as isize, y: po.y + (cand_mv.row / 8) as isize, + width: org_region.rect().width, + height: org_region.rect().height, }); compute_mv_rd( - fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region, - &plane_ref, + fi, pmv, lambda, use_satd, bit_depth, cand_mv, org_region, &plane_ref, ) } fn get_subpel_mv_rd( fi: &FrameInvariants, po: PlaneOffset, org_region: &PlaneRegion, bit_depth: usize, pmv: [MotionVector; 2], lambda: u32, use_satd: bool, - mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, w: usize, - h: usize, cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut, + mvx_min: isize, mvx_max: isize, mvy_min: isize, mvy_max: isize, + cand_mv: MotionVector, tmp_region: &mut PlaneRegionMut, ref_frame: RefType, ) -> MVCandidateRD { if (cand_mv.col as isize) < mvx_min @@ -1433,8 +1446,7 @@ fn get_subpel_mv_rd( ); let plane_ref = tmp_region.as_const(); compute_mv_rd( - fi, pmv, lambda, use_satd, bit_depth, w, h, cand_mv, org_region, - &plane_ref, + fi, pmv, lambda, use_satd, bit_depth, cand_mv, org_region, &plane_ref, ) } @@ -1442,13 +1454,13 @@ fn get_subpel_mv_rd( #[inline(always)] fn compute_mv_rd( fi: &FrameInvariants, pmv: [MotionVector; 2], lambda: u32, - use_satd: bool, bit_depth: usize, w: usize, h: usize, cand_mv: MotionVector, + use_satd: bool, bit_depth: usize, cand_mv: MotionVector, plane_org: &PlaneRegion<'_, T>, plane_ref: &PlaneRegion<'_, T>, ) -> MVCandidateRD { let sad = if use_satd { - get_satd(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level) + get_satd(plane_org, plane_ref, bit_depth, fi.cpu_feature_level) } else { - get_sad(plane_org, plane_ref, w, h, bit_depth, fi.cpu_feature_level) + get_sad(plane_org, plane_ref, bit_depth, fi.cpu_feature_level) }; let rate1 = get_mv_rate(cand_mv, pmv[0], fi.allow_high_precision_mv); @@ -1460,9 +1472,11 @@ fn compute_mv_rd( fn full_search( fi: &FrameInvariants, 
x_lo: isize, x_hi: isize, y_lo: isize, y_hi: isize, - w: usize, h: usize, org_region: &PlaneRegion, p_ref: &Plane, - po: PlaneOffset, step: usize, lambda: u32, pmv: [MotionVector; 2], + org_region: &PlaneRegion, p_ref: &Plane, po: PlaneOffset, step: usize, + lambda: u32, pmv: [MotionVector; 2], ) -> MotionSearchResult { + let w = org_region.rect().width; + let h = org_region.rect().height; let search_region = p_ref.region(Area::Rect { x: x_lo, y: y_lo, @@ -1488,8 +1502,6 @@ fn full_search( lambda, false, fi.sequence.bit_depth, - w, - h, mv, org_region, &ref_window, diff --git a/src/rdo.rs b/src/rdo.rs index f40701f26e..4e9a862de7 100644 --- a/src/rdo.rs +++ b/src/rdo.rs @@ -1337,15 +1337,16 @@ fn inter_frame_rdo_mode_decision( &mut ts.inter_compound_buffers, ); - let plane_org = ts.input_tile.planes[0] - .subregion(Area::BlockStartingAt { bo: tile_bo.0 }); + let plane_org = ts.input_tile.planes[0].subregion(Area::BlockRect { + bo: tile_bo.0, + width: bsize.width(), + height: bsize.height(), + }); let plane_ref = rec_region.as_const(); let satd = get_satd( &plane_org, &plane_ref, - bsize.width(), - bsize.height(), fi.sequence.bit_depth, fi.cpu_feature_level, ); @@ -1482,15 +1483,16 @@ fn intra_frame_rdo_mode_decision( fi.cpu_feature_level, ); - let plane_org = ts.input_tile.planes[0] - .subregion(Area::BlockStartingAt { bo: tile_bo.0 }); + let plane_org = ts.input_tile.planes[0].subregion(Area::BlockRect { + bo: tile_bo.0, + width: tx_size.width(), + height: tx_size.height(), + }); let plane_ref = rec_region.as_const(); satds_all[luma_mode as usize] = get_satd( &plane_org, &plane_ref, - tx_size.width(), - tx_size.height(), fi.sequence.bit_depth, fi.cpu_feature_level, ); From bc171d71bc7a585ec8dbb4ed23b91c49ce2acf76 Mon Sep 17 00:00:00 2001 From: Josh Holmer Date: Tue, 17 Jan 2023 02:04:42 -0500 Subject: [PATCH 2/2] Attempt to fix macos tests --- src/asm/aarch64/dist.rs | 2 +- src/asm/x86/dist/mod.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git 
a/src/asm/aarch64/dist.rs b/src/asm/aarch64/dist.rs index c8d01de197..cb97504469 100644 --- a/src/asm/aarch64/dist.rs +++ b/src/asm/aarch64/dist.rs @@ -80,7 +80,7 @@ pub fn get_sad( let bsize_opt = BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height); - let call_rust = || -> u32 { rust::get_sad(dst, src, bit_depth, cpu) }; + let call_rust = || -> u32 { rust::get_sad(src, dst, bit_depth, cpu) }; #[cfg(feature = "check_asm")] let ref_dist = call_rust(); diff --git a/src/asm/x86/dist/mod.rs b/src/asm/x86/dist/mod.rs index 246b985ca2..97d4f2b117 100644 --- a/src/asm/x86/dist/mod.rs +++ b/src/asm/x86/dist/mod.rs @@ -172,7 +172,7 @@ pub fn get_sad( let bsize_opt = BlockSize::from_width_and_height_opt(src.rect().width, src.rect().height); - let call_rust = || -> u32 { rust::get_sad(dst, src, bit_depth, cpu) }; + let call_rust = || -> u32 { rust::get_sad(src, dst, bit_depth, cpu) }; #[cfg(feature = "check_asm")] let ref_dist = call_rust();