Skip to content

Commit

Permalink
fn filter_plane_{cols,rows}_{y,uv}: Remove last of bounds checks in…
Browse files Browse the repository at this point in the history
… the loop (#1195)

This (along with #1193) gets rid of the last bounds checks inside the
loops in these `fn`s, which hopefully helps since these `fn`s are hot.
  • Loading branch information
kkysen authored Jun 18, 2024
2 parents 3c238c2 + a9d25bb commit 231e81e
Showing 1 changed file with 20 additions and 16 deletions.
36 changes: 20 additions & 16 deletions src/lf_apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,8 @@ unsafe fn filter_plane_cols_y<BD: BitDepth>(
// filter edges between columns (e.g. block1 | block2)
let lf_sb = &f.dsp.lf.loop_filter_sb;
let len = endy4 - starty4;
let y_dst = |x| y_dst + x * 4;
y_dst(w).as_ptr::<BD>(); // Bounds check
let mask = &mask[..w];
for x in 0..w {
if !have_left && x == 0 {
Expand All @@ -369,9 +371,8 @@ unsafe fn filter_plane_cols_y<BD: BitDepth>(
} else {
mask.each_ref().map(|[_, b]| b.get() as u32)
};
let y_dst = y_dst + x * 4;
let lvl = &lvl[x..];
lf_sb.y.h.call::<BD>(f, y_dst, &hmask, lvl, 0, len);
lf_sb.y.h.call::<BD>(f, y_dst(x), &hmask, lvl, 0, len);
}
}

Expand All @@ -392,20 +393,19 @@ unsafe fn filter_plane_rows_y<BD: BitDepth>(
// block2
let lf_sb = &f.dsp.lf.loop_filter_sb;
let len = endy4 - starty4;
let y_dst = |i| y_dst + (i as isize * 4 * y_dst.pixel_stride::<BD>());
y_dst(len - 1).as_ptr::<BD>(); // Bounds check
let lvl = &lvl[..1 + (len - 1) * b4_stride];

for i in 0..len {
let y = i + starty4;
let y_dst = y_dst + (i as isize * 4 * y_dst.pixel_stride::<BD>());
if !have_top && y == 0 {
continue;
}
let mask = &mask[y];
let vmask = mask
let vmask = mask[y % mask.len()] // To elide the bounds check.
.each_ref()
.map(|[a, b]| a.get() as u32 | ((b.get() as u32) << 16));
let lvl = &lvl[i * b4_stride..];
lf_sb.y.v.call::<BD>(f, y_dst, &vmask, lvl, 1, w);
lf_sb.y.v.call::<BD>(f, y_dst(i), &vmask, lvl, 1, w);
}
}

Expand All @@ -425,14 +425,16 @@ unsafe fn filter_plane_cols_uv<BD: BitDepth>(
// filter edges between columns (e.g. block1 | block2)
let lf_sb = &f.dsp.lf.loop_filter_sb;
let len = endy4 - starty4;
let u_dst = |x| u_dst + x * 4;
let v_dst = |x| v_dst + x * 4;
u_dst(w).as_ptr::<BD>(); // Bounds check
v_dst(w).as_ptr::<BD>(); // Bounds check
let mask = &mask[..w];
let lvl = &lvl[..w];
for x in 0..w {
if !have_left && x == 0 {
continue;
}
let u_dst = u_dst + x * 4;
let v_dst = v_dst + x * 4;
let mask = &mask[x];
let hmask = if starty4 == 0 {
if endy4 > 16 >> ss_ver {
Expand All @@ -446,8 +448,8 @@ unsafe fn filter_plane_cols_uv<BD: BitDepth>(
};
let hmask = [hmask[0], hmask[1], 0];
let lvl = &lvl[x..];
lf_sb.uv.h.call::<BD>(f, u_dst, &hmask, lvl, 2, len);
lf_sb.uv.h.call::<BD>(f, v_dst, &hmask, lvl, 3, len);
lf_sb.uv.h.call::<BD>(f, u_dst(x), &hmask, lvl, 2, len);
lf_sb.uv.h.call::<BD>(f, v_dst(x), &hmask, lvl, 3, len);
}
}

Expand All @@ -470,21 +472,23 @@ unsafe fn filter_plane_rows_uv<BD: BitDepth>(
// block2
let lf_sb = &f.dsp.lf.loop_filter_sb;
let len = endy4 - starty4;
let u_dst = |i| u_dst + (i as isize * 4 * u_dst.pixel_stride::<BD>());
let v_dst = |i| v_dst + (i as isize * 4 * v_dst.pixel_stride::<BD>());
u_dst(len - 1).as_ptr::<BD>(); // Bounds check
v_dst(len - 1).as_ptr::<BD>(); // Bounds check
let lvl = &lvl[..1 + (len - 1) * b4_stride];
for i in 0..len {
let y = i + starty4;
let u_dst = u_dst + (i as isize * 4 * u_dst.pixel_stride::<BD>());
let v_dst = v_dst + (i as isize * 4 * v_dst.pixel_stride::<BD>());
if !have_top && y == 0 {
continue;
}
let vmask = mask[y]
let vmask = mask[y % mask.len()] // To elide the bounds check.
.each_ref()
.map(|[a, b]| a.get() as u32 | ((b.get() as u32) << (16 >> ss_hor)));
let vmask = [vmask[0], vmask[1], 0];
let lvl = &lvl[i * b4_stride..];
lf_sb.uv.v.call::<BD>(f, u_dst, &vmask, lvl, 2, w);
lf_sb.uv.v.call::<BD>(f, v_dst, &vmask, lvl, 3, w);
lf_sb.uv.v.call::<BD>(f, u_dst(i), &vmask, lvl, 2, w);
lf_sb.uv.v.call::<BD>(f, v_dst(i), &vmask, lvl, 3, w);
}
}

Expand Down

0 comments on commit 231e81e

Please sign in to comment.