From 178da3b38dc576b3cc2d411cbe045fcc8c1d2c99 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 25 May 2024 19:35:02 -0700 Subject: [PATCH 1/3] `fn fg{y,uv}_32x32xn_rust`: Index `static W` array instead of slicing it since indices are already provably in-bounds. This is simpler and also closer to the C. It also makes upcoming changes simpler to integrate. --- src/filmgrain.rs | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/src/filmgrain.rs b/src/filmgrain.rs index d4f84f473..ad234d4f3 100644 --- a/src/filmgrain.rs +++ b/src/filmgrain.rs @@ -632,7 +632,7 @@ unsafe fn fgy_32x32xn_rust( 0 }; - static w: [[c_int; 2]; 2] = [[27, 17], [17, 27]]; + static W: [[c_int; 2]; 2] = [[27, 17], [17, 27]]; let add_noise_y = |x, y, grain| { let src = src_row @@ -658,30 +658,30 @@ unsafe fn fgy_32x32xn_rust( } // Special case for overlapped column - for (x, wx) in w[..xstart].iter().enumerate() { + for x in 0..xstart { let grain = sample_lut::(grain_lut, &offsets, false, false, false, false, x, y); let old = sample_lut::(grain_lut, &offsets, false, false, true, false, x, y); - let grain = round2(old * wx[0] + grain * wx[1], 5); + let grain = round2(old * W[x][0] + grain * W[x][1], 5); let grain = iclip(grain, grain_min, grain_max); add_noise_y(x, y, grain); } } - for (y, wy) in w[..ystart].iter().enumerate() { + for y in 0..ystart { // Special case for overlapped row (sans corner) for x in xstart..bw { let grain = sample_lut::(grain_lut, &offsets, false, false, false, false, x, y); let old = sample_lut::(grain_lut, &offsets, false, false, false, true, x, y); - let grain = round2(old * wy[0] + grain * wy[1], 5); + let grain = round2(old * W[y][0] + grain * W[y][1], 5); let grain = iclip(grain, grain_min, grain_max); add_noise_y(x, y, grain); } // Special case for doubly-overlapped corner - for (x, wx) in w[..xstart].iter().enumerate() { + for x in 0..xstart { // Blend the top pixel with the top left block let top = sample_lut::(grain_lut, &offsets, false, false, false, true, x, y); let old = sample_lut::(grain_lut, &offsets, false, false, true, true, x, y); - let top = round2(old * wx[0] + top * wx[1], 5); + let top = round2(old * W[x][0] + top * W[x][1], 5); let top = iclip(top, grain_min, grain_max); // Blend the current pixel with the left block @@ -689,9 +689,9 @@ unsafe fn fgy_32x32xn_rust( let old = sample_lut::(grain_lut, &offsets, false, false, true, false, x, y); // Mix the row rows together and apply grain - let grain = round2(old * wx[0] + grain * wx[1], 5); + let grain = round2(old * W[x][0] + grain * W[x][1], 5); let grain = iclip(grain, grain_min, grain_max); - let grain = round2(top * wy[0] + grain * wy[1], 5); + let grain = round2(top * W[y][0] + grain * W[y][1], 5); let grain = iclip(grain, grain_min, grain_max); add_noise_y(x, y, grain); } @@ -768,7 +768,7 @@ unsafe fn fguv_32x32xn_rust( 0 }; - static w: [[[c_int; 2]; 2 /* off */]; 2 /* sub */] = [[[27, 17], [17, 27]], [[23, 22], [0; 2]]]; + static W: [[[c_int; 2]; 2 /* off */]; 2 /* sub */] = [[[27, 17], [17, 27]], [[23, 22], [0; 2]]]; let add_noise_uv = |x, y, grain| { let lx = bx.wrapping_add(x) << sx; @@ -815,7 +815,7 @@ unsafe fn fguv_32x32xn_rust( for x in 0..xstart { let grain = sample_lut::(grain_lut, &offsets, is_sx, is_sy, false, false, x, y); let old = sample_lut::(grain_lut, &offsets, is_sx, is_sy, true, false, x, y); - let grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5); + let grain = round2(old * W[sx][x][0] + grain * W[sx][x][1], 5); let grain = iclip(grain, grain_min, grain_max); add_noise_uv(x, y, grain); } @@ -825,7 +825,7 @@ unsafe fn fguv_32x32xn_rust( for x in xstart..bw { let grain = sample_lut::(grain_lut, &offsets, is_sx, is_sy, false, false, x, y); let old = sample_lut::(grain_lut, &offsets, is_sx, is_sy, false, true, x, y); - let grain = round2(old * w[sy][y][0] + grain * w[sy][y][1], 5); + let grain = round2(old * W[sy][y][0] + grain * W[sy][y][1], 5); let grain = iclip(grain, grain_min, grain_max); add_noise_uv(x, y, grain); } @@ -835,7 +835,7 @@ unsafe fn fguv_32x32xn_rust( // Blend the top pixel with the top left block let top = sample_lut::(grain_lut, &offsets, is_sx, is_sy, false, true, x, y); let old = sample_lut::(grain_lut, &offsets, is_sx, is_sy, true, true, x, y); - let top = round2(old * w[sx][x][0] + top * w[sx][x][1], 5); + let top = round2(old * W[sx][x][0] + top * W[sx][x][1], 5); let top = iclip(top, grain_min, grain_max); // Blend the current pixel with the left block @@ -843,9 +843,9 @@ unsafe fn fguv_32x32xn_rust( let old = sample_lut::(grain_lut, &offsets, is_sx, is_sy, true, false, x, y); // Mix the row rows together and apply to image - let grain = round2(old * w[sx][x][0] + grain * w[sx][x][1], 5); + let grain = round2(old * W[sx][x][0] + grain * W[sx][x][1], 5); let grain = iclip(grain, grain_min, grain_max); - let grain = round2(top * w[sy][y][0] + grain * w[sy][y][1], 5); + let grain = round2(top * W[sy][y][0] + grain * W[sy][y][1], 5); let grain = iclip(grain, grain_min, grain_max); add_noise_uv(x, y, grain); } From 7503daf4aa4e63a2f88d71f37f3cec3b2d3465b7 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 25 May 2024 19:49:28 -0700 Subject: [PATCH 2/3] `fn fg{y,uv}_32x32xn_rust`: Refactor out `noise_{y,uv}` closures from `add_noise_{y,uv}` ones for upcoming refactors. --- src/filmgrain.rs | 52 ++++++++++++++++++++++++++++-------------------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/src/filmgrain.rs b/src/filmgrain.rs index ad234d4f3..59da4a3aa 100644 --- a/src/filmgrain.rs +++ b/src/filmgrain.rs @@ -28,6 +28,7 @@ use std::ops::Add; use std::ops::Shl; use std::ops::Shr; use std::ptr; +use std::slice; use to_method::To; #[cfg(all( @@ -634,6 +635,14 @@ unsafe fn fgy_32x32xn_rust( static W: [[c_int; 2]; 2] = [[27, 17], [17, 27]]; + let noise_y = |src: BD::Pixel, grain| { + let noise = round2( + scaling.as_ref()[src.to::()] as c_int * grain, + data.scaling_shift, + ); + iclip(src.as_::() + noise, min_value, max_value).as_::() + }; + let add_noise_y = |x, y, grain| { let src = src_row .offset(y as isize * BD::pxstride(stride)) @@ -643,11 +652,7 @@ unsafe fn fgy_32x32xn_rust( .offset(y as isize * BD::pxstride(stride)) .add(x) .add(bx); - let noise = round2( - scaling.as_ref()[(*src).to::()] as c_int * grain, - data.scaling_shift, - ); - *dst = iclip((*src).as_::() + noise, min_value, max_value).as_::(); + *dst = noise_y(*src, grain); }; for y in ystart..bh { @@ -770,27 +775,15 @@ unsafe fn fguv_32x32xn_rust( static W: [[[c_int; 2]; 2 /* off */]; 2 /* sub */] = [[[27, 17], [17, 27]], [[23, 22], [0; 2]]]; - let add_noise_uv = |x, y, grain| { - let lx = bx.wrapping_add(x) << sx; - let ly = y << sy; - let luma = luma_row - .offset(ly as isize * BD::pxstride(luma_stride)) - .offset(lx as isize); - let mut avg = *luma.offset(0); + let noise_uv = |src: BD::Pixel, grain, luma: &[BD::Pixel]| { + let mut avg = luma[0]; if is_sx { - avg = ((avg.as_::() + (*luma.offset(1)).as_::() + 1) >> 1) - .as_::(); + avg = ((avg.as_::() + luma[1].as_::() + 1) >> 1).as_::(); } - let src = src_row - .offset(y as isize * BD::pxstride(stride)) - .add(bx.wrapping_add(x)); - let dst = dst_row - .offset(y as isize * BD::pxstride(stride)) - .add(bx.wrapping_add(x)); let mut val = avg.as_::(); if !data.chroma_scaling_from_luma { let combined = avg.as_::() * data.uv_luma_mult[uv] - + (*src).as_::() * data.uv_mult[uv]; + + src.as_::() * data.uv_mult[uv]; val = bd .iclip_pixel((combined >> 6) + data.uv_offset[uv] * (1 << bitdepth_min_8)) .as_::(); @@ -801,7 +794,22 @@ unsafe fn fguv_32x32xn_rust( scaling.as_ref()[val as usize % scaling.as_ref().len()] as c_int * grain, data.scaling_shift, ); - *dst = iclip((*src).as_::() + noise, min_value, max_value).as_::(); + iclip(src.as_::() + noise, min_value, max_value).as_::() + }; + + let add_noise_uv = |x, y, grain| { + let lx = bx.wrapping_add(x) << sx; + let ly = y << sy; + let luma = luma_row + .offset(ly as isize * BD::pxstride(luma_stride)) + .offset(lx as isize); + let src = src_row + .offset(y as isize * BD::pxstride(stride)) + .add(bx.wrapping_add(x)); + let dst = dst_row + .offset(y as isize * BD::pxstride(stride)) + .add(bx.wrapping_add(x)); + *dst = noise_uv(*src, grain, slice::from_raw_parts(luma, 1 + sx)); }; for y in ystart..bh { From f11757fd2c1941e433730267b91c1901aa415ce1 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sat, 25 May 2024 20:13:39 -0700 Subject: [PATCH 3/3] `fn fg{y,uv}_32x32xn_rust`: Replace `add_noise_{y,uv}` closures with `{luma,src,dst}_row_{y,uv}` closures and slice ops. --- src/filmgrain.rs | 81 +++++++++++++++++++++++++++--------------------- 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/src/filmgrain.rs b/src/filmgrain.rs index 59da4a3aa..5467817ba 100644 --- a/src/filmgrain.rs +++ b/src/filmgrain.rs @@ -635,6 +635,15 @@ unsafe fn fgy_32x32xn_rust( static W: [[c_int; 2]; 2] = [[27, 17], [17, 27]]; + let src_row_y = |y| { + let src = src_row.offset(y as isize * BD::pxstride(stride)).add(bx); + slice::from_raw_parts(src, bw) + }; + let dst_row_y = |y| { + let dst = dst_row.offset(y as isize * BD::pxstride(stride)).add(bx); + slice::from_raw_parts_mut(dst, bw) + }; + let noise_y = |src: BD::Pixel, grain| { let noise = round2( scaling.as_ref()[src.to::()] as c_int * grain, @@ -643,23 +652,14 @@ unsafe fn fgy_32x32xn_rust( iclip(src.as_::() + noise, min_value, max_value).as_::() }; - let add_noise_y = |x, y, grain| { - let src = src_row - .offset(y as isize * BD::pxstride(stride)) - .add(x) - .add(bx); - let dst = dst_row - .offset(y as isize * BD::pxstride(stride)) - .add(x) - .add(bx); - *dst = noise_y(*src, grain); - }; - for y in ystart..bh { + let src = src_row_y(y); + let dst = dst_row_y(y); + // Non-overlapped image region (straightforward) for x in xstart..bw { let grain = sample_lut::(grain_lut, &offsets, false, false, false, false, x, y); - add_noise_y(x, y, grain); + dst[x] = noise_y(src[x], grain); } // Special case for overlapped column @@ -668,17 +668,20 @@ unsafe fn fgy_32x32xn_rust( let old = sample_lut::(grain_lut, &offsets, false, false, true, false, x, y); let grain = round2(old * W[x][0] + grain * W[x][1], 5); let grain = iclip(grain, grain_min, grain_max); - add_noise_y(x, y, grain); + dst[x] = noise_y(src[x], grain); } } for y in 0..ystart { + let src = src_row_y(y); + let dst = dst_row_y(y); + // Special case for overlapped row (sans corner) for x in xstart..bw { let grain = sample_lut::(grain_lut, &offsets, false, false, false, false, x, y); let old = sample_lut::(grain_lut, &offsets, false, false, false, true, x, y); let grain = round2(old * W[y][0] + grain * W[y][1], 5); let grain = iclip(grain, grain_min, grain_max); - add_noise_y(x, y, grain); + dst[x] = noise_y(src[x], grain); } // Special case for doubly-overlapped corner @@ -698,7 +701,7 @@ unsafe fn fgy_32x32xn_rust( let grain = iclip(grain, grain_min, grain_max); let grain = round2(top * W[y][0] + grain * W[y][1], 5); let grain = iclip(grain, grain_min, grain_max); - add_noise_y(x, y, grain); + dst[x] = noise_y(src[x], grain); } } } @@ -775,6 +778,21 @@ unsafe fn fguv_32x32xn_rust( static W: [[[c_int; 2]; 2 /* off */]; 2 /* sub */] = [[[27, 17], [17, 27]], [[23, 22], [0; 2]]]; + let luma_row_uv = |y| { + let luma = luma_row + .offset((y << sy) as isize * BD::pxstride(luma_stride)) + .add(bx << sx); + slice::from_raw_parts(luma, bw << sx) + }; + let src_row_uv = |y| { + let src = src_row.offset(y as isize * BD::pxstride(stride)).add(bx); + slice::from_raw_parts(src, bw) + }; + let dst_row_uv = |y| { + let dst = dst_row.offset(y as isize * BD::pxstride(stride)).add(bx); + slice::from_raw_parts_mut(dst, bw) + }; + let noise_uv = |src: BD::Pixel, grain, luma: &[BD::Pixel]| { let mut avg = luma[0]; if is_sx { @@ -797,26 +815,15 @@ unsafe fn fguv_32x32xn_rust( iclip(src.as_::() + noise, min_value, max_value).as_::() }; - let add_noise_uv = |x, y, grain| { - let lx = bx.wrapping_add(x) << sx; - let ly = y << sy; - let luma = luma_row - .offset(ly as isize * BD::pxstride(luma_stride)) - .offset(lx as isize); - let src = src_row - .offset(y as isize * BD::pxstride(stride)) - .add(bx.wrapping_add(x)); - let dst = dst_row - .offset(y as isize * BD::pxstride(stride)) - .add(bx.wrapping_add(x)); - *dst = noise_uv(*src, grain, slice::from_raw_parts(luma, 1 + sx)); - }; - for y in ystart..bh { + let luma = luma_row_uv(y); + let src = src_row_uv(y); + let dst = dst_row_uv(y); + // Non-overlapped image region (straightforward) for x in xstart..bw { let grain = sample_lut::(grain_lut, &offsets, is_sx, is_sy, false, false, x, y); - add_noise_uv(x, y, grain); + dst[x] = noise_uv(src[x], grain, &luma[x << sx..]); } // Special case for overlapped column @@ -825,17 +832,21 @@ unsafe fn fguv_32x32xn_rust( let old = sample_lut::(grain_lut, &offsets, is_sx, is_sy, true, false, x, y); let grain = round2(old * W[sx][x][0] + grain * W[sx][x][1], 5); let grain = iclip(grain, grain_min, grain_max); - add_noise_uv(x, y, grain); + dst[x] = noise_uv(src[x], grain, &luma[x << sx..]); } } for y in 0..ystart { + let luma = luma_row_uv(y); + let src = src_row_uv(y); + let dst = dst_row_uv(y); + // Special case for overlapped row (sans corner) for x in xstart..bw { let grain = sample_lut::(grain_lut, &offsets, is_sx, is_sy, false, false, x, y); let old = sample_lut::(grain_lut, &offsets, is_sx, is_sy, false, true, x, y); let grain = round2(old * W[sy][y][0] + grain * W[sy][y][1], 5); let grain = iclip(grain, grain_min, grain_max); - add_noise_uv(x, y, grain); + dst[x] = noise_uv(src[x], grain, &luma[x << sx..]); } // Special case for doubly-overlapped corner @@ -855,7 +866,7 @@ unsafe fn fguv_32x32xn_rust( let grain = iclip(grain, grain_min, grain_max); let grain = round2(top * W[sy][y][0] + grain * W[sy][y][1], 5); let grain = iclip(grain, grain_min, grain_max); - add_noise_uv(x, y, grain); + dst[x] = noise_uv(src[x], grain, &luma[x << sx..]); } } }