Commit 7b50807

SIMD-ize auto-correlation function in LPC.
yotarok committed Oct 4, 2023
1 parent bdfc9bb commit 7b50807
Showing 3 changed files with 88 additions and 28 deletions.
18 changes: 9 additions & 9 deletions report/report.md
@@ -17,20 +17,20 @@ Sources used: wikimedia.i_love_you_california, wikimedia.winter_kiss, wikimedia.
   - st: 0.5443052357800366
   - dmse: 0.5418910269181569
   - bsbs: 0.5435087256676912
-  - mae: 0.5374008633412148
+  - mae: 0.5374571118440491


 ### Average compression speed (inverse RTF)
 - Reference
-  - opt8lax: 265.72298864106625
-  - opt8: 262.0175361163109
-  - opt5: 510.05557499954114
+  - opt8lax: 257.2307394912905
+  - opt8: 259.9417855241402
+  - opt5: 498.18069606854715

 - Ours
-  - default: 212.53670254739373
-  - st: 84.90619245444263
-  - dmse: 123.82711138553906
-  - bsbs: 11.764164674514397
-  - mae: 25.7701793126406
+  - default: 222.1443707999417
+  - st: 95.26297207652442
+  - dmse: 128.94722929725017
+  - bsbs: 13.508132367761204
+  - mae: 26.8191475592669


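(For scale: inverse RTF is presumably the duration of the encoded audio divided by wall-clock encoding time, so an inverse RTF of 222 means roughly 222 seconds of audio encoded per second. After this change every one of our presets gets faster, e.g. default moves from about 212.5 to 222.1, while the reference encoder's figures appear to shift only within run-to-run variation.)
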
38 changes: 27 additions & 11 deletions src/fakesimd.rs
@@ -22,6 +22,7 @@ use std::array;
 // TYPE DEFINITION
 // ===
 #[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
+#[repr(transparent)]
 pub struct Simd<T: SimdElement, const LANES: usize>([T; LANES]);

 #[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd)]
@@ -45,6 +46,10 @@ impl SimdElement for u32 {
     type Mask = Self;
 }

+impl SimdElement for f32 {
+    type Mask = Self;
+}
+
 pub trait SupportedLaneCount {}
 pub struct LaneCount<const LANES: usize>();
 impl SupportedLaneCount for LaneCount<16> {}
@@ -87,6 +92,9 @@ pub type i32x16 = Simd<i32, 16>;
 #[allow(non_camel_case_types)]
 pub type u32x16 = Simd<u32, 16>;

+#[allow(non_camel_case_types)]
+pub type f32x32 = Simd<f32, 32>;
+
 // ===
 // IMPLEMENTATION OF `fakesimd::Simd` (non-trait methods)
 // ===
@@ -233,13 +241,32 @@ macro_rules! def_binop {
     };
 }

+macro_rules! def_binop_assign {
+    ($trait_name:ident, $binop_name: ident, $fn_name:ident, $expr:expr) => {
+        impl<U, T, const N: usize> std::ops::$trait_name<U> for Simd<T, N>
+        where
+            T: SimdElement,
+            Self: std::ops::$binop_name<U, Output = Self>,
+        {
+            #[inline]
+            #[allow(clippy::redundant_closure_call)]
+            fn $fn_name(&mut self, rhs: U) {
+                *self = ($expr)(*self, rhs);
+            }
+        }
+    };
+}
+
 def_binop!(Add, add, |x, y| x + y);
 def_binop!(Sub, sub, |x, y| x - y);
 def_binop!(Mul, mul, |x, y| x * y);
 def_binop!(Div, div, |x, y| x / y);
 def_binop!(BitAnd, bitand, |x, y| x & y);
 def_binop!(Shr, shr, |x, y| x >> y);

+def_binop_assign!(AddAssign, Add, add_assign, |x, y| x + y);
+def_binop_assign!(SubAssign, Sub, sub_assign, |x, y| x - y);
+
 impl<T, const N: usize> std::convert::AsRef<[T; N]> for Simd<T, N>
 where
     T: SimdElement,
@@ -282,14 +309,3 @@
         &mut self.as_mut_array()[index]
     }
 }
-
-impl<U, T, const N: usize> std::ops::SubAssign<U> for Simd<T, N>
-where
-    T: SimdElement,
-    Self: std::ops::Sub<U, Output = Self>,
-{
-    #[inline]
-    fn sub_assign(&mut self, rhs: U) {
-        *self = *self - rhs;
-    }
-}
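
Note: the new `def_binop_assign!` macro mirrors `def_binop!` and derives each compound-assignment operator from the corresponding binary operator, which is what lets the SIMD-ized LPC code below use `+=` on `f32x32` under the `fakesimd` fallback as well. A minimal sketch of the behavior the generated impls are expected to provide (the test module below is hypothetical and not part of this commit):

```rust
// Hypothetical test sketch (not in the commit): exercises the AddAssign and
// SubAssign impls generated by `def_binop_assign!` through the same surface
// that `std::simd` offers. Assumes it is placed inside src/fakesimd.rs.
#[cfg(test)]
mod binop_assign_sketch {
    use super::*;

    #[test]
    fn assign_ops_match_their_binary_ops() {
        let mut v = f32x32::splat(1.0);
        v += f32x32::splat(2.0); // expands to `v = v + f32x32::splat(2.0)`
        v -= f32x32::splat(0.5); // expands to `v = v - f32x32::splat(0.5)`
        assert_eq!(v, f32x32::splat(2.5));
    }
}
```
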
60 changes: 52 additions & 8 deletions src/lpc.rs
@@ -329,6 +329,41 @@ pub fn delay_sum(order: usize, signal: &[f32], dest: &mut nalgebra::DMatrix<f32>) {
     weighted_delay_sum(order, signal, dest, |_t| 1.0f32);
 }

+#[inline]
+#[allow(clippy::needless_range_loop)] // for readability
+#[allow(dead_code)] // not used in "fakesimd" but should still be compilable.
+fn weighted_auto_correlation_simd<F>(order: usize, signal: &[f32], dest: &mut [f32], weight_fn: F)
+where
+    F: Fn(usize) -> f32,
+{
+    assert!(dest.len() >= order);
+
+    let mut lagged = simd::f32x32::splat(0f32);
+    // acc0 is computed separately for better SIMD alignment.
+    let mut acc0 = 0.0f32;
+    let mut acc = simd::f32x32::splat(0f32);
+    for tau in 0..order - 1 {
+        lagged[tau] = signal[order - 2 - tau];
+    }
+    for t in (order - 1)..signal.len() {
+        let w = weight_fn(t);
+        let y = signal[t];
+        acc0 += w * y * y;
+        acc += simd::f32x32::splat(w * y) * lagged;
+        lagged = lagged.rotate_lanes_right::<1>();
+        lagged[0] = y;
+    }
+    for (tau, mut_p) in dest.iter_mut().enumerate() {
+        *mut_p = if tau == 0 {
+            acc0
+        } else if tau < order {
+            acc[tau - 1]
+        } else {
+            0.0
+        };
+    }
+}
+
 /// Compute weighted auto-correlation coefficients.
 ///
 /// # Panics
@@ -338,15 +373,24 @@ pub fn weighted_auto_correlation<F>(order: usize, signal: &[f32], dest: &mut [f32], weight_fn: F)
 where
     F: Fn(usize) -> f32,
 {
-    assert!(dest.len() >= order);
-    for p in &mut *dest {
-        *p = 0.0;
+    #[cfg(not(feature = "fakesimd"))]
+    {
+        // The current implementation is inefficient with fakesimd when order
+        // is low. So, here we still have a scalar version of it.
+        weighted_auto_correlation_simd(order, signal, dest, weight_fn);
     }
-    for t in (order - 1)..signal.len() {
-        let w = weight_fn(t);
-        for tau in 0..order {
-            let v: f32 = signal[t] * signal[t - tau] * w;
-            dest[tau] += v;
+    #[cfg(feature = "fakesimd")]
+    {
+        assert!(dest.len() >= order);
+        for p in &mut *dest {
+            *p = 0.0;
         }
+        for t in (order - 1)..signal.len() {
+            let w = weight_fn(t);
+            for tau in 0..order {
+                let v: f32 = signal[t] * signal[t - tau] * w;
+                dest[tau] += v;
+            }
+        }
     }
 }
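Note on the approach: `weighted_auto_correlation_simd` keeps the previous `order - 1` samples in the `lagged` register, adds `splat(w * y) * lagged` to a vector accumulator at every time step, and then rotates the register by one lane before writing the newest sample into lane 0. Lane `tau - 1` of `acc` therefore ends up holding the lag-`tau` sum, while the lag-0 term is accumulated separately in `acc0`, so the inner `tau` loop disappears from the hot path. The sketch below (illustrative only, not part of the commit) emulates that update scheme with a plain array on stable Rust, with all weights fixed to 1, and checks it against the naive double loop:

```rust
// Illustrative sketch (not part of the commit): emulate the lagged-register
// update used by `weighted_auto_correlation_simd` with a plain array and
// compare it against the naive double loop. Weights are fixed to 1 here.
fn auto_correlation_lagged(order: usize, signal: &[f32]) -> Vec<f32> {
    const LANES: usize = 32; // stands in for the 32 lanes of f32x32
    assert!(order >= 1 && order - 1 <= LANES && signal.len() >= order);
    let mut lagged = [0.0f32; LANES];
    let mut acc = [0.0f32; LANES];
    let mut acc0 = 0.0f32;
    // Preload the `order - 1` samples that precede t = order - 1.
    for tau in 0..order - 1 {
        lagged[tau] = signal[order - 2 - tau];
    }
    for t in (order - 1)..signal.len() {
        let y = signal[t];
        acc0 += y * y; // lag 0, kept separate as in the commit
        for lane in 0..LANES {
            acc[lane] += y * lagged[lane]; // acc += splat(w * y) * lagged
        }
        lagged.rotate_right(1); // lagged.rotate_lanes_right::<1>()
        lagged[0] = y;
    }
    let mut dest = vec![0.0f32; order];
    dest[0] = acc0;
    dest[1..].copy_from_slice(&acc[..order - 1]);
    dest
}

fn auto_correlation_naive(order: usize, signal: &[f32]) -> Vec<f32> {
    let mut dest = vec![0.0f32; order];
    for t in (order - 1)..signal.len() {
        for tau in 0..order {
            dest[tau] += signal[t] * signal[t - tau];
        }
    }
    dest
}

fn main() {
    let signal: Vec<f32> = (0..256).map(|i| (i as f32 * 0.1).sin()).collect();
    let fast = auto_correlation_lagged(16, &signal);
    let slow = auto_correlation_naive(16, &signal);
    for (a, b) in fast.iter().zip(&slow) {
        assert!((a - b).abs() <= 1e-3 * b.abs().max(1.0));
    }
    println!("lagged-register update matches the double loop: {fast:?}");
}
```

With `f32x32`, that per-step loop over lanes becomes a single vector multiply and add, which is consistent with the throughput gains reported in report/report.md above.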

0 comments on commit 7b50807
