From c7c94fd882697d74c49c4efe0827a5ff899de638 Mon Sep 17 00:00:00 2001 From: burgerguy Date: Fri, 28 Apr 2023 13:57:39 +0000 Subject: [PATCH] clean up a bit, add package metadata --- Cargo.toml | 11 +- README.md | 2 +- src/lib.rs | 4 +- src/main.rs | 9 - src/test/checks.rs | 327 +++++++++++++++++++++ src/{comp_test/mod.rs => test/compile.rs} | 1 + src/test/mod.rs | 329 +--------------------- 7 files changed, 337 insertions(+), 346 deletions(-) delete mode 100644 src/main.rs create mode 100644 src/test/checks.rs rename src/{comp_test/mod.rs => test/compile.rs} (88%) diff --git a/Cargo.toml b/Cargo.toml index 17d1e5c..4c4442e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,11 @@ [package] -name = "fast_math" +name = "fath" +authors = [ "burgerindividual", "duplexsystem" ] version = "0.1.0" edition = "2021" - -[dependencies] -#num = "0.4.0" -#fixed = "2.0.0-alpha.11" +license = "GPL-3.0" +repository = "https://github.com/burgerindividual/fath" +description = "Fa(st ma)th library built for speed." [dev-dependencies] rand = "0.8.5" @@ -16,4 +16,3 @@ opt-level = 3 [profile.release] opt-level = 3 panic = "abort" - diff --git a/README.md b/README.md index 9cc1233..04cf08b 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,2 @@ -# fast_math +# fath SIMD-Capable fast approximate math library written in Rust diff --git a/src/lib.rs b/src/lib.rs index a575ba2..590bdf2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,6 +11,4 @@ pub mod shared; pub mod simd; #[cfg(test)] -pub mod test; - -pub mod comp_test; +pub mod test; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 99d626b..0000000 --- a/src/main.rs +++ /dev/null @@ -1,9 +0,0 @@ -#![feature(core_intrinsics, portable_simd)] - -use core::simd::*; -use fast_math::comp_test::test; - -pub fn main() { - let num = 30872332346397_u64; - println!("{:?}, {:?}", test(Simd::splat(49)), num.ilog(10)); -} diff --git a/src/test/checks.rs b/src/test/checks.rs new file mode 100644 index 0000000..8128383 --- /dev/null +++ b/src/test/checks.rs @@ -0,0 +1,327 @@ +use crate::shared::float::*; +use crate::shared::int::*; +use core::f32::consts::FRAC_PI_2; +use core::ops::Range; +use core::simd::*; +use rand::rngs::ThreadRng; +use rand::{thread_rng, Rng, RngCore}; +// #[cfg(target_arch = "x86")] +// #[allow(unused_imports)] +// use core::arch::x86::*; +// #[cfg(target_arch = "x86_64")] +// #[allow(unused_imports)] +// use core::arch::x86_64::*; + +const ITERS: usize = 1 << 20; + +#[inline(never)] +#[test] +pub fn scalar_error() { + const RANGE: Range = -FRAC_PI_2..FRAC_PI_2; + const MAX_ERROR_0: f32 = 2.9e-2_f32; + const MAX_ERROR_1: f32 = 6.0e-4_f32; + const MAX_ERROR_2: f32 = 6.9e-6_f32; + const MAX_ERROR_3: f32 = 2.7e-7_f32; + + let mut rng = thread_rng(); + for _i in 0..ITERS { + let x = rng.gen_range(RANGE); + + let approx_0 = unsafe { x.sin_fast_approx::<0>() }; + let approx_1 = unsafe { x.sin_fast_approx::<1>() }; + let approx_2 = unsafe { x.sin_fast_approx::<2>() }; + let approx_3 = unsafe { x.sin_fast_approx::<3>() }; + let exact = x.sin(); + + assert!( + (exact - approx_0).abs() <= MAX_ERROR_0, + "Error greater than set maximum: true: {exact}, approx: {approx_0}, x: {x}" + ); + assert!( + (exact - approx_1).abs() <= MAX_ERROR_1, + "Error greater than set maximum: true: {exact}, approx: {approx_1}, x: {x}" + ); + assert!( + (exact - approx_2).abs() <= MAX_ERROR_2, + "Error greater than set maximum: true: {exact}, approx: {approx_2}, x: {x}" + ); + assert!( + (exact - approx_3).abs() <= MAX_ERROR_3, + "Error greater than set maximum: true: {exact}, approx: {approx_3}, x: {x}" + ); + } +} + +#[inline(never)] +#[test] +pub fn simd_error() { + const RANGE: Range = -FRAC_PI_2..FRAC_PI_2; + const MAX_ERROR_0: f32 = 2.9e-2_f32; + const MAX_ERROR_1: f32 = 6.0e-4_f32; + const MAX_ERROR_2: f32 = 6.9e-6_f32; + const MAX_ERROR_3: f32 = 2.7e-7_f32; + + let rng = &mut thread_rng(); + + test::<2>(rng); + test::<4>(rng); + test::<8>(rng); + test::<16>(rng); + + #[inline(always)] + fn test(rng: &mut ThreadRng) + where + LaneCount: SupportedLaneCount, + { + for _i in 0..ITERS { + let mut vec_uninit: core::mem::MaybeUninit> = + core::mem::MaybeUninit::uninit(); + let vec_ptr = vec_uninit.as_mut_ptr(); + + for i in 0..LANES { + unsafe { + (*vec_ptr)[i] = rng.gen_range(RANGE); + } + } + + let x = unsafe { vec_uninit.assume_init() }; + + let approx_0 = unsafe { x.sin_fast_approx::<0>() }; + let approx_1 = unsafe { x.sin_fast_approx::<1>() }; + let approx_2 = unsafe { x.sin_fast_approx::<2>() }; + let approx_3 = unsafe { x.sin_fast_approx::<3>() }; + + let mut vec_uninit: core::mem::MaybeUninit> = + core::mem::MaybeUninit::uninit(); + let vec_ptr = vec_uninit.as_mut_ptr(); + + for i in 0..LANES { + unsafe { + (*vec_ptr)[i] = x[i].sin(); + } + } + + let exact = unsafe { vec_uninit.assume_init() }; + + assert!( + (exact - approx_0) + .abs() + .simd_le(Simd::splat(MAX_ERROR_0)) + .all(), + "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", + exact, + approx_0, + x + ); + assert!( + (exact - approx_1) + .abs() + .simd_le(Simd::splat(MAX_ERROR_1)) + .all(), + "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", + exact, + approx_1, + x + ); + assert!( + (exact - approx_2) + .abs() + .simd_le(Simd::splat(MAX_ERROR_2)) + .all(), + "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", + exact, + approx_2, + x + ); + assert!( + (exact - approx_3) + .abs() + .simd_le(Simd::splat(MAX_ERROR_3)) + .all(), + "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", + exact, + approx_3, + x + ); + } + } +} + +#[inline(never)] +#[test] +pub fn simd_ilog_error() { + let rng = &mut thread_rng(); + + test::<2>(rng); + test::<4>(rng); + test::<8>(rng); + test::<16>(rng); + + #[inline(always)] + fn test(rng: &mut ThreadRng) + where + LaneCount: SupportedLaneCount, + { + for _i in 0..ITERS { + let mut vec_uninit: core::mem::MaybeUninit> = + core::mem::MaybeUninit::uninit(); + let vec_ptr = vec_uninit.as_mut_ptr(); + + for i in 0..LANES { + unsafe { + (*vec_ptr)[i] = rng.next_u32(); + } + } + + let x = unsafe { vec_uninit.assume_init() }; + + let fast = unsafe { x.ilog_const_base_unchecked::<3>() }; + + let mut vec_uninit: core::mem::MaybeUninit> = + core::mem::MaybeUninit::uninit(); + let vec_ptr = vec_uninit.as_mut_ptr(); + + for i in 0..LANES { + unsafe { + (*vec_ptr)[i] = x[i].ilog(3); + } + } + + let exact = unsafe { vec_uninit.assume_init() }; + + assert!( + exact.simd_eq(fast).all(), + "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", + exact, + fast, + x + ); + } + } +} + +// /// Options: +// /// --cfg print_values +// /// --cfg print_error +// /// --cfg print_cycles +// #[allow(dead_code)] +// pub fn main() { +// const STEPS: usize = 1000; //1 << 24; +// const WARMUP_ITRS: usize = 1 << 24; +// const START: f32 = 0.0; +// const END: f32 = FRAC_PI_2; +// +// const ITRS: usize = STEPS / LANES; +// const SLICE: f32 = (END - START) / (STEPS as f32); +// const INCR: Simd = Simd::from_array([SLICE * LANES as f32; LANES]); +// +// println!("Count: {STEPS}"); +// +// #[allow(unused_mut)] +// let mut vec = Simd::::splat(SLICE).mul_add( +// Simd::from_slice(&(0..LANES).collect::>()).cast::(), +// Simd::splat(START), +// ); +// +// if cfg!(print_cycles) { +// if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { +// for _i in 0..WARMUP_ITRS { +// unsafe { +// black_box(wrap_auto_vectorize!( +// sin_fast_approx::, +// LANES, +// black_box(vec) +// )); +// } +// } +// } else { +// panic!("CPU cycle timings are not supported on this platform"); +// } +// } +// +// #[allow(unused_variables)] +// let mut total_error = 0.0_f64; +// let mut max_error = 0.0_f64; +// #[allow(unused_variables)] +// let mut built_string: String; +// #[cfg(print_values)] +// { +// built_string = String::with_capacity(STEPS * 16); +// } +// #[allow(unused_variables, unused_mut)] +// let mut cycles_1: u64; +// #[cfg(all(print_cycles, any(target_arch = "x86", target_arch = "x86_64")))] +// unsafe { +// let mut _unused = 0_u32; +// cycles_1 = __rdtscp(&mut _unused); +// } +// +// for _i in 0..ITRS { +// let result = unsafe { +// black_box(wrap_auto_vectorize!( +// sin_fast_approx::, +// LANES, +// black_box(vec) +// )) +// }; +// +// if cfg!(print_error) { +// let mut array: [f32; LANES] = [0.0; LANES]; +// +// for i in 0..LANES { +// array[i] = if COS { vec[i].cos() } else { vec[i].sin() }; +// } +// +// let true_result = Simd::from_array(array); +// +// // the range of sin and cos are between -1 and 1 +// let distance = (result.cast::() - true_result.cast::()).abs(); +// let distance_epsilons = distance / Simd::splat(f32::EPSILON as f64); +// total_error += distance_epsilons.reduce_sum(); +// max_error = max_error.max(distance_epsilons.reduce_max()); +// +// #[cfg(print_values)] +// { +// for i in 0..LANES { +// built_string.push_str(&format!( +// "{:?} {:?} {:?} {:.3}\n", +// vec[i], result[i], true_result[i], distance_epsilons[i] +// )); +// } +// } +// } else if cfg!(print_values) { +// #[cfg(print_values)] +// { +// for i in 0..LANES { +// built_string.push_str(&format!("{:?} {:?}\n", vec[i], result[i])); +// } +// } +// } +// +// #[cfg(any(print_values, print_error))] +// { +// vec += INCR; +// } +// } +// #[cfg(all(print_cycles, any(target_arch = "x86", target_arch = "x86_64")))] +// unsafe { +// let mut _unused = 0_u32; +// let cycles_2 = __rdtscp(&mut _unused); +// +// let cycles_total = cycles_2 - cycles_1; +// let per_iter_cycles = cycles_total as f64 / (ITRS as f64); +// let per_op_cycles = cycles_total as f64 / (STEPS as f64); +// println!("Avg Cycles Per Iter: {per_iter_cycles}\nAvg Cycles Per Op: {per_op_cycles}"); +// } +// +// #[cfg(print_error)] +// { +// let per_op_error = total_error / (STEPS as f64); +// println!("Avg Error Per Op (epsilons): {per_op_error}\nMax Error (epsilons): {max_error}") +// } +// +// #[cfg(print_values)] +// { +// println!("Values:\n{built_string}"); +// } +// } diff --git a/src/comp_test/mod.rs b/src/test/compile.rs similarity index 88% rename from src/comp_test/mod.rs rename to src/test/compile.rs index e4250e1..11e5874 100644 --- a/src/comp_test/mod.rs +++ b/src/test/compile.rs @@ -2,6 +2,7 @@ use crate::shared::int::*; use core::simd::*; #[inline(never)] +#[allow(dead_code)] pub fn test(x: u32x8) -> u32x8 { unsafe { x.ilog_const_base_unchecked::<2>() } } diff --git a/src/test/mod.rs b/src/test/mod.rs index 8128383..7d02b94 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -1,327 +1,2 @@ -use crate::shared::float::*; -use crate::shared::int::*; -use core::f32::consts::FRAC_PI_2; -use core::ops::Range; -use core::simd::*; -use rand::rngs::ThreadRng; -use rand::{thread_rng, Rng, RngCore}; -// #[cfg(target_arch = "x86")] -// #[allow(unused_imports)] -// use core::arch::x86::*; -// #[cfg(target_arch = "x86_64")] -// #[allow(unused_imports)] -// use core::arch::x86_64::*; - -const ITERS: usize = 1 << 20; - -#[inline(never)] -#[test] -pub fn scalar_error() { - const RANGE: Range = -FRAC_PI_2..FRAC_PI_2; - const MAX_ERROR_0: f32 = 2.9e-2_f32; - const MAX_ERROR_1: f32 = 6.0e-4_f32; - const MAX_ERROR_2: f32 = 6.9e-6_f32; - const MAX_ERROR_3: f32 = 2.7e-7_f32; - - let mut rng = thread_rng(); - for _i in 0..ITERS { - let x = rng.gen_range(RANGE); - - let approx_0 = unsafe { x.sin_fast_approx::<0>() }; - let approx_1 = unsafe { x.sin_fast_approx::<1>() }; - let approx_2 = unsafe { x.sin_fast_approx::<2>() }; - let approx_3 = unsafe { x.sin_fast_approx::<3>() }; - let exact = x.sin(); - - assert!( - (exact - approx_0).abs() <= MAX_ERROR_0, - "Error greater than set maximum: true: {exact}, approx: {approx_0}, x: {x}" - ); - assert!( - (exact - approx_1).abs() <= MAX_ERROR_1, - "Error greater than set maximum: true: {exact}, approx: {approx_1}, x: {x}" - ); - assert!( - (exact - approx_2).abs() <= MAX_ERROR_2, - "Error greater than set maximum: true: {exact}, approx: {approx_2}, x: {x}" - ); - assert!( - (exact - approx_3).abs() <= MAX_ERROR_3, - "Error greater than set maximum: true: {exact}, approx: {approx_3}, x: {x}" - ); - } -} - -#[inline(never)] -#[test] -pub fn simd_error() { - const RANGE: Range = -FRAC_PI_2..FRAC_PI_2; - const MAX_ERROR_0: f32 = 2.9e-2_f32; - const MAX_ERROR_1: f32 = 6.0e-4_f32; - const MAX_ERROR_2: f32 = 6.9e-6_f32; - const MAX_ERROR_3: f32 = 2.7e-7_f32; - - let rng = &mut thread_rng(); - - test::<2>(rng); - test::<4>(rng); - test::<8>(rng); - test::<16>(rng); - - #[inline(always)] - fn test(rng: &mut ThreadRng) - where - LaneCount: SupportedLaneCount, - { - for _i in 0..ITERS { - let mut vec_uninit: core::mem::MaybeUninit> = - core::mem::MaybeUninit::uninit(); - let vec_ptr = vec_uninit.as_mut_ptr(); - - for i in 0..LANES { - unsafe { - (*vec_ptr)[i] = rng.gen_range(RANGE); - } - } - - let x = unsafe { vec_uninit.assume_init() }; - - let approx_0 = unsafe { x.sin_fast_approx::<0>() }; - let approx_1 = unsafe { x.sin_fast_approx::<1>() }; - let approx_2 = unsafe { x.sin_fast_approx::<2>() }; - let approx_3 = unsafe { x.sin_fast_approx::<3>() }; - - let mut vec_uninit: core::mem::MaybeUninit> = - core::mem::MaybeUninit::uninit(); - let vec_ptr = vec_uninit.as_mut_ptr(); - - for i in 0..LANES { - unsafe { - (*vec_ptr)[i] = x[i].sin(); - } - } - - let exact = unsafe { vec_uninit.assume_init() }; - - assert!( - (exact - approx_0) - .abs() - .simd_le(Simd::splat(MAX_ERROR_0)) - .all(), - "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", - exact, - approx_0, - x - ); - assert!( - (exact - approx_1) - .abs() - .simd_le(Simd::splat(MAX_ERROR_1)) - .all(), - "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", - exact, - approx_1, - x - ); - assert!( - (exact - approx_2) - .abs() - .simd_le(Simd::splat(MAX_ERROR_2)) - .all(), - "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", - exact, - approx_2, - x - ); - assert!( - (exact - approx_3) - .abs() - .simd_le(Simd::splat(MAX_ERROR_3)) - .all(), - "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", - exact, - approx_3, - x - ); - } - } -} - -#[inline(never)] -#[test] -pub fn simd_ilog_error() { - let rng = &mut thread_rng(); - - test::<2>(rng); - test::<4>(rng); - test::<8>(rng); - test::<16>(rng); - - #[inline(always)] - fn test(rng: &mut ThreadRng) - where - LaneCount: SupportedLaneCount, - { - for _i in 0..ITERS { - let mut vec_uninit: core::mem::MaybeUninit> = - core::mem::MaybeUninit::uninit(); - let vec_ptr = vec_uninit.as_mut_ptr(); - - for i in 0..LANES { - unsafe { - (*vec_ptr)[i] = rng.next_u32(); - } - } - - let x = unsafe { vec_uninit.assume_init() }; - - let fast = unsafe { x.ilog_const_base_unchecked::<3>() }; - - let mut vec_uninit: core::mem::MaybeUninit> = - core::mem::MaybeUninit::uninit(); - let vec_ptr = vec_uninit.as_mut_ptr(); - - for i in 0..LANES { - unsafe { - (*vec_ptr)[i] = x[i].ilog(3); - } - } - - let exact = unsafe { vec_uninit.assume_init() }; - - assert!( - exact.simd_eq(fast).all(), - "Error greater than set maximum: true: {:?}, approx: {:?}, x: {:?}", - exact, - fast, - x - ); - } - } -} - -// /// Options: -// /// --cfg print_values -// /// --cfg print_error -// /// --cfg print_cycles -// #[allow(dead_code)] -// pub fn main() { -// const STEPS: usize = 1000; //1 << 24; -// const WARMUP_ITRS: usize = 1 << 24; -// const START: f32 = 0.0; -// const END: f32 = FRAC_PI_2; -// -// const ITRS: usize = STEPS / LANES; -// const SLICE: f32 = (END - START) / (STEPS as f32); -// const INCR: Simd = Simd::from_array([SLICE * LANES as f32; LANES]); -// -// println!("Count: {STEPS}"); -// -// #[allow(unused_mut)] -// let mut vec = Simd::::splat(SLICE).mul_add( -// Simd::from_slice(&(0..LANES).collect::>()).cast::(), -// Simd::splat(START), -// ); -// -// if cfg!(print_cycles) { -// if cfg!(any(target_arch = "x86", target_arch = "x86_64")) { -// for _i in 0..WARMUP_ITRS { -// unsafe { -// black_box(wrap_auto_vectorize!( -// sin_fast_approx::, -// LANES, -// black_box(vec) -// )); -// } -// } -// } else { -// panic!("CPU cycle timings are not supported on this platform"); -// } -// } -// -// #[allow(unused_variables)] -// let mut total_error = 0.0_f64; -// let mut max_error = 0.0_f64; -// #[allow(unused_variables)] -// let mut built_string: String; -// #[cfg(print_values)] -// { -// built_string = String::with_capacity(STEPS * 16); -// } -// #[allow(unused_variables, unused_mut)] -// let mut cycles_1: u64; -// #[cfg(all(print_cycles, any(target_arch = "x86", target_arch = "x86_64")))] -// unsafe { -// let mut _unused = 0_u32; -// cycles_1 = __rdtscp(&mut _unused); -// } -// -// for _i in 0..ITRS { -// let result = unsafe { -// black_box(wrap_auto_vectorize!( -// sin_fast_approx::, -// LANES, -// black_box(vec) -// )) -// }; -// -// if cfg!(print_error) { -// let mut array: [f32; LANES] = [0.0; LANES]; -// -// for i in 0..LANES { -// array[i] = if COS { vec[i].cos() } else { vec[i].sin() }; -// } -// -// let true_result = Simd::from_array(array); -// -// // the range of sin and cos are between -1 and 1 -// let distance = (result.cast::() - true_result.cast::()).abs(); -// let distance_epsilons = distance / Simd::splat(f32::EPSILON as f64); -// total_error += distance_epsilons.reduce_sum(); -// max_error = max_error.max(distance_epsilons.reduce_max()); -// -// #[cfg(print_values)] -// { -// for i in 0..LANES { -// built_string.push_str(&format!( -// "{:?} {:?} {:?} {:.3}\n", -// vec[i], result[i], true_result[i], distance_epsilons[i] -// )); -// } -// } -// } else if cfg!(print_values) { -// #[cfg(print_values)] -// { -// for i in 0..LANES { -// built_string.push_str(&format!("{:?} {:?}\n", vec[i], result[i])); -// } -// } -// } -// -// #[cfg(any(print_values, print_error))] -// { -// vec += INCR; -// } -// } -// #[cfg(all(print_cycles, any(target_arch = "x86", target_arch = "x86_64")))] -// unsafe { -// let mut _unused = 0_u32; -// let cycles_2 = __rdtscp(&mut _unused); -// -// let cycles_total = cycles_2 - cycles_1; -// let per_iter_cycles = cycles_total as f64 / (ITRS as f64); -// let per_op_cycles = cycles_total as f64 / (STEPS as f64); -// println!("Avg Cycles Per Iter: {per_iter_cycles}\nAvg Cycles Per Op: {per_op_cycles}"); -// } -// -// #[cfg(print_error)] -// { -// let per_op_error = total_error / (STEPS as f64); -// println!("Avg Error Per Op (epsilons): {per_op_error}\nMax Error (epsilons): {max_error}") -// } -// -// #[cfg(print_values)] -// { -// println!("Values:\n{built_string}"); -// } -// } +pub mod checks; +pub mod compile; \ No newline at end of file