From 973b879de24a81fcf3b4435757397540b5104811 Mon Sep 17 00:00:00 2001 From: burgerguy Date: Sat, 29 Apr 2023 23:42:59 -0400 Subject: [PATCH] correctly use no_std, get rid of macro in exchange for zips and maps, bump ver --- Cargo.toml | 2 +- src/lib.rs | 10 +++------- src/simd/consts.rs | 22 ---------------------- src/simd/float.rs | 25 ++++++++++++++++++++----- src/simd/int.rs | 7 +++---- src/simd/mod.rs | 1 - src/test/compile.rs | 6 +++--- src/test/mod.rs | 4 +++- 8 files changed, 33 insertions(+), 44 deletions(-) delete mode 100644 src/simd/consts.rs diff --git a/Cargo.toml b/Cargo.toml index f5bf393..de38fb3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "fath" authors = [ "burgerindividual", "duplexsystem" ] -version = "0.1.5" +version = "0.1.6" edition = "2021" license = "LGPL-3.0" repository = "https://github.com/burgerindividual/fath" diff --git a/src/lib.rs b/src/lib.rs index 590bdf2..da5d2e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,5 @@ -#![feature(core_intrinsics, portable_simd)] -// #![no_std] - -pub extern crate std as core; - -extern crate alloc; +#![feature(core_intrinsics, portable_simd, array_zip)] +#![no_std] pub mod scalar; pub mod shared; @@ -11,4 +7,4 @@ pub mod shared; pub mod simd; #[cfg(test)] -pub mod test; \ No newline at end of file +pub mod test; diff --git a/src/simd/consts.rs b/src/simd/consts.rs deleted file mode 100644 index 8632cae..0000000 --- a/src/simd/consts.rs +++ /dev/null @@ -1,22 +0,0 @@ -#[macro_export] -macro_rules! wrap_auto_vectorize { - ($func:expr, $lanes:expr, $($x:ident),+) => { - { - let mut vec_uninit: core::mem::MaybeUninit> = core::mem::MaybeUninit::uninit(); - let vec_ptr = vec_uninit.as_mut_ptr(); - - let mut i = 0; - while i < $lanes { - let evaluated = $func($($x[i]),+); - #[allow(unused_unsafe)] - unsafe { - (*vec_ptr)[i] = evaluated; - } - i += 1; - } - - #[allow(unused_unsafe)] - unsafe { vec_uninit.assume_init() } - } - } -} diff --git a/src/simd/float.rs b/src/simd/float.rs index f72c344..ed3dd55 100644 --- a/src/simd/float.rs +++ b/src/simd/float.rs @@ -1,5 +1,4 @@ use crate::shared::float::*; -use crate::*; use core::simd::*; impl FastApproxFloat for Simd @@ -8,21 +7,37 @@ where { #[inline(always)] unsafe fn sin_fast_approx(self) -> Self { - wrap_auto_vectorize!(sin_fast_approx::, LANES, self) + Simd::from_array( + self.as_array() + .map(|e| sin_fast_approx::(e)), + ) } #[inline(always)] unsafe fn cos_fast_approx(self) -> Self { - wrap_auto_vectorize!(sin_fast_approx::, LANES, self) + Simd::from_array( + self.as_array() + .map(|e| sin_fast_approx::(e)), + ) } #[inline(always)] unsafe fn log_fast_approx(self, base: Self) -> Self { - wrap_auto_vectorize!(log_fast_approx::, LANES, self, base) + Simd::from_array( + self.as_array() + .zip(*base.as_array()) + .map(|(self_elem, base_elem)| log_fast_approx::(self_elem, base_elem)), + ) } #[inline(always)] unsafe fn log_fast_approx_const_base(self, base: Self) -> Self { - wrap_auto_vectorize!(log_fast_approx_const_base::, LANES, self, base) + Simd::from_array( + self.as_array() + .zip(*base.as_array()) + .map(|(self_elem, base_elem)| { + log_fast_approx_const_base::(self_elem, base_elem) + }), + ) } } diff --git a/src/simd/int.rs b/src/simd/int.rs index 4fae199..58279ca 100644 --- a/src/simd/int.rs +++ b/src/simd/int.rs @@ -1,9 +1,8 @@ use crate::shared::int::*; use crate::*; +use core::mem::size_of; use core::simd::*; -use mem::size_of; -use std::mem; macro_rules! unsigned_impl { ($u:ty,$s:ty,$f:ty,$mant_bits:expr) => { @@ -31,10 +30,10 @@ macro_rules! unsigned_impl { let unsigned_mask = Mask::from_int_unchecked( self.cast::<$s>() >> Simd::splat(UNSIGNED_LOG2 as $s), ); - + // need to get rid of bits that could cause a round-up let adjusted = (self & !(self >> Simd::splat($mant_bits + 1))).cast::<$s>(); - + let exponent = (adjusted.cast::<$f>().to_bits() >> Simd::splat($mant_bits)) - Simd::splat((1 << ((size_of::<$f>() * 8) - 2 - $mant_bits)) - 1); diff --git a/src/simd/mod.rs b/src/simd/mod.rs index 4bf4b3b..a89042e 100644 --- a/src/simd/mod.rs +++ b/src/simd/mod.rs @@ -1,3 +1,2 @@ -pub mod consts; pub mod float; pub mod int; diff --git a/src/test/compile.rs b/src/test/compile.rs index 11e5874..fe31a4c 100644 --- a/src/test/compile.rs +++ b/src/test/compile.rs @@ -1,8 +1,8 @@ -use crate::shared::int::*; +use crate::shared::float::FastApproxFloat; use core::simd::*; #[inline(never)] #[allow(dead_code)] -pub fn test(x: u32x8) -> u32x8 { - unsafe { x.ilog_const_base_unchecked::<2>() } +pub fn test(x: f32x8, base: f32x8) -> f32x8 { + unsafe { x.log_fast_approx::<0>(base) } } diff --git a/src/test/mod.rs b/src/test/mod.rs index 7d02b94..29eef7a 100644 --- a/src/test/mod.rs +++ b/src/test/mod.rs @@ -1,2 +1,4 @@ +#[cfg(test)] pub mod checks; -pub mod compile; \ No newline at end of file + +pub mod compile;