diff --git a/Cargo.toml b/Cargo.toml index 7b093ac..d861b9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,6 +13,10 @@ publish = true # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +nightly = [] +default = [] + [dependencies] [dev-dependencies] diff --git a/benches/conversions.rs b/benches/conversions.rs index 11e8684..032e96f 100644 --- a/benches/conversions.rs +++ b/benches/conversions.rs @@ -1,3 +1,4 @@ +#![feature(portable_simd)] use criterion::{black_box, criterion_group, criterion_main, Criterion}; use colcon::{Space, convert_space}; @@ -73,12 +74,11 @@ pub fn conversions(c: &mut Criterion) { } )); c.bench_function("srgb_eotf", |b| b.iter(|| { - const N: usize = 16; - black_box(pixels.clone().chunks_exact_mut(N).for_each(|simd| { - let simd: &mut [f32; N] = simd.try_into().unwrap(); - *simd = colcon::srgb_eotf(*simd); - })); - //black_box(pixels.clone().iter_mut().for_each(|n| *n = colcon::srgb_eotf(*n))); + black_box(pixels.clone().iter_mut().for_each(|n| *n = colcon::srgb_eotf(*n))); + } )); + + c.bench_function("srgb_eotf_simd", |b| b.iter(|| { + black_box(pixels.clone().as_simd_mut::<32>().1.iter_mut().for_each(|n| *n = colcon::srgb_eotf(*n))); } )); c.bench_function("srgb_eotf_inverse", |b| b.iter(|| { diff --git a/src/lib.rs b/src/lib.rs index c039335..748bc07 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +#![cfg_attr(feature = "nightly", feature(portable_simd))] #![warn(missing_docs)] //! Simple colorspace conversions in pure Rust. @@ -9,8 +10,13 @@ //! This crate references CIE Standard Illuminant D65 for functions to/from CIE XYZ use core::ffi::{c_char, CStr}; -//use core::cmp::PartialOrd; -//use core::ops::{Add, Div, Mul, Rem, Sub}; +use core::ops::{Add, Div, Mul, Rem, Sub}; + +#[cfg(feature = "nightly")] +use std::simd::prelude::*; + +#[cfg(feature = "nightly")] +use std::simd::{LaneCount, StdFloat, SupportedLaneCount}; fn spowf(n: f32, power: f32) -> f32 { n.abs().powf(power).copysign(n) @@ -19,16 +25,21 @@ fn spowf(n: f32, power: f32) -> f32 { enum Cmp { Gt, Lt, - GtEq, - LtEq, + Ge, + Le, } -trait DType: Sized + Copy { +trait DType: + Sized + + Copy + + Add + + Div + + Mul + + Sub + + Rem +{ fn f32(b: f32) -> Self; - fn add(self, b: Self) -> Self; - fn sub(self, b: Self) -> Self; - fn div(self, b: Self) -> Self; - fn mul(self, b: Self) -> Self; + fn fma(self, mul: Self, add: Self) -> Self; fn powf(self, b: Self) -> Self; fn branch Self, G: FnOnce() -> Self>( self, @@ -44,20 +55,41 @@ impl DType for f32 { b } - fn add(self, b: Self) -> Self { - self + b + fn fma(self, mul: Self, add: Self) -> Self { + self.mul_add(mul, add) + } + + fn powf(self, b: Self) -> Self { + self.powf(b) } - fn sub(self, b: Self) -> Self { - self - b + fn branch Self, G: FnOnce() -> Self>( + self, + b: Self, + cmp: Cmp, + x: F, + y: G, + ) -> Self { + if match cmp { + Cmp::Gt => self > b, + Cmp::Lt => self < b, + Cmp::Ge => self >= b, + Cmp::Le => self <= b, + } { + x() + } else { + y() + } } +} - fn div(self, b: Self) -> Self { - self / b +impl DType for f64 { + fn f32(b: f32) -> Self { + b.into() } - fn mul(self, b: Self) -> Self { - self * b + fn fma(self, mul: Self, add: Self) -> Self { + self.mul_add(mul, add) } fn powf(self, b: Self) -> Self { @@ -74,8 +106,8 @@ impl DType for f32 { if match cmp { Cmp::Gt => self > b, Cmp::Lt => self < b, - Cmp::GtEq => self >= b, - Cmp::LtEq => self <= b, + Cmp::Ge => self >= b, + Cmp::Le => self <= b, } { x() } else { @@ -84,69 +116,41 @@ impl DType for f32 { } } -impl DType for [f32; N] { +#[cfg(feature = "nightly")] +impl DType for Simd +where + LaneCount: SupportedLaneCount, +{ fn f32(object: f32) -> Self { - [object; N] - } - - fn add(mut self, b: Self) -> Self { - self.iter_mut() - .zip(b.into_iter()) - .for_each(|(a, b)| *a = *a + b); - self + Self::splat(object) } - fn sub(mut self, b: Self) -> Self { - self.iter_mut() - .zip(b.into_iter()) - .for_each(|(a, b)| *a = *a - b); - self - } - - fn div(mut self, b: Self) -> Self { - self.iter_mut() - .zip(b.into_iter()) - .for_each(|(a, b)| *a = *a / b); - self - } - - fn mul(mut self, b: Self) -> Self { - self.iter_mut() - .zip(b.into_iter()) - .for_each(|(a, b)| *a = *a * b); - self + fn fma(self, mul: Self, add: Self) -> Self { + self.mul_add(mul, add) } fn powf(mut self, b: Self) -> Self { - self.iter_mut() - .zip(b.into_iter()) - .for_each(|(a, b)| *a = a.powf(b)); + self.as_mut_array() + .iter_mut() + .zip(b.as_array().iter()) + .for_each(|(a, b)| *a = a.powf(*b)); self } fn branch Self, G: FnOnce() -> Self>( - mut self, + self, b: Self, cmp: Cmp, x: F, y: G, ) -> Self { - self.iter_mut() - .zip(b.into_iter()) - .zip(x().into_iter().zip(y().into_iter())) - .for_each(|((a, b), (x, y))| { - if match cmp { - Cmp::Gt => *a > b, - Cmp::Lt => *a < b, - Cmp::GtEq => *a >= b, - Cmp::LtEq => *a <= b, - } { - *a = x - } else { - *a = y - } - }); - self + match cmp { + Cmp::Gt => self.simd_gt(b), + Cmp::Lt => self.simd_lt(b), + Cmp::Ge => self.simd_ge(b), + Cmp::Le => self.simd_le(b), + } + .select(x(), y()) } } @@ -300,29 +304,23 @@ fn matmul3(matrix: [[f32; 3]; 3], pixel: [f32; 3]) -> [f32; 3] { /// //#[no_mangle] //pub fn srgb_eotf(n: T) -> T { -// if n <= SRGBEOTF_CHI.into() { -// n / SRGBEOTF_PHI.into() +// if n <= SRGBEOTF_CHI { +// n / SRGBEOTF_PHI // } else { -// ((n + SRGBEOTF_ALPHA.into()) / (SRGBEOTF_ALPHA + 1.0).into()).powf(SRGBEOTF_GAMMA.into()) +// ((n + SRGBEOTF_ALPHA) / (SRGBEOTF_ALPHA + 1.0)).powf(SRGBEOTF_GAMMA) // } //} pub fn srgb_eotf(n: T) -> T { n.branch( DType::f32(SRGBEOTF_CHI), - Cmp::LtEq, - || n.div(DType::f32(SRGBEOTF_PHI)), + Cmp::Le, + || n / DType::f32(SRGBEOTF_PHI), || { - n.add(DType::f32(SRGBEOTF_ALPHA)) - .div(DType::f32(SRGBEOTF_ALPHA + 1.0)) + ((n + DType::f32(SRGBEOTF_ALPHA)) / DType::f32(SRGBEOTF_ALPHA + 1.0)) .powf(DType::f32(SRGBEOTF_GAMMA)) }, ) - //if n <= SRGBEOTF_CHI.into() { - // n / SRGBEOTF_PHI.into() - //} else { - // ((n + SRGBEOTF_ALPHA.into()) / (SRGBEOTF_ALPHA + 1.0).into()).powf(SRGBEOTF_GAMMA.into()) - //} } /// Inverse sRGB Electro-Optical Transfer Function