Skip to content

Commit

Permalink
Convert lrgb_to_xyz to DType
Browse files Browse the repository at this point in the history
Should be a best case scenario. Literally just element-wise FMA.

Almost +30%: 107µs to 77µs on arch=native

It's *cool*, yes, but the code quality degrades so much I wonder if it's
even worth it. Then when you factor in the complex 3-dimension
deinterleave that'll be needed to use it properly...

I still have to test it of course, but I just feel it'll eat what little
perf I get. I have AVX512 as well, so AVX≤2 will probably end up hurting
even more.
  • Loading branch information
Beinsezii committed Jun 3, 2024
1 parent 138a072 commit 34d3a86
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 8 deletions.
5 changes: 5 additions & 0 deletions benches/conversions.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#![feature(portable_simd)]
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use colcon::{Space, convert_space};
//use std::simd::prelude::*;

fn pixels() -> Box<[f32]> {
let size = 512;
Expand All @@ -25,6 +26,10 @@ pub fn conversions(c: &mut Criterion) {
black_box(pixels.clone().chunks_exact_mut(3).for_each(|pixel| colcon::lrgb_to_xyz(pixel.try_into().unwrap())));
} ));

c.bench_function("lrgb_to_xyz_simd", |b| b.iter(|| {
black_box(pixels.clone().as_simd_mut::<32>().1.chunks_exact_mut(3).for_each(|pixel| colcon::lrgb_to_xyz(pixel.try_into().unwrap())));
} ));

c.bench_function("xyz_to_cielab", |b| b.iter(|| {
black_box(pixels.clone().chunks_exact_mut(3).for_each(|pixel| colcon::xyz_to_cielab(pixel.try_into().unwrap())));
} ));
Expand Down
20 changes: 12 additions & 8 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -288,11 +288,11 @@ fn matmul3t(pixel: [f32; 3], matrix: [[f32; 3]; 3]) -> [f32; 3] {
}

/// Transposed 3 * 3x3 matrix multiply, ie matrix @ pixel
fn matmul3(matrix: [[f32; 3]; 3], pixel: [f32; 3]) -> [f32; 3] {
fn matmul3<T: DType>(matrix: [[f32; 3]; 3], pixel: [T; 3]) -> [T; 3] {
[
pixel[0] * matrix[0][0] + pixel[1] * matrix[0][1] + pixel[2] * matrix[0][2],
pixel[0] * matrix[1][0] + pixel[1] * matrix[1][1] + pixel[2] * matrix[1][2],
pixel[0] * matrix[2][0] + pixel[1] * matrix[2][1] + pixel[2] * matrix[2][2],
pixel[0].fma(DType::f32(matrix[0][0]), pixel[1].fma(DType::f32(matrix[0][1]), pixel[2] * DType::f32(matrix[0][2]))),
pixel[0].fma(DType::f32(matrix[1][0]), pixel[1].fma(DType::f32(matrix[1][1]), pixel[2] * DType::f32(matrix[1][2]))),
pixel[0].fma(DType::f32(matrix[2][0]), pixel[1].fma(DType::f32(matrix[2][1]), pixel[2] * DType::f32(matrix[2][2]))),
]
}
// ### MATRICES ### }}}
Expand Down Expand Up @@ -1048,11 +1048,15 @@ pub extern "C" fn srgb_to_lrgb(pixel: &mut [f32; 3]) {
/// Convert from Linear Light RGB to CIE XYZ, D65 standard illuminant
///
/// <https://en.wikipedia.org/wiki/SRGB#From_sRGB_to_CIE_XYZ>
#[no_mangle]
pub extern "C" fn lrgb_to_xyz(pixel: &mut [f32; 3]) {
pub fn lrgb_to_xyz<T: DType>(pixel: &mut [T; 3]) {
*pixel = matmul3(XYZ65_MAT, *pixel)
}

/// Unmangled C ABI entry point for the `f32` case of [`lrgb_to_xyz`].
///
/// Forwards `pixel` to the generic implementation, which overwrites it in place.
#[no_mangle]
extern "C" fn lrgb_to_xyz_f32(pixel: &mut [f32; 3]) {
    // Spell out the instantiation so the exported symbol is visibly tied to `f32`.
    lrgb_to_xyz::<f32>(pixel);
}

/// Convert from CIE XYZ to CIE LAB.
///
/// <https://en.wikipedia.org/wiki/CIELAB_color_space#From_CIEXYZ_to_CIELAB>
Expand Down Expand Up @@ -1601,7 +1605,7 @@ mod tests {

#[test]
fn xyz_forwards() {
func_cmp(LRGB, XYZ, lrgb_to_xyz)
func_cmp(LRGB, XYZ, lrgb_to_xyz_f32)
}
#[test]
fn xyz_backwards() {
Expand Down Expand Up @@ -1752,7 +1756,7 @@ mod tests {
("hsv_backwards", hsv_to_srgb),
("lrgb_forwards", srgb_to_lrgb),
("lrgb_backwards", lrgb_to_srgb),
("xyz_forwards", lrgb_to_xyz),
("xyz_forwards", lrgb_to_xyz_f32),
("xyz_backwards", xyz_to_lrgb),
("lab_forwards", xyz_to_cielab),
("lab_backwards", cielab_to_xyz),
Expand Down

0 comments on commit 34d3a86

Please sign in to comment.