From 764f87718366fb882370b84d0310c3b634eb7d4d Mon Sep 17 00:00:00 2001 From: Fabio Lagalla Date: Fri, 14 Jan 2022 18:02:36 +0100 Subject: [PATCH] Jpeg draft --- README.md | 1 + src/color_space.rs | 2 + src/convert_image/avx2.rs | 420 ++++++++++++++++++++++++++++++++---- src/convert_image/common.rs | 33 ++- src/convert_image/sse2.rs | 386 ++++++++++++++++++++++++++++++--- src/convert_image/x86.rs | 352 +++++++++++++++++++++++++++--- src/dispatcher.rs | 2 +- src/lib.rs | 106 ++++++++- tests/tests.rs | 89 +++++++- 9 files changed, 1276 insertions(+), 115 deletions(-) diff --git a/README.md b/README.md index fe86f0a..41123fa 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,7 @@ The supported color models are: * YCbCr, ITU-R Recommendation BT.601 (standard video system) * YCbCr, ITU-R Recommendation BT.709 (CSC systems) +* YCbCr, ITU-R Recommendation BT.601 full range (JPEG) ## Requirements diff --git a/src/color_space.rs b/src/color_space.rs index 195ae7e..a94d3ff 100644 --- a/src/color_space.rs +++ b/src/color_space.rs @@ -30,4 +30,6 @@ pub enum ColorSpace { Bt601, /// YCbCr, ITU-R Recommendation BT.709 (CSC systems) Bt709, + /// YCbCr, ITU-R Recommendation BT.601 full range (JPEG) + Bt601Full, } diff --git a/src/convert_image/avx2.rs b/src/convert_image/avx2.rs index 96d9e86..6233356 100644 --- a/src/convert_image/avx2.rs +++ b/src/convert_image/avx2.rs @@ -22,28 +22,30 @@ use core::ptr::{read_unaligned as loadu, write_unaligned as storeu}; #[cfg(target_arch = "x86")] use core::arch::x86::{ - __m256i, _mm256_add_epi16, _mm256_add_epi32, _mm256_cmpeq_epi32, _mm256_extracti128_si256, - _mm256_madd_epi16, _mm256_mulhi_epu16, _mm256_or_si256, _mm256_packs_epi32, - _mm256_packus_epi16, _mm256_permute2x128_si256, _mm256_permute4x64_epi64, - _mm256_permutevar8x32_epi32, _mm256_set1_epi16, _mm256_set1_epi32, _mm256_set_epi32, - _mm256_set_epi64x, _mm256_set_m128i, _mm256_setr_epi32, _mm256_setr_epi8, _mm256_setzero_si256, - _mm256_shuffle_epi8, _mm256_slli_epi16, _mm256_slli_epi32, _mm256_srai_epi16, - _mm256_srai_epi32, _mm256_srli_epi16, 
_mm256_srli_epi32, _mm256_srli_si256, _mm256_sub_epi16, - _mm256_unpackhi_epi16, _mm256_unpackhi_epi8, _mm256_unpacklo_epi16, _mm256_unpacklo_epi32, - _mm256_unpacklo_epi64, _mm256_unpacklo_epi8, _mm_prefetch, _mm_setzero_si128, _MM_HINT_NTA, + __m256i, _mm256_add_epi16, _mm256_add_epi32, _mm256_and_si256, _mm256_cmpeq_epi32, + _mm256_extracti128_si256, _mm256_madd_epi16, _mm256_mulhi_epu16, _mm256_or_si256, + _mm256_packs_epi32, _mm256_packus_epi16, _mm256_permute2x128_si256, _mm256_permute4x64_epi64, + _mm256_permutevar8x32_epi32, _mm256_set1_epi16, _mm256_set1_epi32, _mm256_set1_epi64x, + _mm256_set_epi16, _mm256_set_epi32, _mm256_set_epi64x, _mm256_set_m128i, _mm256_setr_epi32, + _mm256_setr_epi8, _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_slli_epi16, + _mm256_slli_epi32, _mm256_srai_epi16, _mm256_srai_epi32, _mm256_srli_epi16, _mm256_srli_epi32, + _mm256_srli_si256, _mm256_sub_epi16, _mm256_unpackhi_epi16, _mm256_unpackhi_epi8, + _mm256_unpacklo_epi16, _mm256_unpacklo_epi32, _mm256_unpacklo_epi64, _mm256_unpacklo_epi8, + _mm_prefetch, _mm_setzero_si128, _MM_HINT_NTA, }; #[cfg(target_arch = "x86_64")] use core::arch::x86_64::{ - __m256i, _mm256_add_epi16, _mm256_add_epi32, _mm256_cmpeq_epi32, _mm256_extract_epi64, - _mm256_extracti128_si256, _mm256_madd_epi16, _mm256_mulhi_epu16, _mm256_or_si256, - _mm256_packs_epi32, _mm256_packus_epi16, _mm256_permute2x128_si256, _mm256_permute4x64_epi64, - _mm256_permutevar8x32_epi32, _mm256_set1_epi16, _mm256_set1_epi32, _mm256_set_epi32, - _mm256_set_epi64x, _mm256_set_m128i, _mm256_setr_epi32, _mm256_setr_epi8, _mm256_setzero_si256, - _mm256_shuffle_epi8, _mm256_slli_epi16, _mm256_slli_epi32, _mm256_srai_epi16, - _mm256_srai_epi32, _mm256_srli_epi16, _mm256_srli_epi32, _mm256_srli_si256, _mm256_sub_epi16, - _mm256_unpackhi_epi16, _mm256_unpackhi_epi8, _mm256_unpacklo_epi16, _mm256_unpacklo_epi32, - _mm256_unpacklo_epi64, _mm256_unpacklo_epi8, _mm_prefetch, _mm_setzero_si128, _MM_HINT_NTA, + __m256i, 
_mm256_add_epi16, _mm256_add_epi32, _mm256_and_si256, _mm256_cmpeq_epi32, + _mm256_extract_epi64, _mm256_extracti128_si256, _mm256_madd_epi16, _mm256_mulhi_epu16, + _mm256_or_si256, _mm256_packs_epi32, _mm256_packus_epi16, _mm256_permute2x128_si256, + _mm256_permute4x64_epi64, _mm256_permutevar8x32_epi32, _mm256_set1_epi16, _mm256_set1_epi32, + _mm256_set1_epi64x, _mm256_set_epi32, _mm256_set_epi64x, _mm256_set_m128i, _mm256_setr_epi32, + _mm256_setr_epi8, _mm256_setzero_si256, _mm256_shuffle_epi8, _mm256_slli_epi16, + _mm256_slli_epi32, _mm256_srai_epi16, _mm256_srai_epi32, _mm256_srli_epi16, _mm256_srli_epi32, + _mm256_srli_si256, _mm256_sub_epi16, _mm256_unpackhi_epi16, _mm256_unpackhi_epi8, + _mm256_unpacklo_epi16, _mm256_unpacklo_epi32, _mm256_unpacklo_epi64, _mm256_unpacklo_epi8, + _mm_prefetch, _mm_setzero_si128, _MM_HINT_NTA, }; const LANE_COUNT: usize = 32; @@ -79,7 +81,7 @@ macro_rules! align_dqword_2x96 { }; } -const FORWARD_WEIGHTS: [[i32; 6]; Colorimetry::Length as usize] = [ +const FORWARD_WEIGHTS: [[i32; 8]; Colorimetry::Length as usize] = [ [ i32x2_to_i32(XG_601 - SHORT_HALF, XR_601), i32x2_to_i32(SHORT_HALF, XB_601), @@ -87,6 +89,8 @@ const FORWARD_WEIGHTS: [[i32; 6]; Colorimetry::Length as usize] = [ i32x2_to_i32(YG_601, YR_601), i32x2_to_i32(0, ZB_601), i32x2_to_i32(0, YB_601), + Y_OFFSET, + 0, ], [ i32x2_to_i32(XG_709 - SHORT_HALF, XR_709), @@ -95,6 +99,18 @@ const FORWARD_WEIGHTS: [[i32; 6]; Colorimetry::Length as usize] = [ i32x2_to_i32(YG_709, YR_709), i32x2_to_i32(0, ZB_709), i32x2_to_i32(0, YB_709), + Y_OFFSET, + 0, + ], + [ + i32x2_to_i32(XG_JPEG - SHORT_HALF, XR_JPEG), + i32x2_to_i32(SHORT_HALF, XB_JPEG), + i32x2_to_i32(ZG_JPEG, ZR_JPEG - SHORT_HALF), + i32x2_to_i32(YG_JPEG, YR_JPEG), + i32x2_to_i32(0, ZB_JPEG), + i32x2_to_i32(0, YB_JPEG - SHORT_HALF), + FIX16_HALF, + 1, ], ]; @@ -119,6 +135,16 @@ const BACKWARD_WEIGHTS: [[i16; 8]; Colorimetry::Length as usize] = [ i32_to_i16(GP_709), i32_to_i16(BN_709), ], + [ + i32_to_i16(XXYM_JPEG), + 
i32_to_i16(RCRM_JPEG), + i32_to_i16(GCRM_JPEG), + i32_to_i16(GCBM_JPEG), + i32_to_i16(BCBM_JPEG), + i32_to_i16(RN_JPEG), + i32_to_i16(GP_JPEG), + i32_to_i16(BN_JPEG), + ], ]; /// Convert fixed point to int (8-wide) @@ -294,6 +320,7 @@ unsafe fn lrgb_to_yuv_8x( sampler: Sampler, y_weigths: &[__m256i; 3], uv_weights: &[__m256i; 3], + full_range: bool, ) { let (rg0, bg0) = unpack_ui8x3_i16x2_8x(rgb0, sampler); pack_i32_8x( @@ -309,10 +336,21 @@ unsafe fn lrgb_to_yuv_8x( let srg = sum_i16x2_neighborhood_4x(rg0, rg1); let sbg = sum_i16x2_neighborhood_4x(bg0, bg1); - pack_i32_8x( - uv, - fix_to_i32_8x!(affine_transform(srg, sbg, uv_weights), FIX18), - ); + let mut t = affine_transform(srg, sbg, uv_weights); + if full_range { + t = _mm256_add_epi32( + t, + _mm256_slli_epi32( + _mm256_or_si256( + _mm256_and_si256(sbg, _mm256_set1_epi64x(0xFFFF_i64)), + _mm256_and_si256(srg, _mm256_set1_epi64x(0xFFFF_0000_0000_i64)), + ), + 14, + ), + ); + } + + pack_i32_8x(uv, fix_to_i32_8x!(t, FIX18)); } #[inline(always)] @@ -326,6 +364,7 @@ unsafe fn lrgb_to_i420_8x( sampler: Sampler, y_weigths: &[__m256i; 3], uv_weights: &[__m256i; 3], + full_range: bool, ) { let (rg0, bg0) = unpack_ui8x3_i16x2_8x(rgb0, sampler); pack_i32_8x( @@ -341,9 +380,22 @@ unsafe fn lrgb_to_i420_8x( let srg = sum_i16x2_neighborhood_4x(rg0, rg1); let sbg = sum_i16x2_neighborhood_4x(bg0, bg1); + let mut t = affine_transform(srg, sbg, uv_weights); + if full_range { + t = _mm256_add_epi32( + t, + _mm256_slli_epi32( + _mm256_or_si256( + _mm256_and_si256(sbg, _mm256_set1_epi64x(0xFFFF_i64)), + _mm256_and_si256(srg, _mm256_set1_epi64x(0xFFFF_0000_0000_i64)), + ), + 14, + ), + ); + } let shuff = _mm256_permutevar8x32_epi32( - fix_to_i32_8x!(affine_transform(srg, sbg, uv_weights), FIX18), + fix_to_i32_8x!(t, FIX18), _mm256_set_epi32(7, 5, 3, 1, 6, 4, 2, 0), ); @@ -373,6 +425,7 @@ unsafe fn lrgb_to_i444_8x( y_weights: &[__m256i; 3], u_weights: &[__m256i; 3], v_weights: &[__m256i; 3], + full_range: bool, ) { let (rg, bg) = 
unpack_ui8x3_i16x2_8x(rgb, sampler); pack_i32_8x( @@ -380,15 +433,15 @@ unsafe fn lrgb_to_i444_8x( fix_to_i32_8x!(affine_transform(rg, bg, y_weights), FIX16), ); - pack_i32_8x( - u, - fix_to_i32_8x!(affine_transform(rg, bg, u_weights), FIX16), - ); + let mut tu = affine_transform(rg, bg, u_weights); + let mut tv = affine_transform(rg, bg, v_weights); + if full_range { + tu = _mm256_add_epi32(tu, _mm256_srli_epi32(_mm256_slli_epi32(bg, 16), 2)); + tv = _mm256_add_epi32(tv, _mm256_srli_epi32(_mm256_slli_epi32(rg, 16), 2)); + } - pack_i32_8x( - v, - fix_to_i32_8x!(affine_transform(rg, bg, v_weights), FIX16), - ); + pack_i32_8x(u, fix_to_i32_8x!(tu, FIX16)); + pack_i32_8x(v, fix_to_i32_8x!(tv, FIX16)); } #[cfg(not(tarpaulin_include))] @@ -408,7 +461,7 @@ unsafe fn lrgb_to_yuv_avx2( dst_strides: (usize, usize), dst_buffers: &mut (&mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 6], + weights: &[i32; 8], sampler: Sampler, ) { const DST_DEPTH: usize = LRGB_TO_YUV_WAVES; @@ -418,7 +471,7 @@ unsafe fn lrgb_to_yuv_avx2( let y_weigths = [ _mm256_set1_epi32(weights[0]), _mm256_set1_epi32(weights[1]), - _mm256_set1_epi32(Y_OFFSET), + _mm256_set1_epi32(weights[6]), ]; let uv_weights = [ @@ -430,7 +483,7 @@ unsafe fn lrgb_to_yuv_avx2( weights[4], weights[5], weights[4], weights[5], weights[4], weights[5], weights[4], weights[5], ), - _mm256_set1_epi32(C_OFFSET), + _mm256_set1_epi32(C_OFFSET - FIX18_HALF * weights[7]), ]; let src_group = src_buffer.as_ptr(); @@ -459,6 +512,7 @@ unsafe fn lrgb_to_yuv_avx2( sampler, &y_weigths, &uv_weights, + weights[7] == 1, ); } } @@ -476,6 +530,7 @@ unsafe fn lrgb_to_yuv_avx2( sampler, &y_weigths, &uv_weights, + weights[7] == 1, ); } @@ -489,6 +544,7 @@ unsafe fn lrgb_to_yuv_avx2( Sampler::BgrOverflow, &y_weigths, &uv_weights, + weights[7] == 1, ); } } @@ -503,7 +559,7 @@ unsafe fn lrgb_to_i420_avx2( dst_strides: (usize, usize, usize), dst_buffers: &mut (&mut [u8], &mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 6], + weights: &[i32; 
8], sampler: Sampler, ) { let (y_stride, u_stride, v_stride) = dst_strides; @@ -511,7 +567,7 @@ unsafe fn lrgb_to_i420_avx2( let y_weigths = [ _mm256_set1_epi32(weights[0]), _mm256_set1_epi32(weights[1]), - _mm256_set1_epi32(Y_OFFSET), + _mm256_set1_epi32(weights[6]), ]; let uv_weights = [ @@ -523,7 +579,7 @@ unsafe fn lrgb_to_i420_avx2( weights[4], weights[5], weights[4], weights[5], weights[4], weights[5], weights[4], weights[5], ), - _mm256_set1_epi32(C_OFFSET), + _mm256_set1_epi32(C_OFFSET - FIX18_HALF * weights[7]), ]; let src_group = src_buffer.as_ptr(); @@ -554,6 +610,7 @@ unsafe fn lrgb_to_i420_avx2( sampler, &y_weigths, &uv_weights, + weights[7] == 1, ); } } @@ -572,6 +629,7 @@ unsafe fn lrgb_to_i420_avx2( sampler, &y_weigths, &uv_weights, + weights[7] == 1, ); } @@ -586,6 +644,7 @@ unsafe fn lrgb_to_i420_avx2( Sampler::BgrOverflow, &y_weigths, &uv_weights, + weights[7] == 1, ); } } @@ -805,7 +864,7 @@ unsafe fn lrgb_to_i444_avx2( dst_strides: (usize, usize, usize), dst_buffers: &mut (&mut [u8], &mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 6], + weights: &[i32; 8], sampler: Sampler, ) { let (y_stride, u_stride, v_stride) = dst_strides; @@ -813,19 +872,19 @@ unsafe fn lrgb_to_i444_avx2( let y_weights = [ _mm256_set1_epi32(weights[0]), _mm256_set1_epi32(weights[1]), - _mm256_set1_epi32(Y_OFFSET), + _mm256_set1_epi32(weights[6]), ]; let u_weights = [ _mm256_set1_epi32(weights[3]), _mm256_set1_epi32(weights[5]), - _mm256_set1_epi32(C_OFFSET16), + _mm256_set1_epi32(C_OFFSET16 - FIX16_HALF * weights[7]), ]; let v_weights = [ _mm256_set1_epi32(weights[2]), _mm256_set1_epi32(weights[4]), - _mm256_set1_epi32(C_OFFSET16), + _mm256_set1_epi32(C_OFFSET16 - FIX16_HALF * weights[7]), ]; let src_group = src_buffer.as_ptr(); @@ -855,6 +914,7 @@ unsafe fn lrgb_to_i444_avx2( &y_weights, &u_weights, &v_weights, + weights[7] == 1, ); } } @@ -872,6 +932,7 @@ unsafe fn lrgb_to_i444_avx2( &y_weights, &u_weights, &v_weights, + weights[7] == 1, ); } @@ -885,6 +946,7 @@ 
unsafe fn lrgb_to_i444_avx2( &y_weights, &u_weights, &v_weights, + weights[7] == 1, ); } } @@ -2511,3 +2573,277 @@ pub fn bgra_lrgb_rgb_lrgb( true } + +pub fn argb_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} + +pub fn nv12_jpeg_bgra_lrgb( + width: u32, + height: u32, + last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + nv12_bgra_lrgb( + width, + height, + last_src_plane as usize, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn i420_jpeg_bgra_lrgb( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: 
&[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + i420_bgra_lrgb( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn i444_jpeg_bgra_lrgb( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + i444_bgra_lrgb( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn argb_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} + +pub fn argb_lrgb_i444_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + 
lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_i444_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_i444_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} diff --git a/src/convert_image/common.rs b/src/convert_image/common.rs index 7d26341..64de904 100644 --- a/src/convert_image/common.rs +++ b/src/convert_image/common.rs @@ -71,7 +71,7 @@ pub const FIX6: i32 = 6; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub const SHORT_HALF: i32 = 16384; -// Cooefficient table for 601 +// Coefficient table for 601 pub const XR_601: i32 = 16829; pub const XG_601: i32 = 33039; pub const XB_601: i32 = 6416; @@ -91,7 +91,7 @@ pub const RN_601: i32 = 14234; pub const GP_601: i32 = 8709; pub const BN_601: i32 = 17685; -// Cooefficient table for 709 +// Coefficient table for 709 pub const XR_709: i32 = 11966; pub const XG_709: i32 = 40254; pub const XB_709: i32 = 4064; @@ -111,11 +111,31 @@ pub const RN_709: i32 = 15846; pub const GP_709: i32 = 4952; pub const BN_709: i32 = 18465; +// Coefficient table for jpeg +pub const XR_JPEG: i32 = 19595; +pub const XG_JPEG: i32 = 38470; +pub const XB_JPEG: i32 = 7471; +pub const YR_JPEG: i32 = -11057; +pub const YG_JPEG: 
i32 = -21709; +pub const YB_JPEG: i32 = 32768; +pub const ZR_JPEG: i32 = 32768; +pub const ZG_JPEG: i32 = -27438; +pub const ZB_JPEG: i32 = -5328; + +pub const XXYM_JPEG: i32 = 16384; +pub const RCRM_JPEG: i32 = 22970; +pub const GCRM_JPEG: i32 = 11700; +pub const GCBM_JPEG: i32 = 5638; +pub const BCBM_JPEG: i32 = 29032; +pub const RN_JPEG: i32 = 11363; +pub const GP_JPEG: i32 = 8633; +pub const BN_JPEG: i32 = 14370; + // Other defines -pub const Y_MIN: i32 = 16; -pub const C_HALF: i32 = 128; -pub const FIX16_Y_MIN: i32 = u8_to_fix(Y_MIN, FIX16); -pub const FIX16_C_HALF: i32 = u8_to_fix(C_HALF, FIX16); +const Y_MIN: i32 = 16; +const C_HALF: i32 = 128; +const FIX16_Y_MIN: i32 = u8_to_fix(Y_MIN, FIX16); +const FIX16_C_HALF: i32 = u8_to_fix(C_HALF, FIX16); pub const FIX18_C_HALF: i32 = u8_to_fix(C_HALF, FIX18); pub const Y_OFFSET: i32 = FIX16_Y_MIN + FIX16_HALF; pub const C_OFFSET: i32 = FIX18_C_HALF + FIX18_HALF; @@ -134,6 +154,7 @@ pub enum Sampler { pub enum Colorimetry { Bt601, Bt709, + Jpeg, Length, } diff --git a/src/convert_image/sse2.rs b/src/convert_image/sse2.rs index bbdbc20..19b05e6 100644 --- a/src/convert_image/sse2.rs +++ b/src/convert_image/sse2.rs @@ -65,7 +65,7 @@ macro_rules! 
xcgh_odd_even_words { }; } -const FORWARD_WEIGHTS: [[i32; 6]; Colorimetry::Length as usize] = [ +const FORWARD_WEIGHTS: [[i32; 8]; Colorimetry::Length as usize] = [ [ i32x2_to_i32(XG_601 - SHORT_HALF, XR_601), i32x2_to_i32(SHORT_HALF, XB_601), @@ -73,6 +73,8 @@ const FORWARD_WEIGHTS: [[i32; 6]; Colorimetry::Length as usize] = [ i32x2_to_i32(YG_601, YR_601), i32x2_to_i32(0, ZB_601), i32x2_to_i32(0, YB_601), + Y_OFFSET, + 0, ], [ i32x2_to_i32(XG_709 - SHORT_HALF, XR_709), @@ -81,6 +83,18 @@ const FORWARD_WEIGHTS: [[i32; 6]; Colorimetry::Length as usize] = [ i32x2_to_i32(YG_709, YR_709), i32x2_to_i32(0, ZB_709), i32x2_to_i32(0, YB_709), + Y_OFFSET, + 0, + ], + [ + i32x2_to_i32(XG_JPEG - SHORT_HALF, XR_JPEG), + i32x2_to_i32(SHORT_HALF, XB_JPEG), + i32x2_to_i32(ZG_JPEG, ZR_JPEG - SHORT_HALF), + i32x2_to_i32(YG_JPEG, YR_JPEG), + i32x2_to_i32(0, ZB_JPEG), + i32x2_to_i32(0, YB_JPEG - SHORT_HALF), + FIX16_HALF, + 1, ], ]; @@ -105,6 +119,16 @@ const BACKWARD_WEIGHTS: [[i16; 8]; Colorimetry::Length as usize] = [ i32_to_i16(GP_709), i32_to_i16(BN_709), ], + [ + i32_to_i16(XXYM_JPEG), + i32_to_i16(RCRM_JPEG), + i32_to_i16(GCRM_JPEG), + i32_to_i16(GCBM_JPEG), + i32_to_i16(BCBM_JPEG), + i32_to_i16(RN_JPEG), + i32_to_i16(GP_JPEG), + i32_to_i16(BN_JPEG), + ], ]; /// Convert fixed point to int (4-wide) @@ -298,6 +322,7 @@ unsafe fn lrgb_to_yuv_4x( sampler: Sampler, y_weigths: &[__m128i; 3], uv_weights: &[__m128i; 3], + full_range: bool, ) { let (rg0, bg0) = unpack_ui8x3_i16x2_4x(rgb0, sampler); pack_i32_4x( @@ -313,10 +338,21 @@ unsafe fn lrgb_to_yuv_4x( let srg = sum_i16x2_neighborhood_2x(rg0, rg1); let sbg = sum_i16x2_neighborhood_2x(bg0, bg1); - pack_i32_4x( - uv, - fix_to_i32_4x!(affine_transform(srg, sbg, uv_weights), FIX18), - ); + let mut t = affine_transform(srg, sbg, uv_weights); + if full_range { + t = _mm_add_epi32( + t, + _mm_slli_epi32( + _mm_or_si128( + _mm_and_si128(sbg, _mm_set1_epi64x(0xFFFF_i64)), + _mm_and_si128(srg, _mm_set1_epi64x(0xFFFF_0000_0000_i64)), + ), + 
14, + ), + ); + } + + pack_i32_4x(uv, fix_to_i32_4x!(t, FIX18)); } #[inline(always)] @@ -330,6 +366,7 @@ unsafe fn lrgb_to_i420_4x( sampler: Sampler, y_weigths: &[__m128i; 3], uv_weights: &[__m128i; 3], + full_range: bool, ) { let (rg0, bg0) = unpack_ui8x3_i16x2_4x(rgb0, sampler); pack_i32_4x( @@ -345,12 +382,22 @@ unsafe fn lrgb_to_i420_4x( let srg = sum_i16x2_neighborhood_2x(rg0, rg1); let sbg = sum_i16x2_neighborhood_2x(bg0, bg1); + let mut t = affine_transform(srg, sbg, uv_weights); + if full_range { + t = _mm_add_epi32( + t, + _mm_slli_epi32( + _mm_or_si128( + _mm_and_si128(sbg, _mm_set1_epi64x(0xFFFF_i64)), + _mm_and_si128(srg, _mm_set1_epi64x(0xFFFF_0000_0000_i64)), + ), + 14, + ), + ); + } // shuff: ******v1 ******v0 ******u1 ******u0 - let shuff = _mm_shuffle_epi32( - fix_to_i32_4x!(affine_transform(srg, sbg, uv_weights), FIX18), - mm_shuffle(3, 1, 2, 0), - ); + let shuff = _mm_shuffle_epi32(fix_to_i32_4x!(t, FIX18), mm_shuffle(3, 1, 2, 0)); // uv_res: v1v0u1u0 let packed_to_32 = _mm_packs_epi32(shuff, shuff); @@ -378,6 +425,7 @@ unsafe fn lrgb_to_i444_4x( y_weights: &[__m128i; 3], u_weights: &[__m128i; 3], v_weights: &[__m128i; 3], + full_range: bool, ) { let (rg, bg) = unpack_ui8x3_i16x2_4x(rgb, sampler); pack_i32_4x( @@ -385,15 +433,16 @@ unsafe fn lrgb_to_i444_4x( fix_to_i32_4x!(affine_transform(rg, bg, y_weights), FIX16), ); - pack_i32_4x( - u, - fix_to_i32_4x!(affine_transform(rg, bg, u_weights), FIX16), - ); + let mut tu = affine_transform(rg, bg, u_weights); + let mut tv = affine_transform(rg, bg, v_weights); + if full_range { + tu = _mm_add_epi32(tu, _mm_srli_epi32(_mm_slli_epi32(bg, 16), 2)); + tv = _mm_add_epi32(tv, _mm_srli_epi32(_mm_slli_epi32(rg, 16), 2)); + } - pack_i32_4x( - v, - fix_to_i32_4x!(affine_transform(rg, bg, v_weights), FIX16), - ); + pack_i32_4x(u, fix_to_i32_4x!(tu, FIX16)); + + pack_i32_4x(v, fix_to_i32_4x!(tv, FIX16)); } #[inline] @@ -406,7 +455,7 @@ unsafe fn lrgb_to_yuv_sse2( dst_strides: (usize, usize), dst_buffers: &mut 
(&mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 6], + weights: &[i32; 8], sampler: Sampler, ) { const DST_DEPTH: usize = LRGB_TO_YUV_WAVES; @@ -416,13 +465,13 @@ unsafe fn lrgb_to_yuv_sse2( let y_weigths = [ _mm_set1_epi32(weights[0]), _mm_set1_epi32(weights[1]), - _mm_set1_epi32(Y_OFFSET), + _mm_set1_epi32(weights[6]), ]; let uv_weights = [ _mm_set_epi32(weights[2], weights[3], weights[2], weights[3]), _mm_set_epi32(weights[4], weights[5], weights[4], weights[5]), - _mm_set1_epi32(C_OFFSET), + _mm_set1_epi32(C_OFFSET - FIX18_HALF * weights[7]), ]; let src_group = src_buffer.as_ptr(); @@ -451,6 +500,7 @@ unsafe fn lrgb_to_yuv_sse2( sampler, &y_weigths, &uv_weights, + weights[7] == 1, ); } } @@ -468,6 +518,7 @@ unsafe fn lrgb_to_yuv_sse2( sampler, &y_weigths, &uv_weights, + weights[7] == 1, ); } @@ -481,6 +532,7 @@ unsafe fn lrgb_to_yuv_sse2( Sampler::BgrOverflow, &y_weigths, &uv_weights, + weights[7] == 1, ); } } @@ -495,7 +547,7 @@ unsafe fn lrgb_to_i420_sse2( dst_strides: (usize, usize, usize), dst_buffers: &mut (&mut [u8], &mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 6], + weights: &[i32; 8], sampler: Sampler, ) { let (y_stride, u_stride, v_stride) = dst_strides; @@ -503,13 +555,13 @@ unsafe fn lrgb_to_i420_sse2( let y_weigths = [ _mm_set1_epi32(weights[0]), _mm_set1_epi32(weights[1]), - _mm_set1_epi32(Y_OFFSET), + _mm_set1_epi32(weights[6]), ]; let uv_weights = [ _mm_set_epi32(weights[2], weights[3], weights[2], weights[3]), _mm_set_epi32(weights[4], weights[5], weights[4], weights[5]), - _mm_set1_epi32(C_OFFSET), + _mm_set1_epi32(C_OFFSET - FIX18_HALF * weights[7]), ]; let src_group = src_buffer.as_ptr(); @@ -540,6 +592,7 @@ unsafe fn lrgb_to_i420_sse2( sampler, &y_weigths, &uv_weights, + weights[7] == 1, ); } } @@ -558,6 +611,7 @@ unsafe fn lrgb_to_i420_sse2( sampler, &y_weigths, &uv_weights, + weights[7] == 1, ); } @@ -572,6 +626,7 @@ unsafe fn lrgb_to_i420_sse2( Sampler::BgrOverflow, &y_weigths, &uv_weights, + weights[7] == 1, ); } } @@ 
-586,7 +641,7 @@ unsafe fn lrgb_to_i444_sse2( dst_strides: (usize, usize, usize), dst_buffers: &mut (&mut [u8], &mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 6], + weights: &[i32; 8], sampler: Sampler, ) { let (y_stride, u_stride, v_stride) = dst_strides; @@ -594,19 +649,19 @@ unsafe fn lrgb_to_i444_sse2( let y_weights = [ _mm_set1_epi32(weights[0]), _mm_set1_epi32(weights[1]), - _mm_set1_epi32(Y_OFFSET), + _mm_set1_epi32(weights[6]), ]; let u_weights = [ _mm_set1_epi32(weights[3]), _mm_set1_epi32(weights[5]), - _mm_set1_epi32(C_OFFSET16), + _mm_set1_epi32(C_OFFSET16 - FIX16_HALF * weights[7]), ]; let v_weights = [ _mm_set1_epi32(weights[2]), _mm_set1_epi32(weights[4]), - _mm_set1_epi32(C_OFFSET16), + _mm_set1_epi32(C_OFFSET16 - FIX16_HALF * weights[7]), ]; let src_group = src_buffer.as_ptr(); @@ -636,6 +691,7 @@ unsafe fn lrgb_to_i444_sse2( &y_weights, &u_weights, &v_weights, + weights[7] == 1, ); } } @@ -653,6 +709,7 @@ unsafe fn lrgb_to_i444_sse2( &y_weights, &u_weights, &v_weights, + weights[7] == 1, ); } @@ -666,6 +723,7 @@ unsafe fn lrgb_to_i444_sse2( &y_weights, &u_weights, &v_weights, + weights[7] == 1, ); } } @@ -2259,3 +2317,277 @@ pub fn bgra_lrgb_rgb_lrgb( dst_buffers, ) } + +pub fn argb_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + 
PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} + +pub fn nv12_jpeg_bgra_lrgb( + width: u32, + height: u32, + last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + nv12_bgra_lrgb( + width, + height, + last_src_plane as usize, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn i420_jpeg_bgra_lrgb( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + i420_bgra_lrgb( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn i444_jpeg_bgra_lrgb( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + i444_bgra_lrgb( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn argb_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as 
usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} + +pub fn argb_lrgb_i444_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_i444_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_i444_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + 
Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} diff --git a/src/convert_image/x86.rs b/src/convert_image/x86.rs index 1b4d76e..9ee95e9 100644 --- a/src/convert_image/x86.rs +++ b/src/convert_image/x86.rs @@ -51,12 +51,16 @@ unsafe fn _bswap64(x: i64) -> i64 { (((_bswap(x as i32) as u64) << 32) | ((_bswap((x >> 32) as i32) as u64) & 0xFFFFFFFF)) as i64 } -pub const FORWARD_WEIGHTS: [[i32; 9]; Colorimetry::Length as usize] = [ +pub const FORWARD_WEIGHTS: [[i32; 11]; Colorimetry::Length as usize] = [ [ - XR_601, XG_601, XB_601, YR_601, YG_601, YB_601, ZR_601, ZG_601, ZB_601, + XR_601, XG_601, XB_601, YR_601, YG_601, YB_601, ZR_601, ZG_601, ZB_601, Y_OFFSET, 0, ], [ - XR_709, XG_709, XB_709, YR_709, YG_709, YB_709, ZR_709, ZG_709, ZB_709, + XR_709, XG_709, XB_709, YR_709, YG_709, YB_709, ZR_709, ZG_709, ZB_709, Y_OFFSET, 0, + ], + [ + XR_JPEG, XG_JPEG, XB_JPEG, YR_JPEG, YG_JPEG, YB_JPEG, ZR_JPEG, ZG_JPEG, ZB_JPEG, + FIX16_HALF, 1, ], ]; @@ -67,6 +71,9 @@ pub const BACKWARD_WEIGHTS: [[i32; 8]; Colorimetry::Length as usize] = [ [ XXYM_709, RCRM_709, GCRM_709, GCBM_709, BCBM_709, RN_709, GP_709, BN_709, ], + [ + XXYM_JPEG, RCRM_JPEG, GCRM_JPEG, GCBM_JPEG, BCBM_JPEG, RN_JPEG, GP_JPEG, BN_JPEG, + ], ]; const SAMPLER_OFFSETS: [[usize; 3]; Sampler::Length as usize] = @@ -172,7 +179,7 @@ pub fn lrgb_to_nv12( dst_strides: (usize, usize), dst_buffers: &mut (&mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 9], + weights: &[i32; 11], sampler: Sampler, ) { let (y_stride, uv_stride) = dst_strides; @@ -188,6 +195,8 @@ pub fn lrgb_to_nv12( let zr = weights[6]; let zg = weights[7]; let zb = weights[8]; + let yo = weights[9]; + let co = weights[10]; let wg_width = width / 2; let wg_height = height / 2; @@ -211,8 +220,8 @@ pub fn lrgb_to_nv12( pack_i32x2( y_group.add(wg_index(2 * x, 2 * y, 1, y_stride)), - fix_to_i32(affine_transform(r00, g00, b00, xr, xg, xb, Y_OFFSET), FIX16), - fix_to_i32(affine_transform(r10, g10, b10, xr, xg, xb, Y_OFFSET), FIX16), + 
fix_to_i32(affine_transform(r00, g00, b00, xr, xg, xb, yo), FIX16), + fix_to_i32(affine_transform(r10, g10, b10, xr, xg, xb, yo), FIX16), ); let (r01, g01, b01) = unpack_ui8x3_i32( @@ -227,8 +236,8 @@ pub fn lrgb_to_nv12( pack_i32x2( y_group.add(wg_index(2 * x, 2 * y + 1, 1, y_stride)), - fix_to_i32(affine_transform(r01, g01, b01, xr, xg, xb, Y_OFFSET), FIX16), - fix_to_i32(affine_transform(r11, g11, b11, xr, xg, xb, Y_OFFSET), FIX16), + fix_to_i32(affine_transform(r01, g01, b01, xr, xg, xb, yo), FIX16), + fix_to_i32(affine_transform(r11, g11, b11, xr, xg, xb, yo), FIX16), ); let sr = (r00 + r10) + (r01 + r11); @@ -236,8 +245,14 @@ pub fn lrgb_to_nv12( let sb = (b00 + b10) + (b01 + b11); pack_i32x2( uv_group.add(wg_index(x, y, 2, uv_stride)), - fix_to_i32(affine_transform(sr, sg, sb, yr, yg, yb, C_OFFSET), FIX18), - fix_to_i32(affine_transform(sr, sg, sb, zr, zg, zb, C_OFFSET), FIX18), + fix_to_i32( + affine_transform(sr, sg, sb, yr, yg, yb, C_OFFSET - FIX18_HALF * co), + FIX18, + ), + fix_to_i32( + affine_transform(sr, sg, sb, zr, zg, zb, C_OFFSET - FIX18_HALF * co), + FIX18, + ), ); } } @@ -253,7 +268,7 @@ pub fn lrgb_to_i420( dst_strides: (usize, usize, usize), dst_buffers: &mut (&mut [u8], &mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 9], + weights: &[i32; 11], sampler: Sampler, ) { let (y_stride, u_stride, v_stride) = dst_strides; @@ -267,6 +282,8 @@ pub fn lrgb_to_i420( let zr = weights[6]; let zg = weights[7]; let zb = weights[8]; + let yo = weights[9]; + let co = weights[10]; let wg_width = width / 2; let wg_height = height / 2; @@ -291,8 +308,8 @@ pub fn lrgb_to_i420( pack_i32x2( y_group.add(wg_index(2 * x, 2 * y, 1, y_stride)), - fix_to_i32(affine_transform(r00, g00, b00, xr, xg, xb, Y_OFFSET), FIX16), - fix_to_i32(affine_transform(r10, g10, b10, xr, xg, xb, Y_OFFSET), FIX16), + fix_to_i32(affine_transform(r00, g00, b00, xr, xg, xb, yo), FIX16), + fix_to_i32(affine_transform(r10, g10, b10, xr, xg, xb, yo), FIX16), ); let (r01, g01, b01) = 
unpack_ui8x3_i32( @@ -307,8 +324,8 @@ pub fn lrgb_to_i420( pack_i32x2( y_group.add(wg_index(2 * x, 2 * y + 1, 1, y_stride)), - fix_to_i32(affine_transform(r01, g01, b01, xr, xg, xb, Y_OFFSET), FIX16), - fix_to_i32(affine_transform(r11, g11, b11, xr, xg, xb, Y_OFFSET), FIX16), + fix_to_i32(affine_transform(r01, g01, b01, xr, xg, xb, yo), FIX16), + fix_to_i32(affine_transform(r11, g11, b11, xr, xg, xb, yo), FIX16), ); let sr = (r00 + r10) + (r01 + r11); @@ -321,10 +338,14 @@ pub fn lrgb_to_i420( // Checked: this is proved to not go outside the 8-bit boundary #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] { - *u = - fix_to_i32(affine_transform(sr, sg, sb, yr, yg, yb, C_OFFSET), FIX18) as u8; - *v = - fix_to_i32(affine_transform(sr, sg, sb, zr, zg, zb, C_OFFSET), FIX18) as u8; + *u = fix_to_i32( + affine_transform(sr, sg, sb, yr, yg, yb, C_OFFSET - FIX18_HALF * co), + FIX18, + ) as u8; + *v = fix_to_i32( + affine_transform(sr, sg, sb, zr, zg, zb, C_OFFSET - FIX18_HALF * co), + FIX18, + ) as u8; } } } @@ -340,7 +361,7 @@ pub fn lrgb_to_i444( dst_strides: (usize, usize, usize), dst_buffers: &mut (&mut [u8], &mut [u8], &mut [u8]), depth: usize, - weights: &[i32; 9], + weights: &[i32; 11], sampler: Sampler, ) { let (y_stride, u_stride, v_stride) = dst_strides; @@ -354,6 +375,8 @@ pub fn lrgb_to_i444( let zr = weights[6]; let zg = weights[7]; let zb = weights[8]; + let yo = weights[9]; + let co = weights[10]; unsafe { let src_group = src_buffer.as_ptr(); @@ -373,12 +396,15 @@ pub fn lrgb_to_i444( // Checked: this is proved to not go outside the 8-bit boundary #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)] { - *y_data = - fix_to_i32(affine_transform(r, g, b, xr, xg, xb, Y_OFFSET), FIX16) as u8; - *u_data = - fix_to_i32(affine_transform(r, g, b, yr, yg, yb, C_OFFSET16), FIX16) as u8; - *v_data = - fix_to_i32(affine_transform(r, g, b, zr, zg, zb, C_OFFSET16), FIX16) as u8; + *y_data = fix_to_i32(affine_transform(r, g, b, xr, xg, xb, yo), 
FIX16) as u8; + *u_data = fix_to_i32( + affine_transform(r, g, b, yr, yg, yb, C_OFFSET16 - FIX16_HALF * co), + FIX16, + ) as u8; + *v_data = fix_to_i32( + affine_transform(r, g, b, zr, zg, zb, C_OFFSET16 - FIX16_HALF * co), + FIX16, + ) as u8; } } } @@ -1884,3 +1910,277 @@ pub fn bgra_lrgb_rgb_lrgb( true } + +pub fn argb_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_nv12_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_nv12( + width, + height, + src_strides, + src_buffers, + last_dst_plane as usize, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} + +pub fn nv12_jpeg_bgra_lrgb( + width: u32, + height: u32, + last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + nv12_bgra_lrgb( + width, + height, + last_src_plane as usize, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn 
i420_jpeg_bgra_lrgb( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + i420_bgra_lrgb( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn i444_jpeg_bgra_lrgb( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + i444_bgra_lrgb( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + Colorimetry::Jpeg as usize, + ) +} + +pub fn argb_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_i420_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i420( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} + +pub fn argb_lrgb_i444_jpeg( + width: u32, + height: u32, + 
_last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Argb, + ) +} + +pub fn bgra_lrgb_i444_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Four, + Colorimetry::Jpeg as usize, + Sampler::Bgra, + ) +} + +pub fn bgr_lrgb_i444_jpeg( + width: u32, + height: u32, + _last_src_plane: u32, + src_strides: &[usize], + src_buffers: &[&[u8]], + _last_dst_plane: u32, + dst_strides: &[usize], + dst_buffers: &mut [&mut [u8]], +) -> bool { + lrgb_i444( + width, + height, + src_strides, + src_buffers, + dst_strides, + dst_buffers, + PixelFormatChannels::Three, + Colorimetry::Jpeg as usize, + Sampler::Bgr, + ) +} diff --git a/src/dispatcher.rs b/src/dispatcher.rs index 61131ec..745f649 100644 --- a/src/dispatcher.rs +++ b/src/dispatcher.rs @@ -36,7 +36,7 @@ static_assert!(HI_RGB_PIXEL_FORMAT == LO_YUV_PIXEL_FORMAT - 1); const LO_RGB_COLOR_SPACE: u32 = ColorSpace::Lrgb as u32; const HI_RGB_COLOR_SPACE: u32 = ColorSpace::Lrgb as u32; const LO_YUV_COLOR_SPACE: u32 = ColorSpace::Bt601 as u32; -const HI_YUV_COLOR_SPACE: u32 = ColorSpace::Bt709 as u32; +const HI_YUV_COLOR_SPACE: u32 = ColorSpace::Bt601Full as u32; static_assert!(HI_RGB_COLOR_SPACE == LO_YUV_COLOR_SPACE - 1); const RGB_PIXEL_FORMAT_COUNT: u32 = enum_count(LO_RGB_PIXEL_FORMAT, HI_RGB_PIXEL_FORMAT); diff --git a/src/lib.rs b/src/lib.rs index 0746cad..535b00f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,6 +53,7 @@ //! The supported color models are: //! 
* ycbcr, ITU-R Recommendation BT.601 (standard video system) //! * ycbcr, ITU-R Recommendation BT.709 (CSC systems) +//! * ycbcr, Jpeg //! //! # Examples //! @@ -358,10 +359,10 @@ impl error::Error for ErrorKind { /// `PixelFormat::Bgr` | `ColorSpace::Lrgb` /// `PixelFormat::Rgba` | `ColorSpace::Lrgb` /// `PixelFormat::Rgb` | `ColorSpace::Lrgb` -/// `PixelFormat::I444` | `ColorSpace::Bt601`, `ColorSpace::Bt709` -/// `PixelFormat::I422` | `ColorSpace::Bt601`, `ColorSpace::Bt709` -/// `PixelFormat::I420` | `ColorSpace::Bt601`, `ColorSpace::Bt709` -/// `PixelFormat::Nv12` | `ColorSpace::Bt601`, `ColorSpace::Bt709` +/// `PixelFormat::I444` | `ColorSpace::Bt601`, `ColorSpace::Bt709`, `ColorSpace::Bt601Full` +/// `PixelFormat::I422` | `ColorSpace::Bt601`, `ColorSpace::Bt709`, `ColorSpace::Bt601Full` +/// `PixelFormat::I420` | `ColorSpace::Bt601`, `ColorSpace::Bt709`, `ColorSpace::Bt601Full` +/// `PixelFormat::Nv12` | `ColorSpace::Bt601`, `ColorSpace::Bt709`, `ColorSpace::Bt601Full` /// /// Some pixel formats might impose additional restrictions on the accepted number of /// planes and the image size: @@ -438,6 +439,91 @@ macro_rules! 
set_dispatch_table { set_dispatcher!($conv, $set, I444, Bt601, Bgra, Lrgb, i444_bt601_bgra_lrgb); set_dispatcher!($conv, $set, I444, Bt709, Bgra, Lrgb, i444_bt709_bgra_lrgb); set_dispatcher!($conv, $set, Bgra, Lrgb, Rgb, Lrgb, bgra_lrgb_rgb_lrgb); + + set_dispatcher!( + $conv, + $set, + Argb, + Lrgb, + Nv12, + Bt601Full, + argb_lrgb_nv12_jpeg + ); + set_dispatcher!( + $conv, + $set, + Bgra, + Lrgb, + Nv12, + Bt601Full, + bgra_lrgb_nv12_jpeg + ); + set_dispatcher!($conv, $set, Bgr, Lrgb, Nv12, Bt601Full, bgr_lrgb_nv12_jpeg); + set_dispatcher!( + $conv, + $set, + Argb, + Lrgb, + I420, + Bt601Full, + argb_lrgb_i420_jpeg + ); + set_dispatcher!( + $conv, + $set, + Bgra, + Lrgb, + I420, + Bt601Full, + bgra_lrgb_i420_jpeg + ); + set_dispatcher!($conv, $set, Bgr, Lrgb, I420, Bt601Full, bgr_lrgb_i420_jpeg); + set_dispatcher!( + $conv, + $set, + Argb, + Lrgb, + I444, + Bt601Full, + argb_lrgb_i444_jpeg + ); + set_dispatcher!( + $conv, + $set, + Bgra, + Lrgb, + I444, + Bt601Full, + bgra_lrgb_i444_jpeg + ); + set_dispatcher!($conv, $set, Bgr, Lrgb, I444, Bt601Full, bgr_lrgb_i444_jpeg); + set_dispatcher!( + $conv, + $set, + Nv12, + Bt601Full, + Bgra, + Lrgb, + nv12_jpeg_bgra_lrgb + ); + set_dispatcher!( + $conv, + $set, + I420, + Bt601Full, + Bgra, + Lrgb, + i420_jpeg_bgra_lrgb + ); + set_dispatcher!( + $conv, + $set, + I444, + Bt601Full, + Bgra, + Lrgb, + i444_jpeg_bgra_lrgb + ); }; } @@ -791,6 +877,12 @@ pub fn get_buffers_size( /// y = 0.213 * r + 0.715 * g + 0.072 * b + 16 /// cb = -0.117 * r - 0.394 * g + 0.511 * b + 128 /// cr = 0.511 * r - 0.464 * g - 0.047 * b + 128 +/// ``` +/// If the destination image color space is Bt601Full, the following formula is applied: +/// ```text +/// y = 0.299 * r + 0.587 * g + 0.114 * b +/// cb = -0.169 * r - 0.331 * g + 0.500 * b + 128 +/// cr = 0.500 * r - 0.419 * g - 0.081 * b + 128 /// ``` /// /// # Algorithm 2 @@ -810,6 +902,12 @@ pub fn get_buffers_size( /// r = 1.164 * (y - 16) + 1.793 * (cr - 128) /// g = 1.164 * (y - 16) - 0.534 * (cr 
- 128) - 0.213 * (cb - 128) /// b = 1.164 * (y - 16) + 2.115 * (cb - 128) +/// ``` +/// If the source image color space is Bt601Full, the following formula is applied: +/// ```text +/// r = y + 1.402 * (cr - 128) +/// g = y - 0.714 * (cr - 128) - 0.344 * (cb - 128) +/// b = y + 1.772 * (cb - 128) /// ``` /// /// # Algorithm 3 diff --git a/tests/tests.rs b/tests/tests.rs index 4d372f5..aa4051a 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -56,7 +56,12 @@ const PIXEL_FORMATS: &[PixelFormat; 9] = &[ PixelFormat::Nv12, ]; -const COLOR_SPACES: &[ColorSpace; 3] = &[ColorSpace::Lrgb, ColorSpace::Bt601, ColorSpace::Bt709]; +const COLOR_SPACES: &[ColorSpace; 4] = &[ + ColorSpace::Lrgb, + ColorSpace::Bt601, + ColorSpace::Bt709, + ColorSpace::Bt601Full, +]; const PIXEL_FORMAT_I444: u32 = PixelFormat::I444 as u32; const PIXEL_FORMAT_I422: u32 = PixelFormat::I422 as u32; @@ -242,6 +247,53 @@ const CR2_BT709_REF: &[&[u8]] = &[ &[146, 131, 135, 127], ]; +const Y_JPEG_REF: &[&[u8]] = &[ + &[67, 72, 99, 136, 119, 138, 83, 222], + &[90, 181, 116, 91, 100, 98, 180, 123], + &[146, 68, 214, 231, 95, 135, 57, 153], + &[145, 45, 198, 185, 161, 196, 75, 90], + &[119, 89, 127, 157, 28, 173, 192, 187], + &[144, 89, 110, 112, 135, 114, 165, 55], + &[113, 201, 99, 84, 104, 90, 130, 159], + &[134, 57, 37, 69, 132, 170, 118, 145], +]; + +const CB_JPEG_REF: &[&[u8]] = &[ + &[114, 116, 214, 86, 136, 162, 92, 95], + &[95, 40, 185, 130, 165, 95, 115, 67], + &[53, 119, 144, 83, 149, 159, 191, 54], + &[117, 180, 134, 157, 133, 100, 192, 184], + &[201, 97, 199, 98, 200, 130, 74, 31], + &[50, 80, 208, 74, 181, 80, 52, 105], + &[89, 49, 121, 215, 77, 204, 123, 152], + &[192, 213, 192, 93, 141, 35, 205, 86], +]; + +const CR_JPEG_REF: &[&[u8]] = &[ + &[194, 101, 183, 207, 219, 99, 117, 136], + &[242, 166, 149, 80, 72, 234, 116, 108], + &[195, 79, 113, 117, 142, 167, 171, 174], + &[168, 126, 83, 177, 77, 77, 74, 86], + &[190, 98, 68, 75, 119, 185, 92, 170], + &[76, 221, 223, 
161, 190, 51, 82, 116], + &[135, 158, 87, 189, 62, 132, 182, 104], + &[161, 133, 118, 121, 194, 156, 56, 159], +]; + +const CB2_JPEG_REF: &[&[u8]] = &[ + &[91, 154, 140, 93], + &[117, 130, 135, 155], + &[107, 145, 148, 65], + &[136, 155, 114, 142], +]; + +const CR2_JPEG_REF: &[&[u8]] = &[ + &[176, 155, 156, 119], + &[142, 122, 116, 126], + &[146, 132, 136, 115], + &[147, 129, 136, 125], +]; + // Largest group that uses neither avx2 nor sse2 is 62x64. // We can arrange the image as blocks: // y0 y0 | y1 y1 | ... @@ -271,19 +323,33 @@ const CR2_BT709_REF: &[&[u8]] = &[ // magenta (255, 0, 255): 78, 214, 230 // cyan ( 0, 255, 255): 188, 154, 16 // white (255, 255, 255): 235, 128, 128 -const Y_SRC: [[u8; 8]; 2] = [ +// +// Color table (jpeg): +// r g b +// black ( 0, 0, 0): 0, 128, 128 +// red (255, 0, 0): 76, 84, 255 +// green ( 0, 255, 0): 149, 43, 21 +// yellow (255, 255, 0): 225, 0, 148 +// blue ( 0, 0, 255): 29, 255, 107 +// magenta (255, 0, 255): 105, 212, 234 +// cyan ( 0, 255, 255): 178, 171, 0 +// white (255, 255, 255): 255, 128, 128 +const Y_SRC: [[u8; 8]; 3] = [ [16, 82, 145, 210, 41, 107, 169, 235], [16, 63, 173, 219, 32, 78, 188, 235], + [0, 76, 149, 225, 29, 105, 178, 255], ]; -const U_SRC: [[u8; 8]; 2] = [ +const U_SRC: [[u8; 8]; 3] = [ [128, 90, 54, 16, 240, 202, 166, 128], [128, 102, 42, 16, 240, 214, 154, 128], + [128, 84, 43, 0, 255, 212, 171, 128], ]; -const V_SRC: [[u8; 8]; 2] = [ +const V_SRC: [[u8; 8]; 3] = [ [128, 240, 34, 146, 110, 222, 16, 128], [128, 240, 26, 138, 118, 230, 16, 128], + [128, 255, 21, 148, 107, 234, 0, 128], ]; const NUM_LOG2_DEN: [[usize; 2]; 9] = [ @@ -727,7 +793,8 @@ fn rgb_to_yuv_size( } fn rgb_to_yuv_ok(pixel_format: PixelFormat, num_planes: u32) { - const SUPPORTED_COLOR_SPACES: &[ColorSpace] = &[ColorSpace::Bt601, ColorSpace::Bt709]; + const SUPPORTED_COLOR_SPACES: &[ColorSpace] = + &[ColorSpace::Bt601, ColorSpace::Bt709, ColorSpace::Bt601Full]; const MAX_WIDTH: u32 = 8; const MAX_HEIGHT: u32 = 8; @@ -746,12 +813,14 @@ 
fn rgb_to_yuv_ok(pixel_format: PixelFormat, num_planes: u32) { let plane_ref = if let PixelFormat::I444 = pixel_format { match color_space { ColorSpace::Bt601 => (Y_BT601_REF, CB_BT601_REF, CR_BT601_REF), - _ => (Y_BT709_REF, CB_BT709_REF, CR_BT709_REF), + ColorSpace::Bt709 => (Y_BT709_REF, CB_BT709_REF, CR_BT709_REF), + _ => (Y_JPEG_REF, CB_JPEG_REF, CR_JPEG_REF), } } else { match color_space { ColorSpace::Bt601 => (Y_BT601_REF, CB2_BT601_REF, CR2_BT601_REF), - _ => (Y_BT709_REF, CB2_BT709_REF, CR2_BT709_REF), + ColorSpace::Bt709 => (Y_BT709_REF, CB2_BT709_REF, CR2_BT709_REF), + _ => (Y_JPEG_REF, CB2_JPEG_REF, CR2_JPEG_REF), } }; @@ -801,7 +870,8 @@ fn yuv_to_rgb_size_format_mode_stride( let dst_size = dst_stride * h; let color_space_index = match src_format.color_space { ColorSpace::Bt601 => 0, - _ => 1, + ColorSpace::Bt709 => 1, + _ => 2, }; // Allocate and initialize input @@ -1005,7 +1075,8 @@ fn yuv_to_rgb_size_format_mode( } fn yuv_to_rgb_ok(pixel_format: PixelFormat, num_planes: u32) { - const SUPPORTED_COLOR_SPACES: &[ColorSpace] = &[ColorSpace::Bt601, ColorSpace::Bt709]; + const SUPPORTED_COLOR_SPACES: &[ColorSpace] = + &[ColorSpace::Bt601, ColorSpace::Bt709, ColorSpace::Bt601Full]; const MAX_WIDTH: u32 = 34; const MAX_HEIGHT: u32 = 4;