diff --git a/.github/workflows/build_push.yml b/.github/workflows/build_push.yml
index d37abf0a..f8fd3be0 100644
--- a/.github/workflows/build_push.yml
+++ b/.github/workflows/build_push.yml
@@ -28,5 +28,20 @@ jobs:
       - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --target i686-unknown-linux-gnu
       - run: cargo build --target powerpc-unknown-linux-gnu
       - run: RUSTFLAGS="-C target-feature=+avx2" cargo build --features "nightly_avx512" --target x86_64-unknown-linux-gnu
-      - name: Test release pipeline
-        run: cargo publish --dry-run
\ No newline at end of file
+
+  fuzz_decoding: # Runs each fuzz target below in a short, time-boxed session on every OS in the matrix.
+    name: Fuzzing decoders
+    strategy:
+      matrix:
+        os: [ ubuntu-latest, macos-latest ]
+    runs-on: ${{ matrix.os }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@nightly
+      - run: cargo install cargo-fuzz
+      - run: cargo fuzz run yuv_to_rgb -- -max_total_time=15 # libFuzzer option: stop after 15 seconds
+      - run: cargo fuzz run yuv_nv_to_rgb -- -max_total_time=15
+      - run: cargo fuzz run y_to_rgb -- -max_total_time=15
+      - run: cargo fuzz run yuv16_to_rgb16 -- -max_total_time=15
+      - run: cargo fuzz run y16_to_rgb16 -- -max_total_time=15
+      - run: cargo fuzz run yuv_to_yuyu2 -- -max_total_time=15
diff --git a/Cargo.lock b/Cargo.lock
index ff84c627..04233239 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -111,12 +111,6 @@ dependencies = [
  "arrayvec",
 ]
 
-[[package]]
-name = "az"
-version = "1.2.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b7e4c2464d97fe331d41de9d5db0def0a96f4d823b8b32a2efd503578988973"
-
 [[package]]
 name = "bindgen"
 version = "0.69.5"
@@ -293,13 +287,6 @@ version = "0.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
 
-[[package]]
-name = "coeffs"
-version = "0.1.0"
-dependencies = [
- "rug",
-]
-
 [[package]]
 name = "color_quant"
 version = "1.1.0"
@@ -475,16 +462,6 @@ version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
 
-[[package]]
-name = "gmp-mpfr-sys"
-version = "1.6.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b0205cd82059bc63b63cf516d714352a30c44f2c74da9961dfda2617ae6b5918"
-dependencies = [
- "libc",
- "windows-sys 0.52.0",
-]
-
 [[package]]
 name = "half"
 version = "2.4.1"
@@ -686,12 +663,6 @@ dependencies = [
  "windows-targets",
 ]
 
-[[package]]
-name = "libm"
-version = "0.2.11"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa"
-
 [[package]]
 name = "linux-raw-sys"
 version = "0.4.14"
@@ -1095,18 +1066,6 @@ version = "0.8.50"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a"
 
-[[package]]
-name = "rug"
-version = "1.26.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "97ae2c1089ec0575193eb9222881310cc1ed8bce3646ef8b81b44b518595b79d"
-dependencies = [
- "az",
- "gmp-mpfr-sys",
- "libc",
- "libm",
-]
-
 [[package]]
 name = "rustc-hash"
 version = "1.1.0"
@@ -1564,7 +1523,7 @@ dependencies = [
 
 [[package]]
 name = "yuvutils-rs"
-version = "0.5.10"
+version = "0.5.11"
 dependencies = [
  "num-traits",
  "rayon",
diff --git a/Cargo.toml b/Cargo.toml
index 573c07b2..772abc5a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,8 +1,8 @@
-workspace = { members = ["app", "coeffs", "fuzz"] }
+workspace = { members = ["app", "fuzz"] }
 
 [package]
 name = "yuvutils-rs"
-version = "0.5.10"
+version = "0.5.11"
 edition = "2021"
 description = "High performance utilities for YUV format handling and conversion."
 readme = "README.md"
diff --git a/app/src/main.rs b/app/src/main.rs
index 98fdde01..eb89741d 100644
--- a/app/src/main.rs
+++ b/app/src/main.rs
@@ -30,12 +30,12 @@ use image::{ColorType, DynamicImage, EncodableLayout, GenericImageView, ImageRea
 use std::fs::File;
 use std::io::Read;
 use std::time::Instant;
-use yuv_sys::{rs_I420ToRGB24, rs_NV12ToRGB24, rs_NV21ToABGR, rs_NV21ToRGB24};
+use yuv_sys::{rs_I420ToRGB24, rs_NV12ToRGB24, rs_NV21ToABGR, rs_NV21ToRGB24, rs_RGB24ToI420};
 use yuvutils_rs::{
     gbr_to_rgb, rgb_to_gbr, rgb_to_sharp_yuv420, rgb_to_yuv420, rgb_to_yuv420_p16, rgb_to_yuv422,
     rgb_to_yuv422_p16, rgb_to_yuv444, rgb_to_yuv_nv12, yuv420_p16_to_rgb16, yuv420_to_rgb,
     yuv420_to_yuyv422, yuv422_p16_to_rgb16, yuv422_to_rgb, yuv444_to_rgb, yuv_nv12_to_rgb,
-    yuv_nv12_to_rgba, yuyv422_to_yuv420, BufferStoreMut, SharpYuvGammaTransfer,
+    yuv_nv12_to_rgba, yuyv422_to_rgb, yuyv422_to_yuv420, BufferStoreMut, SharpYuvGammaTransfer,
     YuvBiPlanarImageMut, YuvBytesPacking, YuvChromaSubsampling, YuvEndianness, YuvPackedImage,
     YuvPackedImageMut, YuvPlanarImageMut, YuvRange, YuvStandardMatrix,
 };
@@ -113,7 +113,7 @@ fn main() {
     // println!("rgb_to_yuv_nv12 time: {:?}", start_time.elapsed());
     //
     println!("Forward time: {:?}", start_time.elapsed());
-    // // //
+    // // // //
     let full_size = if width % 2 == 0 {
         2 * width as usize * height as usize
     } else {
@@ -128,34 +128,33 @@ fn main() {
         2 * (width as usize + 1)
     };
 
-    // let mut yuy2_plane = vec![0u8; full_size];
+    let mut yuy2_plane = vec![0u8; full_size];
     // // // //
     // let start_time = Instant::now();
     // // // //
-    // let plane = planar_image.to_fixed();
-    // //
-    // let mut packed_image_mut = YuvPackedImageMut {
-    //     yuy: BufferStoreMut::Owned(yuy2_plane),
-    //     yuy_stride: yuy2_stride as u32,
-    //     width,
-    //     height,
-    // };
+    let plane = planar_image.to_fixed();
     // //
-    // yuv420_to_yuyv422(&mut packed_image_mut, &plane).unwrap();
+    let mut packed_image_mut = YuvPackedImageMut {
+        yuy: BufferStoreMut::Owned(yuy2_plane),
+        yuy_stride: yuy2_stride as u32,
+        width,
+        height,
+    };
+    //
+    yuv420_to_yuyv422(&mut packed_image_mut, &plane).unwrap();
     // let end_time = Instant::now().sub(start_time);
     // println!("yuv420_to_yuyv422 time: {:?}", end_time);
     // // rgba.fill(0);
     // // let start_time = Instant::now();
-    // yuyv422_to_rgb(
-    //     &yuy2_plane,
-    //     yuy2_stride as u32,
-    //     &mut rgba,
-    //     rgba_stride as u32,
-    //     width,
-    //     height,
-    //     YuvRange::Limited,
-    //     YuvStandardMatrix::Bt709,
-    // );
+    let yuy2_img = packed_image_mut.to_fixed();
+    yuyv422_to_rgb(
+        &yuy2_img,
+        &mut rgba,
+        rgba_stride as u32,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+    )
+    .unwrap();
     //
     // let end_time = Instant::now().sub(start_time);
     // println!("yuyv422_to_rgb time: {:?}", end_time);
@@ -163,18 +162,13 @@ fn main() {
     // let start_time = Instant::now();
     // //
     //
-    // let packed_image = YuvPackedImage {
-    //     yuy: packed_image_mut.yuy.borrow(),
-    //     yuy_stride: yuy2_stride as u32,
-    //     width,
-    //     height,
-    // };
-    // //
-    // yuyv422_to_yuv420(&mut planar_image, &packed_image).unwrap();
+    let packed_image = packed_image_mut.to_fixed();
+    //
+    yuyv422_to_yuv420(&mut planar_image, &packed_image).unwrap();
     // // //
     // let end_time = Instant::now().sub(start_time);
     // println!("yuyv422_to_yuv444 time: {:?}", end_time);
-    rgba.fill(0);
+    // rgba.fill(0);
     // let mut bgra = vec![0u8; width as usize * height as usize * 4];
     // let start_time = Instant::now();
     // yuv420_to_rgb(
@@ -241,7 +235,7 @@ fn main() {
     //     YuvBytesPacking::LeastSignificantBytes,
     // )
     // .unwrap();
-    rgba.fill(0);
+    // rgba.fill(0);
     // ra30_to_rgb8(
     //     &ar30,
     //     width,
@@ -258,52 +252,77 @@ fn main() {
     // let rgba_stride = width as usize * 4;
     // let mut rgba = vec![0u8; height as usize * rgba_stride];
 
-    yuv420_to_rgb(
-        &fixed_planar,
-        &mut rgba,
-        rgba_stride as u32,
-        YuvRange::Limited,
-        YuvStandardMatrix::Bt601,
-    )
-    .unwrap();
+    // yuv420_to_rgb(
+    //     &fixed_planar,
+    //     &mut rgba,
+    //     rgba_stride as u32,
+    //     YuvRange::Limited,
+    //     YuvStandardMatrix::Bt601,
+    // )
+    // .unwrap();
 
     println!("Backward time: {:?}", start_time.elapsed());
 
     let start_time = Instant::now();
 
     // unsafe {
-    //     // rs_I420ToRGB24(
-    //     //     fixed_planar.y_plane.as_ptr(),
-    //     //     fixed_planar.y_stride as i32,
-    //     //     fixed_planar.u_plane.as_ptr(),
-    //     //     fixed_planar.u_stride as i32,
-    //     //     fixed_planar.v_plane.as_ptr(),
-    //     //     fixed_planar.v_stride as i32,
-    //     //     rgba.as_mut_ptr(),
-    //     //     rgba_stride as i32,
-    //     //     fixed_planar.width as i32,
-    //     //     fixed_planar.height as i32,
-    //     // );
-    //     rs_NV12ToRGB24(
-    //         fixed_biplanar.y_plane.as_ptr(),
-    //         fixed_biplanar.y_stride as i32,
-    //         fixed_biplanar.uv_plane.as_ptr(),
-    //         fixed_biplanar.uv_stride as i32,
+    //     let mut planar_image =
+    //         YuvPlanarImageMut::<u8>::alloc(width as u32, height as u32, YuvChromaSubsampling::Yuv420);
+    //
+    //     let mut source_bgr = vec![0u8; src_bytes.len()];
+    //
+    //     rgba.chunks_exact(3).zip(source_bgr.chunks_exact_mut(3)).for_each(|(src, dst)| {
+    //         let b = src[0];
+    //         dst[0] = src[2];
+    //         dst[1] = src[1];
+    //         dst[2] = b;
+    //     });
+    //
+    //     rs_RGB24ToI420(
+    //         src_bytes.as_ptr(),
+    //         rgba_stride as i32,
+    //         planar_image.y_plane.borrow_mut().as_mut_ptr(),
+    //         planar_image.y_stride as i32,
+    //         planar_image.u_plane.borrow_mut().as_mut_ptr(),
+    //         planar_image.u_stride as i32,
+    //         planar_image.v_plane.borrow_mut().as_mut_ptr(),
+    //         planar_image.v_stride as i32,
+    //         dimensions.0 as i32,
+    //         dimensions.1 as i32,
+    //     );
+    //     let fixed_planar = planar_image.to_fixed();
+    //     rs_I420ToRGB24(
+    //         fixed_planar.y_plane.as_ptr(),
+    //         fixed_planar.y_stride as i32,
+    //         fixed_planar.u_plane.as_ptr(),
+    //         fixed_planar.u_stride as i32,
+    //         fixed_planar.v_plane.as_ptr(),
+    //         fixed_planar.v_stride as i32,
     //         rgba.as_mut_ptr(),
     //         rgba_stride as i32,
     //         fixed_planar.width as i32,
     //         fixed_planar.height as i32,
     //     );
+    //
+    //     // rgba.chunks_exact_mut(3).for_each(|chunk| {
+    //     //     let b = chunk[0];
+    //     //     chunk[0] = chunk[2];
+    //     //     chunk[2] = b;
+    //     // });
+    // //     rs_NV12ToRGB24(
+    // //         fixed_biplanar.y_plane.as_ptr(),
+    // //         fixed_biplanar.y_stride as i32,
+    // //         fixed_biplanar.uv_plane.as_ptr(),
+    // //         fixed_biplanar.uv_stride as i32,
+    // //         rgba.as_mut_ptr(),
+    // //         rgba_stride as i32,
+    // //         fixed_planar.width as i32,
+    // //         fixed_planar.height as i32,
+    // //     );
     // }
 
     // /    println!("Backward LIBYUV time: {:?}", start_time.elapsed());
 
-    // rgba.chunks_exact_mut(3).for_each(|chunk| {
-    //     let b = chunk[0];
-    //     chunk[0] = chunk[2];
-    //     chunk[2] = b;
-    // });
-
     // rgba = bytes_16.iter().map(|&x| (x >> 4) as u8).collect();
 
     image::save_buffer(
diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml
index 0e0bf46c..13077b62 100644
--- a/fuzz/Cargo.toml
+++ b/fuzz/Cargo.toml
@@ -31,3 +31,24 @@ path = "y_to_rgb/y_to_rgb.rs"
 test = false
 doc = false
 bench = false
+
+[[bin]]
+name = "yuv16_to_rgb16"
+path = "yuv16_to_rgb16/yuv16_to_rgb16.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "y16_to_rgb16"
+path = "y16_to_rgb16/y16_to_rgb16.rs"
+test = false
+doc = false
+bench = false
+
+[[bin]]
+name = "yuv_to_yuyu2"
+path = "yuv_to_yuyu2/yuv_to_yuyu2.rs"
+test = false
+doc = false
+bench = false
\ No newline at end of file
diff --git a/fuzz/y16_to_rgb16/y16_to_rgb16.rs b/fuzz/y16_to_rgb16/y16_to_rgb16.rs
new file mode 100644
index 00000000..33b9f976
--- /dev/null
+++ b/fuzz/y16_to_rgb16/y16_to_rgb16.rs
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) Radzivon Bartoshyk, 12/2024. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1.  Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2.  Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3.  Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#![no_main]
+use libfuzzer_sys::fuzz_target;
+use yuvutils_rs::{
+    yuv400_p16_to_rgb16, yuv400_p16_to_rgba16, YuvBytesPacking, YuvEndianness, YuvGrayImage,
+    YuvRange, YuvStandardMatrix,
+};
+
+fuzz_target!(|data: (u8, u8, u8)| { // fuzzer-chosen (width, height, luma value)
+    fuzz_yuv(data.0, data.1, data.2);
+});
+
+/// Fuzz body: converts an `i_width` x `i_height` gray image filled with `y_value` to RGB16 and
+/// then to RGBA16, unwrapping both results. Images with a zero dimension are skipped.
+fn fuzz_yuv(i_width: u8, i_height: u8, y_value: u8) {
+    if i_width == 0 || i_height == 0 {
+        return;
+    }
+    let (width, height) = (u32::from(i_width), u32::from(i_height));
+    let pixels = usize::from(i_width) * usize::from(i_height);
+    let luma = vec![u16::from(y_value); pixels];
+    let gray = YuvGrayImage {
+        y_plane: &luma,
+        y_stride: width,
+        width,
+        height,
+    };
+
+    let mut rgb = vec![0u16; pixels * 3];
+    yuv400_p16_to_rgb16(
+        &gray,
+        &mut rgb,
+        width * 3,
+        10,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+        YuvEndianness::LittleEndian,
+        YuvBytesPacking::LeastSignificantBytes,
+    )
+    .unwrap();
+
+    let mut rgba = vec![0u16; pixels * 4];
+    yuv400_p16_to_rgba16(
+        &gray,
+        &mut rgba,
+        width * 4,
+        10,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+        YuvEndianness::LittleEndian,
+        YuvBytesPacking::LeastSignificantBytes,
+    )
+    .unwrap();
+}
diff --git a/fuzz/yuv16_to_rgb16/yuv16_to_rgb16.rs b/fuzz/yuv16_to_rgb16/yuv16_to_rgb16.rs
new file mode 100644
index 00000000..e5256f8c
--- /dev/null
+++ b/fuzz/yuv16_to_rgb16/yuv16_to_rgb16.rs
@@ -0,0 +1,187 @@
+/*
+ * Copyright (c) Radzivon Bartoshyk, 12/2024. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1.  Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2.  Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3.  Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use yuvutils_rs::{
+    yuv420_p16_to_rgb16, yuv420_p16_to_rgba16, yuv422_p16_to_rgb16, yuv422_p16_to_rgba16,
+    yuv444_p16_to_rgb16, yuv444_p16_to_rgba16, YuvBytesPacking, YuvEndianness, YuvPlanarImage,
+    YuvRange, YuvStandardMatrix,
+};
+
+fuzz_target!(|data: (u8, u8, u8, u8, u8, u8)| { // width, height, Y, U, V; data.5 is never read
+    fuzz_yuv_420(data.0, data.1, data.2 as u16, data.3 as u16, data.4 as u16);
+    fuzz_yuv_422(data.0, data.1, data.2 as u16, data.3 as u16, data.4 as u16);
+    fuzz_yuv_444(data.0, data.1, data.2 as u16, data.3 as u16, data.4 as u16);
+});
+
+/// Fuzz body for 4:2:0 input: constant-filled planes, with chroma halved (rounding up) in both
+/// dimensions, are converted to RGB16 and then RGBA16. Zero-sized images are skipped.
+fn fuzz_yuv_420(i_width: u8, i_height: u8, y_value: u16, u_value: u16, v_value: u16) {
+    if i_width == 0 || i_height == 0 {
+        return;
+    }
+    let (width, height) = (u32::from(i_width), u32::from(i_height));
+    let luma = vec![y_value; usize::from(i_width) * usize::from(i_height)];
+    let cb = vec![u_value; usize::from(i_width).div_ceil(2) * usize::from(i_height).div_ceil(2)];
+    let cr = vec![v_value; cb.len()];
+    let image = YuvPlanarImage {
+        y_plane: &luma,
+        y_stride: width,
+        u_plane: &cb,
+        u_stride: width.div_ceil(2),
+        v_plane: &cr,
+        v_stride: width.div_ceil(2),
+        width,
+        height,
+    };
+
+    let mut rgb = vec![0u16; luma.len() * 3];
+    yuv420_p16_to_rgb16(
+        &image,
+        &mut rgb,
+        width * 3,
+        10,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+        YuvEndianness::LittleEndian,
+        YuvBytesPacking::LeastSignificantBytes,
+    )
+    .unwrap();
+
+    let mut rgba = vec![0u16; luma.len() * 4];
+    yuv420_p16_to_rgba16(
+        &image,
+        &mut rgba,
+        width * 4,
+        10,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+        YuvEndianness::LittleEndian,
+        YuvBytesPacking::LeastSignificantBytes,
+    )
+    .unwrap();
+}
+
+/// Fuzz body for 4:2:2 input: constant-filled planes, with chroma halved (rounding up) in width
+/// only, are converted to RGB16 and then RGBA16. Zero-sized images are skipped.
+fn fuzz_yuv_422(i_width: u8, i_height: u8, y_value: u16, u_value: u16, v_value: u16) {
+    if i_width == 0 || i_height == 0 {
+        return;
+    }
+    let (width, height) = (u32::from(i_width), u32::from(i_height));
+    let luma = vec![y_value; usize::from(i_width) * usize::from(i_height)];
+    let cb = vec![u_value; usize::from(i_width).div_ceil(2) * usize::from(i_height)];
+    let cr = vec![v_value; cb.len()];
+    let image = YuvPlanarImage {
+        y_plane: &luma,
+        y_stride: width,
+        u_plane: &cb,
+        u_stride: width.div_ceil(2),
+        v_plane: &cr,
+        v_stride: width.div_ceil(2),
+        width,
+        height,
+    };
+
+    let mut rgb = vec![0u16; luma.len() * 3];
+    yuv422_p16_to_rgb16(
+        &image,
+        &mut rgb,
+        width * 3,
+        10,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+        YuvEndianness::LittleEndian,
+        YuvBytesPacking::LeastSignificantBytes,
+    )
+    .unwrap();
+
+    let mut rgba = vec![0u16; luma.len() * 4];
+    yuv422_p16_to_rgba16(
+        &image,
+        &mut rgba,
+        width * 4,
+        10,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+        YuvEndianness::LittleEndian,
+        YuvBytesPacking::LeastSignificantBytes,
+    )
+    .unwrap();
+}
+
+/// Fuzz body for 4:4:4 input: three constant-filled planes of identical size are converted to
+/// RGB16 and then RGBA16, unwrapping both results. Zero-sized images are skipped.
+fn fuzz_yuv_444(i_width: u8, i_height: u8, y_value: u16, u_value: u16, v_value: u16) {
+    if i_width == 0 || i_height == 0 {
+        return;
+    }
+    let (width, height) = (u32::from(i_width), u32::from(i_height));
+    let luma = vec![y_value; usize::from(i_width) * usize::from(i_height)];
+    let cb = vec![u_value; luma.len()];
+    let cr = vec![v_value; luma.len()];
+    let image = YuvPlanarImage {
+        y_plane: &luma,
+        y_stride: width,
+        u_plane: &cb,
+        u_stride: width,
+        v_plane: &cr,
+        v_stride: width,
+        width,
+        height,
+    };
+
+    let mut rgb = vec![0u16; luma.len() * 3];
+    yuv444_p16_to_rgb16(
+        &image,
+        &mut rgb,
+        width * 3,
+        10,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+        YuvEndianness::LittleEndian,
+        YuvBytesPacking::LeastSignificantBytes,
+    )
+    .unwrap();
+
+    let mut rgba = vec![0u16; luma.len() * 4];
+    yuv444_p16_to_rgba16(
+        &image,
+        &mut rgba,
+        width * 4,
+        10,
+        YuvRange::Limited,
+        YuvStandardMatrix::Bt601,
+        YuvEndianness::LittleEndian,
+        YuvBytesPacking::LeastSignificantBytes,
+    )
+    .unwrap();
+}
diff --git a/fuzz/yuv_to_yuyu2/yuv_to_yuyu2.rs b/fuzz/yuv_to_yuyu2/yuv_to_yuyu2.rs
new file mode 100644
index 00000000..8da435f3
--- /dev/null
+++ b/fuzz/yuv_to_yuyu2/yuv_to_yuyu2.rs
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) Radzivon Bartoshyk, 12/2024. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * 1.  Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2.  Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3.  Neither the name of the copyright holder nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#![no_main]
+
+use libfuzzer_sys::fuzz_target;
+use yuvutils_rs::{
+    yuv420_to_yuyv422, yuv422_to_yuyv422, yuv444_to_yuyv422, BufferStoreMut, YuvPackedImageMut,
+    YuvPlanarImage,
+};
+
+fuzz_target!(|data: (u8, u8, u8, u8, u8)| { // width, height, Y, shared U/V; data.4 is never read
+    fuzz_yuv_420(data.0, data.1, data.2, data.3);
+    fuzz_yuv_422(data.0, data.1, data.2, data.3);
+    fuzz_yuv_444(data.0, data.1, data.2, data.3);
+});
+
+/// Fuzz body for 4:2:0 input packed into YUYV.
+///
+/// Builds constant-filled planes (chroma halved, rounding up, in both dimensions), allocates a
+/// zeroed destination buffer and unwraps the result of `yuv420_to_yuyv422`.
+fn fuzz_yuv_420(i_width: u8, i_height: u8, y_value: u8, uv_value: u8) {
+    if i_width == 0 || i_height == 0 {
+        return;
+    }
+
+    let (width, height) = (u32::from(i_width), u32::from(i_height));
+    let luma = vec![y_value; usize::from(i_width) * usize::from(i_height)];
+    let cb = vec![uv_value; usize::from(i_width).div_ceil(2) * usize::from(i_height).div_ceil(2)];
+    let cr = vec![uv_value; cb.len()];
+    let source = YuvPlanarImage {
+        y_plane: &luma,
+        y_stride: width,
+        u_plane: &cb,
+        u_stride: width.div_ceil(2),
+        v_plane: &cr,
+        v_stride: width.div_ceil(2),
+        width,
+        height,
+    };
+
+    // Destination rows are sized for an even pixel count (an odd width is rounded up by one)
+    // at two bytes per pixel.
+    let padded_width = match i_width % 2 {
+        0 => usize::from(i_width),
+        _ => usize::from(i_width) + 1,
+    };
+    let yuy2_stride = 2 * padded_width;
+    let yuy2_buffer = vec![0u8; yuy2_stride * usize::from(i_height)];
+
+    let mut packed = YuvPackedImageMut {
+        yuy: BufferStoreMut::Owned(yuy2_buffer),
+        yuy_stride: yuy2_stride as u32,
+        width,
+        height,
+    };
+
+    yuv420_to_yuyv422(&mut packed, &source).unwrap();
+}
+
+/// Fuzz body for 4:2:2 input packed into YUYV.
+///
+/// Builds constant-filled planes (chroma halved, rounding up, in width only), allocates a
+/// zeroed destination buffer and unwraps the result of `yuv422_to_yuyv422`.
+fn fuzz_yuv_422(i_width: u8, i_height: u8, y_value: u8, uv_value: u8) {
+    if i_width == 0 || i_height == 0 {
+        return;
+    }
+
+    let (width, height) = (u32::from(i_width), u32::from(i_height));
+    let luma = vec![y_value; usize::from(i_width) * usize::from(i_height)];
+    let cb = vec![uv_value; usize::from(i_width).div_ceil(2) * usize::from(i_height)];
+    let cr = vec![uv_value; cb.len()];
+    let source = YuvPlanarImage {
+        y_plane: &luma,
+        y_stride: width,
+        u_plane: &cb,
+        u_stride: width.div_ceil(2),
+        v_plane: &cr,
+        v_stride: width.div_ceil(2),
+        width,
+        height,
+    };
+
+    // Destination rows are sized for an even pixel count (an odd width is rounded up by one)
+    // at two bytes per pixel.
+    let padded_width = match i_width % 2 {
+        0 => usize::from(i_width),
+        _ => usize::from(i_width) + 1,
+    };
+    let yuy2_stride = 2 * padded_width;
+    let yuy2_buffer = vec![0u8; yuy2_stride * usize::from(i_height)];
+
+    let mut packed = YuvPackedImageMut {
+        yuy: BufferStoreMut::Owned(yuy2_buffer),
+        yuy_stride: yuy2_stride as u32,
+        width,
+        height,
+    };
+
+    yuv422_to_yuyv422(&mut packed, &source).unwrap();
+}
+
+/// Fuzz body for 4:4:4 input packed into YUYV.
+///
+/// Builds three constant-filled planes of identical size, allocates a zeroed destination
+/// buffer and unwraps the result of `yuv444_to_yuyv422`.
+fn fuzz_yuv_444(i_width: u8, i_height: u8, y_value: u8, uv_value: u8) {
+    if i_width == 0 || i_height == 0 {
+        return;
+    }
+
+    let (width, height) = (u32::from(i_width), u32::from(i_height));
+    let luma = vec![y_value; usize::from(i_width) * usize::from(i_height)];
+    let cb = vec![uv_value; luma.len()];
+    let cr = vec![uv_value; luma.len()];
+    let source = YuvPlanarImage {
+        y_plane: &luma,
+        y_stride: width,
+        u_plane: &cb,
+        u_stride: width,
+        v_plane: &cr,
+        v_stride: width,
+        width,
+        height,
+    };
+
+    // Destination rows are sized for an even pixel count (an odd width is rounded up by one)
+    // at two bytes per pixel.
+    let padded_width = match i_width % 2 {
+        0 => usize::from(i_width),
+        _ => usize::from(i_width) + 1,
+    };
+    let yuy2_stride = 2 * padded_width;
+    let yuy2_buffer = vec![0u8; yuy2_stride * usize::from(i_height)];
+
+    let mut packed = YuvPackedImageMut {
+        yuy: BufferStoreMut::Owned(yuy2_buffer),
+        yuy_stride: yuy2_stride as u32,
+        width,
+        height,
+    };
+
+    yuv444_to_yuyv422(&mut packed, &source).unwrap();
+}
diff --git a/src/avx2/avx2_utils.rs b/src/avx2/avx2_utils.rs
index 759f4351..e445ad5c 100644
--- a/src/avx2/avx2_utils.rs
+++ b/src/avx2/avx2_utils.rs
@@ -293,6 +293,22 @@ pub(crate) unsafe fn avx2_pairwise_widen_avg(v: __m256i) -> __m256i {
     _mm256_permute4x64_epi64::<MASK>(packed_lo)
 }
 
+/// Averages adjacent unsigned byte pairs of `v` into 16-bit lanes, rounding up: `(a + b + 1) >> 1`.
+#[inline(always)]
+pub(crate) unsafe fn avx2_pairwise_wide_avg(v: __m256i) -> __m256i {
+    let sums = _mm256_maddubs_epi16(v, _mm256_set1_epi8(1));
+    // The rounding bias must be a 16-bit 1; the epi8 ones vector would add 0x0101 to every lane.
+    _mm256_srli_epi16::<1>(_mm256_add_epi16(sums, _mm256_set1_epi16(1)))
+}
+
+/// Horizontally adds the 16-bit pairs of `a` and `b`, halves each sum as `(s + 1) >> 1`, permutes.
+#[inline(always)]
+pub(crate) unsafe fn avx_pairwise_avg_epi16(a: __m256i, b: __m256i) -> __m256i {
+    const QWORD_ORDER: i32 = shuffle(3, 1, 2, 0); // presumably regroups hadd's per-lane halves
+    let biased = _mm256_add_epi16(_mm256_hadd_epi16(a, b), _mm256_set1_epi16(1));
+    _mm256_permute4x64_epi64::<QWORD_ORDER>(_mm256_srli_epi16::<1>(biased))
+}
+
 #[inline(always)]
 pub(crate) unsafe fn avx2_div_by255(v: __m256i) -> __m256i {
     let addition = _mm256_set1_epi16(127);
@@ -391,3 +407,13 @@ pub(crate) unsafe fn _mm256_interleave_rgb_epi16(
     let bgr2 = _mm256_permute2x128_si256::<49>(p0, p2);
     (bgr0, p1, bgr2)
 }
+
+/// Rounded average of adjacent byte pairs of `a` and `b`, fed to `avx_pairwise_avg_epi16`.
+#[inline(always)]
+pub(crate) unsafe fn _mm256_havg_epu8(a: __m256i, b: __m256i) -> __m256i {
+    let (ones, round) = (_mm256_set1_epi8(1), _mm256_set1_epi16(1));
+    // `round` is a 16-bit 1: reusing the epi8 `ones` as the bias would add 0x0101 to every lane.
+    let lo = _mm256_srli_epi16::<1>(_mm256_add_epi16(_mm256_maddubs_epi16(a, ones), round));
+    let hi = _mm256_srli_epi16::<1>(_mm256_add_epi16(_mm256_maddubs_epi16(b, ones), round));
+    avx_pairwise_avg_epi16(lo, hi)
+}
diff --git a/src/avx2/rgb_to_nv.rs b/src/avx2/rgb_to_nv.rs
index 2fdcd789..11d7d81d 100644
--- a/src/avx2/rgb_to_nv.rs
+++ b/src/avx2/rgb_to_nv.rs
@@ -29,6 +29,7 @@
 
 use crate::avx2::avx2_utils::{
     _mm256_deinterleave_rgba_epi8, _mm256_interleave_x2_epi8, avx2_deinterleave_rgb, avx2_pack_u16,
+    avx_pairwise_avg_epi16,
 };
 use crate::internals::ProcessedOffset;
 use crate::yuv_support::{
@@ -290,9 +291,9 @@ unsafe fn avx2_rgba_to_nv_impl<
         } else if chroma_subsampling == YuvChromaSubsampling::Yuv422
             || (chroma_subsampling == YuvChromaSubsampling::Yuv420 && compute_uv_row)
         {
-            let r1 = _mm256_avg_epu16(r_low, r_high);
-            let g1 = _mm256_avg_epu16(g_low, g_high);
-            let b1 = _mm256_avg_epu16(b_low, b_high);
+            let r1 = avx_pairwise_avg_epi16(r_low, r_high);
+            let g1 = avx_pairwise_avg_epi16(g_low, g_high);
+            let b1 = avx_pairwise_avg_epi16(b_low, b_high);
             let cb = _mm256_max_epi16(
                 _mm256_min_epi16(
                     _mm256_add_epi16(
diff --git a/src/avx2/rgba_to_yuv.rs b/src/avx2/rgba_to_yuv.rs
index ae031f4e..4dd4f398 100644
--- a/src/avx2/rgba_to_yuv.rs
+++ b/src/avx2/rgba_to_yuv.rs
@@ -28,7 +28,7 @@
  */
 
 use crate::avx2::avx2_utils::{
-    _mm256_deinterleave_rgba_epi8, avx2_deinterleave_rgb, avx2_pack_u16,
+    _mm256_deinterleave_rgba_epi8, avx2_deinterleave_rgb, avx2_pack_u16, avx_pairwise_avg_epi16,
 };
 use crate::internals::ProcessedOffset;
 use crate::yuv_support::{
@@ -272,9 +272,9 @@ unsafe fn avx2_rgba_to_yuv_impl<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(
         } else if chroma_subsampling == YuvChromaSubsampling::Yuv422
             || (chroma_subsampling == YuvChromaSubsampling::Yuv420)
         {
-            let r1 = _mm256_avg_epu16(r_low, r_high);
-            let g1 = _mm256_avg_epu16(g_low, g_high);
-            let b1 = _mm256_avg_epu16(b_low, b_high);
+            let r1 = avx_pairwise_avg_epi16(r_low, r_high);
+            let g1 = avx_pairwise_avg_epi16(g_low, g_high);
+            let b1 = avx_pairwise_avg_epi16(b_low, b_high);
             let cb = _mm256_max_epi16(
                 _mm256_min_epi16(
                     _mm256_add_epi16(
diff --git a/src/avx2/rgba_to_yuv420.rs b/src/avx2/rgba_to_yuv420.rs
index 8480c8c5..fc2ef2ef 100644
--- a/src/avx2/rgba_to_yuv420.rs
+++ b/src/avx2/rgba_to_yuv420.rs
@@ -28,7 +28,7 @@
  */
 
 use crate::avx2::avx2_utils::{
-    _mm256_deinterleave_rgba_epi8, avx2_deinterleave_rgb, avx2_pack_u16,
+    _mm256_deinterleave_rgba_epi8, avx2_deinterleave_rgb, avx2_pack_u16, avx2_pairwise_wide_avg,
 };
 use crate::internals::ProcessedOffset;
 use crate::yuv_support::{CbCrForwardTransform, YuvChromaRange, YuvSourceChannels};
@@ -125,7 +125,7 @@ unsafe fn avx2_rgba_to_yuv_impl420<const ORIGIN_CHANNELS: u8>(
                     b_values0 = it1;
                 }
 
-                let source_ptr1 = rgba0.get_unchecked(px..).as_ptr();
+                let source_ptr1 = rgba1.get_unchecked(px..).as_ptr();
                 let row_11 = _mm256_loadu_si256(source_ptr1 as *const __m256i);
                 let row_21 = _mm256_loadu_si256(source_ptr1.add(32) as *const __m256i);
                 let row_31 = _mm256_loadu_si256(source_ptr1.add(64) as *const __m256i);
@@ -291,9 +291,19 @@ unsafe fn avx2_rgba_to_yuv_impl420<const ORIGIN_CHANNELS: u8>(
             y1_yuv,
         );
 
-        let r_uv = _mm256_avg_epu16(r0_low, r0_high);
-        let g_uv = _mm256_avg_epu16(g0_low, g0_high);
-        let b_uv = _mm256_avg_epu16(b0_low, b0_high);
+        let r_uv = _mm256_slli_epi16::<V_SCALE>(_mm256_avg_epu16(
+            avx2_pairwise_wide_avg(r_values0),
+            avx2_pairwise_wide_avg(r_values1),
+        ));
+        let g_uv = _mm256_slli_epi16::<V_SCALE>(_mm256_avg_epu16(
+            avx2_pairwise_wide_avg(g_values0),
+            avx2_pairwise_wide_avg(g_values1),
+        ));
+        let b_uv = _mm256_slli_epi16::<V_SCALE>(_mm256_avg_epu16(
+            avx2_pairwise_wide_avg(b_values0),
+            avx2_pairwise_wide_avg(b_values1),
+        ));
+
         let cb = _mm256_max_epi16(
             _mm256_min_epi16(
                 _mm256_add_epi16(
@@ -310,6 +320,7 @@ unsafe fn avx2_rgba_to_yuv_impl420<const ORIGIN_CHANNELS: u8>(
             ),
             i_bias_y,
         );
+
         let cr = _mm256_max_epi16(
             _mm256_min_epi16(
                 _mm256_add_epi16(
@@ -338,8 +349,8 @@ unsafe fn avx2_rgba_to_yuv_impl420<const ORIGIN_CHANNELS: u8>(
             v_ptr.add(uv_x) as *mut _ as *mut __m128i,
             _mm256_castsi256_si128(cr),
         );
-        uv_x += 16;
 
+        uv_x += 16;
         cx += 32;
     }
 
diff --git a/src/avx2/yuv_to_yuv2.rs b/src/avx2/yuv_to_yuv2.rs
index 8436ec29..bbb6c48a 100644
--- a/src/avx2/yuv_to_yuv2.rs
+++ b/src/avx2/yuv_to_yuv2.rs
@@ -26,7 +26,9 @@
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
-use crate::avx2::avx2_utils::{_mm256_deinterleave_x2_epi8, _mm256_store_interleaved_epi8};
+use crate::avx2::avx2_utils::{
+    _mm256_deinterleave_x2_epi8, _mm256_havg_epu8, _mm256_store_interleaved_epi8,
+};
 use crate::yuv_support::{YuvChromaSubsampling, Yuy2Description};
 use crate::yuv_to_yuy2::YuvToYuy2Navigation;
 #[cfg(target_arch = "x86")]
@@ -64,10 +66,12 @@ pub(crate) unsafe fn yuv_to_yuy2_avx2_row_impl<const SAMPLING: u8, const YUY2_TA
     let mut _cx = nav.cx;
     let mut _uv_x = nav.uv_x;
     let mut _yuy2_x = nav.x;
+    let chroma_big_step = match chroma_subsampling {
+        YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 32,
+        YuvChromaSubsampling::Yuv444 => 64,
+    };
     unsafe {
-        let max_x_32 = (width as usize / 2).saturating_sub(32);
-
-        for x in (_yuy2_x..max_x_32).step_by(32) {
+        while _cx + 64 < width as usize {
             let u_pos = _uv_x;
             let v_pos = _uv_x;
             let y_pos = _cx;
@@ -93,8 +97,8 @@ pub(crate) unsafe fn yuv_to_yuy2_avx2_row_impl<const SAMPLING: u8, const YUY2_TA
                     _mm256_loadu_si256(v_ptr.add(32) as *const __m256i),
                 );
 
-                u_pixels = _mm256_avg_epu8(full_u.0, full_u.1);
-                v_pixels = _mm256_avg_epu8(full_v.0, full_v.1);
+                u_pixels = _mm256_havg_epu8(full_u.0, full_u.1);
+                v_pixels = _mm256_havg_epu8(full_v.0, full_v.1);
             } else {
                 u_pixels = _mm256_loadu_si256(u_plane.as_ptr().add(u_pos) as *const __m256i);
                 v_pixels = _mm256_loadu_si256(v_plane.as_ptr().add(v_pos) as *const __m256i);
@@ -109,7 +113,7 @@ pub(crate) unsafe fn yuv_to_yuy2_avx2_row_impl<const SAMPLING: u8, const YUY2_TA
                 Yuy2Description::VYUY => (v_pixels, low_y, u_pixels, high_y),
             };
 
-            let dst_offset = x * 4;
+            let dst_offset = _cx * 2;
 
             _mm256_store_interleaved_epi8(
                 yuy2_store.as_mut_ptr().add(dst_offset),
@@ -119,17 +123,12 @@ pub(crate) unsafe fn yuv_to_yuy2_avx2_row_impl<const SAMPLING: u8, const YUY2_TA
                 storage.3,
             );
 
-            _yuy2_x = x;
-
-            if x + 32 < max_x_32 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 32,
-                    YuvChromaSubsampling::Yuv444 => 64,
-                };
-                _cx += 64;
-            }
+            _uv_x += chroma_big_step;
+            _cx += 64;
         }
 
+        _yuy2_x = _cx;
+
         YuvToYuy2Navigation {
             cx: _cx,
             uv_x: _uv_x,
diff --git a/src/avx2/yuy2_to_rgb.rs b/src/avx2/yuy2_to_rgb.rs
index 862c4c87..654c31de 100644
--- a/src/avx2/yuy2_to_rgb.rs
+++ b/src/avx2/yuy2_to_rgb.rs
@@ -70,8 +70,6 @@ unsafe fn yuy2_to_rgb_avx_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
     let mut _yuy2_x = nav.x;
 
     unsafe {
-        let max_x_32 = (width as usize / 2).saturating_sub(32);
-
         let y_corr = _mm256_set1_epi8(range.bias_y as i8);
         let uv_corr = _mm256_set1_epi16(range.bias_uv as i16);
         let v_luma_coeff = _mm256_set1_epi16(transform.y_coef as i16);
@@ -83,8 +81,8 @@ unsafe fn yuy2_to_rgb_avx_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
         let zeros = _mm256_setzero_si256();
         let rounding_const = _mm256_set1_epi16((1 << 5) - 1);
 
-        for x in (_yuy2_x..max_x_32).step_by(32) {
-            let yuy2_offset = x * 4;
+        while _cx + 64 < width as usize {
+            let yuy2_offset = _cx * 2;
             let dst_pos = _cx * dst_chans.get_channels_count();
             let dst_ptr = rgb.as_mut_ptr().add(dst_pos);
 
@@ -347,11 +345,10 @@ unsafe fn yuy2_to_rgb_avx_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
                 }
             }
 
-            _yuy2_x = x;
-            if x + 32 < max_x_32 {
-                _cx += 64;
-            }
+            _cx += 64;
         }
+
+        _yuy2_x = _cx;
     }
 
     YuvToYuy2Navigation {
diff --git a/src/avx2/yuy2_to_yuv.rs b/src/avx2/yuy2_to_yuv.rs
index eacb6449..f53c0029 100644
--- a/src/avx2/yuy2_to_yuv.rs
+++ b/src/avx2/yuy2_to_yuv.rs
@@ -67,10 +67,8 @@ unsafe fn yuy2_to_yuv_avx_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
     let mut _uv_x = nav.uv_x;
     let mut _yuy2_x = nav.x;
 
-    let max_x_32 = (width as usize / 2).saturating_sub(32);
-
-    for x in (_yuy2_x..max_x_32).step_by(32) {
-        let dst_offset = x * 4;
+    while _cx + 64 < width as usize {
+        let dst_offset = _cx * 2;
         let u_pos = _uv_x;
         let v_pos = _uv_x;
         let y_pos = _cx;
@@ -128,16 +126,15 @@ unsafe fn yuy2_to_yuv_avx_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
         _mm256_storeu_si256(y_plane_ptr as *mut __m256i, y_first);
         _mm256_storeu_si256(y_plane_ptr.add(32) as *mut __m256i, y_second);
 
-        _yuy2_x = x;
-        if x + 32 < max_x_32 {
-            _uv_x += match chroma_subsampling {
-                YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 32,
-                YuvChromaSubsampling::Yuv444 => 64,
-            };
-            _cx += 64;
-        }
+        _uv_x += match chroma_subsampling {
+            YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 32,
+            YuvChromaSubsampling::Yuv444 => 64,
+        };
+        _cx += 64;
     }
 
+    _yuy2_x = _cx;
+
     YuvToYuy2Navigation {
         cx: _cx,
         uv_x: _uv_x,
diff --git a/src/images.rs b/src/images.rs
index 0ac7811d..e0066ec4 100644
--- a/src/images.rs
+++ b/src/images.rs
@@ -513,4 +513,16 @@ where
         check_yuv_packed(self.yuy.borrow(), self.yuy_stride, self.width, self.height)?;
         Ok(())
     }
+
+    /// Builds a [`YuvPackedImage`] from this image: `yuy` is obtained through
+    /// `self.yuy.borrow()`, while `yuy_stride`, `width` and `height` are copied as-is.
+    pub fn to_fixed(&self) -> YuvPackedImage<T> {
+        let yuy = self.yuy.borrow();
+        YuvPackedImage {
+            yuy,
+            yuy_stride: self.yuy_stride,
+            width: self.width,
+            height: self.height,
+        }
+    }
 }
diff --git a/src/neon/neon_simd_support.rs b/src/neon/neon_simd_support.rs
index e07a8a74..4ffae877 100644
--- a/src/neon/neon_simd_support.rs
+++ b/src/neon/neon_simd_support.rs
@@ -149,3 +149,27 @@ pub(crate) unsafe fn vldq_s16_endian<const ENDIANNESS: u8, const BYTES_POSITION:
     }
     vreinterpretq_s16_u16(v)
 }
+
+/// Loads 32 consecutive bytes starting at `src` as two 16-byte vectors.
+///
+/// # Safety
+///
+/// `src` must be valid for reading 32 bytes.
+#[inline(always)]
+pub(crate) unsafe fn xvld1q_u8_x2(src: *const u8) -> uint8x16x2_t {
+    let first = vld1q_u8(src);
+    let second = vld1q_u8(src.add(16));
+    uint8x16x2_t(first, second)
+}
+
+/// Stores the two 16-byte vectors of `b` to 32 consecutive bytes starting at `ptr`.
+///
+/// # Safety
+///
+/// `ptr` must be valid for writing 32 bytes.
+#[inline(always)]
+pub(crate) unsafe fn xvst1q_u8_x2(ptr: *mut u8, b: uint8x16x2_t) {
+    let (first, second) = (b.0, b.1);
+    vst1q_u8(ptr, first);
+    vst1q_u8(ptr.add(16), second);
+}
diff --git a/src/neon/rgba_to_yuv420.rs b/src/neon/rgba_to_yuv420.rs
index 78b00323..79bf90ae 100644
--- a/src/neon/rgba_to_yuv420.rs
+++ b/src/neon/rgba_to_yuv420.rs
@@ -181,11 +181,11 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420<const ORIGIN_CHANNELS: u8, const PR
         let y1 = vcombine_u8(vmovn_u16(y1_low), vmovn_u16(y1_high));
         vst1q_u8(y_plane1.get_unchecked_mut(cx..).as_mut_ptr(), y1);
 
-        let box_r_values = vpaddlq_u8(r_values0);
+        let box_r_values = vhaddq_u16(vpaddlq_u8(r_values0), vpaddlq_u8(r_values1));
         let r1 = vreinterpretq_s16_u16(vshlq_n_u16::<V_SCALE>(vrshrq_n_u16::<1>(box_r_values)));
-        let box_g_values = vpaddlq_u8(g_values0);
+        let box_g_values = vhaddq_u16(vpaddlq_u8(g_values0), vpaddlq_u8(g_values1));
         let g1 = vreinterpretq_s16_u16(vshlq_n_u16::<V_SCALE>(vrshrq_n_u16::<1>(box_g_values)));
-        let box_b_values = vpaddlq_u8(b_values0);
+        let box_b_values = vhaddq_u16(vpaddlq_u8(b_values0), vpaddlq_u8(b_values1));
         let b1 = vreinterpretq_s16_u16(vshlq_n_u16::<V_SCALE>(vrshrq_n_u16::<1>(box_b_values)));
 
         let mut cbl = vqrdmlahq_laneq_s16::<3>(uv_bias, r1, v_weights);
@@ -210,7 +210,6 @@ pub(crate) unsafe fn neon_rgba_to_yuv_rdm420<const ORIGIN_CHANNELS: u8, const PR
         vst1_u8(v_ptr.get_unchecked_mut(ux..).as_mut_ptr(), cr);
 
         ux += 8;
-
         cx += 16;
     }
 
@@ -433,9 +432,18 @@ pub(crate) unsafe fn neon_rgba_to_yuv420<const ORIGIN_CHANNELS: u8, const PRECIS
         let y1 = vcombine_u8(vmovn_u16(y1_low), vmovn_u16(y1_high));
         vst1q_u8(y_plane1.get_unchecked_mut(cx..).as_mut_ptr(), y1);
 
-        let r1 = vreinterpretq_s16_u16(vrshrq_n_u16::<1>(vpaddlq_u8(r_values0)));
-        let g1 = vreinterpretq_s16_u16(vrshrq_n_u16::<1>(vpaddlq_u8(g_values0)));
-        let b1 = vreinterpretq_s16_u16(vrshrq_n_u16::<1>(vpaddlq_u8(b_values0)));
+        let r1 = vreinterpretq_s16_u16(vrshrq_n_u16::<1>(vhaddq_u16(
+            vpaddlq_u8(r_values0),
+            vpaddlq_u8(r_values1),
+        )));
+        let g1 = vreinterpretq_s16_u16(vrshrq_n_u16::<1>(vhaddq_u16(
+            vpaddlq_u8(g_values0),
+            vpaddlq_u8(g_values1),
+        )));
+        let b1 = vreinterpretq_s16_u16(vrshrq_n_u16::<1>(vhaddq_u16(
+            vpaddlq_u8(b_values0),
+            vpaddlq_u8(b_values1),
+        )));
 
         let mut cb_h = vmlal_high_laneq_s16::<3>(uv_bias, r1, v_weights);
         cb_h = vmlal_high_laneq_s16::<4>(cb_h, g1, v_weights);
diff --git a/src/neon/yuv_to_yuy2.rs b/src/neon/yuv_to_yuy2.rs
index 581d2103..d48d408b 100644
--- a/src/neon/yuv_to_yuy2.rs
+++ b/src/neon/yuv_to_yuy2.rs
@@ -26,19 +26,16 @@
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+use crate::neon::neon_simd_support::xvld1q_u8_x2;
 use crate::yuv_support::{YuvChromaSubsampling, Yuy2Description};
 use crate::yuv_to_yuy2::YuvToYuy2Navigation;
 use std::arch::aarch64::*;
 
 pub(crate) fn yuv_to_yuy2_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
     y_plane: &[u8],
-    y_offset: usize,
     u_plane: &[u8],
-    u_offset: usize,
     v_plane: &[u8],
-    v_offset: usize,
     yuy2_store: &mut [u8],
-    yuy2_offset: usize,
     width: u32,
     nav: YuvToYuy2Navigation,
 ) -> YuvToYuy2Navigation {
@@ -47,27 +44,34 @@ pub(crate) fn yuv_to_yuy2_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize
 
     let shuffle_table: [u8; 16] = [0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15];
 
+    let chroma_big_step_size = match chroma_subsampling {
+        YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 16,
+        YuvChromaSubsampling::Yuv444 => 32,
+    };
+
+    let chroma_small_step_size = match chroma_subsampling {
+        YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 8,
+        YuvChromaSubsampling::Yuv444 => 16,
+    };
+
     let mut _cx = nav.cx;
     let mut _uv_x = nav.uv_x;
     let mut _yuy2_x = nav.x;
     unsafe {
         let v_shuffle = vld1q_u8(shuffle_table.as_ptr());
 
-        let max_x_16 = (width as usize / 2).saturating_sub(16);
-        let max_x_8 = (width as usize / 2).saturating_sub(8);
-
-        for x in (_yuy2_x..max_x_16).step_by(16) {
-            let u_pos = u_offset + _uv_x;
-            let v_pos = v_offset + _uv_x;
-            let y_pos = y_offset + _cx;
+        while _cx + 32 < width as usize {
+            let u_pos = _uv_x;
+            let v_pos = _uv_x;
+            let y_pos = _cx;
 
             let u_pixels;
             let v_pixels;
-            let y_pixels = vld1q_u8_x2(y_plane.as_ptr().add(y_pos));
+            let y_pixels = xvld1q_u8_x2(y_plane.as_ptr().add(y_pos));
 
             if chroma_subsampling == YuvChromaSubsampling::Yuv444 {
-                let full_u = vld1q_u8_x2(u_plane.as_ptr().add(u_pos));
-                let full_v = vld1q_u8_x2(v_plane.as_ptr().add(v_pos));
+                let full_u = xvld1q_u8_x2(u_plane.as_ptr().add(u_pos));
+                let full_v = xvld1q_u8_x2(v_plane.as_ptr().add(v_pos));
 
                 u_pixels = vhaddq_u8(full_u.0, full_u.1);
                 v_pixels = vhaddq_u8(full_v.0, full_v.1);
@@ -89,25 +93,17 @@ pub(crate) fn yuv_to_yuy2_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize
                 Yuy2Description::VYUY => uint8x16x4_t(v_pixels, low_y, u_pixels, high_y),
             };
 
-            let dst_offset = yuy2_offset + x * 4;
+            let dst_offset = _cx * 2;
 
             vst4q_u8(yuy2_store.as_mut_ptr().add(dst_offset), storage);
-
-            _yuy2_x = x;
-
-            if x + 16 < max_x_16 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 16,
-                    YuvChromaSubsampling::Yuv444 => 32,
-                };
-                _cx += 32;
-            }
+            _cx += 32;
+            _uv_x += chroma_big_step_size;
         }
 
-        for x in (_yuy2_x..max_x_8).step_by(8) {
-            let u_pos = u_offset + _uv_x;
-            let v_pos = v_offset + _uv_x;
-            let y_pos = y_offset + _cx;
+        while _cx + 16 < width as usize {
+            let u_pos = _uv_x;
+            let v_pos = _uv_x;
+            let y_pos = _cx;
 
             let u_pixels;
             let v_pixels;
@@ -144,20 +140,15 @@ pub(crate) fn yuv_to_yuy2_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize
                 Yuy2Description::VYUY => uint8x8x4_t(v_pixels, low_y, u_pixels, high_y),
             };
 
-            let dst_offset = yuy2_offset + x * 4;
+            let dst_offset = _cx * 2;
 
             vst4_u8(yuy2_store.as_mut_ptr().add(dst_offset), storage);
 
-            _yuy2_x = x;
-
-            if x + 8 < max_x_8 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 8,
-                    YuvChromaSubsampling::Yuv444 => 16,
-                };
-                _cx += 16;
-            }
+            _cx += 16;
+            _uv_x += chroma_small_step_size;
         }
+
+        _yuy2_x = _cx;
     }
 
     YuvToYuy2Navigation {
diff --git a/src/neon/yuy2_to_rgb.rs b/src/neon/yuy2_to_rgb.rs
index 038a8415..442857fd 100644
--- a/src/neon/yuy2_to_rgb.rs
+++ b/src/neon/yuy2_to_rgb.rs
@@ -51,9 +51,6 @@ pub(crate) fn yuy2_to_rgb_neon<
     let mut _yuy2_x = nav.x;
 
     unsafe {
-        let max_x_16 = (width as usize / 2).saturating_sub(16);
-        let max_x_8 = (width as usize / 2).saturating_sub(8);
-
         let y_corr = vdupq_n_u8(range.bias_y as u8);
         let uv_corr = vdupq_n_s16(range.bias_uv as i16);
         let v_luma_coeff = vdupq_n_u8(transform.y_coef as u8);
@@ -64,12 +61,12 @@ pub(crate) fn yuy2_to_rgb_neon<
         let v_g_coeff_2 = vdupq_n_s16(-(transform.g_coeff_2 as i16));
         let v_alpha = vdupq_n_u8(255u8);
 
-        for x in (_yuy2_x..max_x_16).step_by(16) {
-            let dst_offset = x * 4;
+        while _cx + 32 < width as usize {
+            let yuy2_offset = _cx * 2;
             let dst_pos = _cx * dst_chans.get_channels_count();
             let dst_ptr = rgb.as_mut_ptr().add(dst_pos);
 
-            let pixel_set = vld4q_u8(yuy2_store.as_ptr().add(dst_offset));
+            let pixel_set = vld4q_u8(yuy2_store.as_ptr().add(yuy2_offset));
             let mut y_first = match yuy2_source {
                 Yuy2Description::YUYV | Yuy2Description::YVYU => pixel_set.0,
                 Yuy2Description::UYVY | Yuy2Description::VYUY => pixel_set.1,
@@ -245,18 +242,15 @@ pub(crate) fn yuy2_to_rgb_neon<
                 }
             }
 
-            _yuy2_x = x;
-            if x + 16 < max_x_16 {
-                _cx += 32;
-            }
+            _cx += 32;
         }
 
-        for x in (_yuy2_x..max_x_8).step_by(8) {
-            let dst_offset = x * 4;
+        while _cx + 16 < width as usize {
+            let yuy2_offset = _cx * 2;
             let dst_pos = _cx * dst_chans.get_channels_count();
             let dst_ptr = rgb.as_mut_ptr().add(dst_pos);
 
-            let pixel_set = vld4_u8(yuy2_store.as_ptr().add(dst_offset));
+            let pixel_set = vld4_u8(yuy2_store.as_ptr().add(yuy2_offset));
             let mut y_first = match yuy2_source {
                 Yuy2Description::YUYV | Yuy2Description::YVYU => pixel_set.0,
                 Yuy2Description::UYVY | Yuy2Description::VYUY => pixel_set.1,
@@ -355,11 +349,9 @@ pub(crate) fn yuy2_to_rgb_neon<
                 }
             }
 
-            _yuy2_x = x;
-            if x + 8 < max_x_8 {
-                _cx += 16;
-            }
+            _cx += 16;
         }
+        _yuy2_x = _cx;
     }
 
     YuvToYuy2Navigation {
diff --git a/src/neon/yuy2_to_yuv.rs b/src/neon/yuy2_to_yuv.rs
index 30198aee..f2bace0b 100644
--- a/src/neon/yuy2_to_yuv.rs
+++ b/src/neon/yuy2_to_yuv.rs
@@ -26,6 +26,7 @@
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+use crate::neon::neon_simd_support::xvst1q_u8_x2;
 use crate::yuv_support::{YuvChromaSubsampling, Yuy2Description};
 use crate::yuv_to_yuy2::YuvToYuy2Navigation;
 use std::arch::aarch64::*;
@@ -46,11 +47,8 @@ pub(crate) fn yuy2_to_yuv_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize
     let mut _yuy2_x = nav.x;
 
     unsafe {
-        let max_x_16 = (width as usize / 2).saturating_sub(16);
-        let max_x_8 = (width as usize / 2).saturating_sub(8);
-
-        for x in (_yuy2_x..max_x_16).step_by(16) {
-            let dst_offset = x * 4;
+        while _cx + 32 < width as usize {
+            let dst_offset = _cx * 2;
             let u_pos = _uv_x;
             let v_pos = _uv_x;
             let y_pos = _cx;
@@ -83,7 +81,7 @@ pub(crate) fn yuy2_to_yuv_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize
                 Yuy2Description::VYUY => pixel_set.0,
             };
 
-            vst1q_u8_x2(
+            xvst1q_u8_x2(
                 y_plane.as_mut_ptr().add(y_pos),
                 uint8x16x2_t(y_first, y_second),
             );
@@ -93,11 +91,11 @@ pub(crate) fn yuy2_to_yuv_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize
                 let high_u_value = vzip2q_u8(u_value, u_value);
                 let low_v_value = vzip1q_u8(v_value, v_value);
                 let high_v_value = vzip2q_u8(v_value, v_value);
-                vst1q_u8_x2(
+                xvst1q_u8_x2(
                     u_plane.as_mut_ptr().add(u_pos),
                     uint8x16x2_t(low_u_value, high_u_value),
                 );
-                vst1q_u8_x2(
+                xvst1q_u8_x2(
                     v_plane.as_mut_ptr().add(v_pos),
                     uint8x16x2_t(low_v_value, high_v_value),
                 );
@@ -106,18 +104,15 @@ pub(crate) fn yuy2_to_yuv_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize
                 vst1q_u8(v_plane.as_mut_ptr().add(v_pos), v_value);
             }
 
-            _yuy2_x = x;
-            if x + 16 < max_x_16 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 16,
-                    YuvChromaSubsampling::Yuv444 => 32,
-                };
-                _cx += 32;
-            }
+            _uv_x += match chroma_subsampling {
+                YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 16,
+                YuvChromaSubsampling::Yuv444 => 32,
+            };
+            _cx += 32;
         }
 
-        for x in (_yuy2_x..max_x_8).step_by(8) {
-            let dst_offset = x * 4;
+        while _cx + 16 < width as usize {
+            let dst_offset = _cx * 2;
             let u_pos = _uv_x;
             let v_pos = _uv_x;
             let y_pos = _cx;
@@ -173,15 +168,14 @@ pub(crate) fn yuy2_to_yuv_neon_impl<const SAMPLING: u8, const YUY2_TARGET: usize
                 vst1_u8(v_plane.as_mut_ptr().add(v_pos), v_value);
             }
 
-            _yuy2_x = x;
-            if x + 8 < max_x_8 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 8,
-                    YuvChromaSubsampling::Yuv444 => 16,
-                };
-                _cx += 16;
-            }
+            _uv_x += match chroma_subsampling {
+                YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 8,
+                YuvChromaSubsampling::Yuv444 => 16,
+            };
+            _cx += 16;
         }
+
+        _yuy2_x = _cx;
     }
 
     YuvToYuy2Navigation {
diff --git a/src/rgba_to_yuv.rs b/src/rgba_to_yuv.rs
index 13befbc1..13e835f0 100644
--- a/src/rgba_to_yuv.rs
+++ b/src/rgba_to_yuv.rs
@@ -380,9 +380,9 @@ fn rgbx_to_yuv8<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(
                 >> PRECISION;
             y_dst1[1] = y_11.max(i_bias_y).min(i_cap_y) as u8;
 
-            let ruv = (r00 + r01 + 1) >> 1;
-            let guv = (g00 + g01 + 1) >> 1;
-            let buv = (b00 + b01 + 1) >> 1;
+            let ruv = (r00 + r01 + r10 + r11 + 2) >> 2;
+            let guv = (g00 + g01 + g10 + g11 + 2) >> 2;
+            let buv = (b00 + b01 + b10 + b11 + 2) >> 2;
 
             let cb = (ruv * transform.cb_r + guv * transform.cb_g + buv * transform.cb_b + bias_uv)
                 >> PRECISION;
@@ -416,6 +416,12 @@ fn rgbx_to_yuv8<const ORIGIN_CHANNELS: u8, const SAMPLING: u8>(
                 (r1 * transform.yr + g1 * transform.yg + b1 * transform.yb + bias_y) >> PRECISION;
             *y1_last = y_1.max(i_bias_y).min(i_cap_y) as u8;
 
+            // Vertical average of the two rows; the +1 rounds to nearest, matching the
+            // rounded `(.. + 2) >> 2` average used for full 2x2 blocks in this function.
+            let r0 = (r0 + r1 + 1) >> 1;
+            let g0 = (g0 + g1 + 1) >> 1;
+            let b0 = (b0 + b1 + 1) >> 1;
+
             let cb = (r0 * transform.cb_r + g0 * transform.cb_g + b0 * transform.cb_b + bias_uv)
                 >> PRECISION;
             let cr = (r0 * transform.cr_r + g0 * transform.cr_g + b0 * transform.cr_b + bias_uv)
diff --git a/src/sse/rgb_to_nv.rs b/src/sse/rgb_to_nv.rs
index 94b6d928..85cfff55 100644
--- a/src/sse/rgb_to_nv.rs
+++ b/src/sse/rgb_to_nv.rs
@@ -28,6 +28,7 @@
  */
 
 use crate::internals::ProcessedOffset;
+use crate::sse::sse_pairwise_avg_epi16;
 use crate::sse::sse_support::{sse_deinterleave_rgb, sse_deinterleave_rgba};
 use crate::yuv_support::{
     CbCrForwardTransform, YuvChromaRange, YuvChromaSubsampling, YuvNVOrder, YuvSourceChannels,
@@ -286,9 +287,9 @@ unsafe fn sse_rgba_to_nv_row_impl<
         } else if chroma_subsampling == YuvChromaSubsampling::Yuv422
             || (chroma_subsampling == YuvChromaSubsampling::Yuv420 && compute_uv_row)
         {
-            let r1 = _mm_avg_epu16(r_low, r_high);
-            let g1 = _mm_avg_epu16(g_low, g_high);
-            let b1 = _mm_avg_epu16(b_low, b_high);
+            let r1 = sse_pairwise_avg_epi16(r_low, r_high);
+            let g1 = sse_pairwise_avg_epi16(g_low, g_high);
+            let b1 = sse_pairwise_avg_epi16(b_low, b_high);
 
             let cbk = _mm_max_epi16(
                 _mm_min_epi16(
diff --git a/src/sse/rgba_to_yuv.rs b/src/sse/rgba_to_yuv.rs
index 9fa834cc..cbf600d1 100644
--- a/src/sse/rgba_to_yuv.rs
+++ b/src/sse/rgba_to_yuv.rs
@@ -28,6 +28,7 @@
  */
 
 use crate::internals::ProcessedOffset;
+use crate::sse::sse_pairwise_avg_epi16;
 use crate::sse::sse_support::{sse_deinterleave_rgb, sse_deinterleave_rgba};
 use crate::yuv_support::{
     CbCrForwardTransform, YuvChromaRange, YuvChromaSubsampling, YuvSourceChannels,
@@ -261,9 +262,9 @@ unsafe fn sse_rgba_to_yuv_row_impl<const ORIGIN_CHANNELS: u8, const SAMPLING: u8
         } else if chroma_subsampling == YuvChromaSubsampling::Yuv422
             || (chroma_subsampling == YuvChromaSubsampling::Yuv420)
         {
-            let r1 = _mm_avg_epu16(r_low, r_high);
-            let g1 = _mm_avg_epu16(g_low, g_high);
-            let b1 = _mm_avg_epu16(b_low, b_high);
+            let r1 = sse_pairwise_avg_epi16(r_low, r_high);
+            let g1 = sse_pairwise_avg_epi16(g_low, g_high);
+            let b1 = sse_pairwise_avg_epi16(b_low, b_high);
 
             let cbk = _mm_max_epi16(
                 _mm_min_epi16(
diff --git a/src/sse/rgba_to_yuv420.rs b/src/sse/rgba_to_yuv420.rs
index 6b1aaec5..5d9407aa 100644
--- a/src/sse/rgba_to_yuv420.rs
+++ b/src/sse/rgba_to_yuv420.rs
@@ -28,6 +28,7 @@
  */
 
 use crate::internals::ProcessedOffset;
+use crate::sse::sse_pairwise_wide_avg;
 use crate::sse::sse_support::{sse_deinterleave_rgb, sse_deinterleave_rgba};
 use crate::yuv_support::{CbCrForwardTransform, YuvChromaRange, YuvSourceChannels};
 #[cfg(target_arch = "x86")]
@@ -272,9 +273,18 @@ unsafe fn sse_rgba_to_yuv_row_impl420<const ORIGIN_CHANNELS: u8>(
             y1_yuv,
         );
 
-        let r1 = _mm_avg_epu16(r0_low, r0_high);
-        let g1 = _mm_avg_epu16(g0_low, g0_high);
-        let b1 = _mm_avg_epu16(b0_low, b0_high);
+        let r1 = _mm_slli_epi16::<V_SCALE>(_mm_avg_epu16(
+            sse_pairwise_wide_avg(r_values0),
+            sse_pairwise_wide_avg(r_values1),
+        ));
+        let g1 = _mm_slli_epi16::<V_SCALE>(_mm_avg_epu16(
+            sse_pairwise_wide_avg(g_values0),
+            sse_pairwise_wide_avg(g_values1),
+        ));
+        let b1 = _mm_slli_epi16::<V_SCALE>(_mm_avg_epu16(
+            sse_pairwise_wide_avg(b_values0),
+            sse_pairwise_wide_avg(b_values1),
+        ));
 
         let cbk = _mm_max_epi16(
             _mm_min_epi16(
diff --git a/src/sse/sse_support.rs b/src/sse/sse_support.rs
index 0ab2e04e..61364ad0 100644
--- a/src/sse/sse_support.rs
+++ b/src/sse/sse_support.rs
@@ -237,6 +237,40 @@ pub(crate) unsafe fn sse_pairwise_widen_avg(v: __m128i) -> __m128i {
     _mm_packus_epi16(shifted, shifted)
 }
 
+/// Averages each adjacent pair of unsigned bytes in `v`, with rounding.
+///
+/// Returns eight 16-bit lanes, lane `i` holding `(v[2i] + v[2i + 1] + 1) >> 1`.
+///
+/// # Safety
+///
+/// The caller must ensure the CPU supports SSSE3 (`_mm_maddubs_epi16`).
+#[inline(always)]
+pub(crate) unsafe fn sse_pairwise_wide_avg(v: __m128i) -> __m128i {
+    // Multiplying every byte by 1 makes maddubs a plain "sum adjacent u8 pairs".
+    let sums = _mm_maddubs_epi16(v, _mm_set1_epi8(1));
+    // The rounding bias must be a 16-bit 1; a byte-splat 1 reads as 0x0101 (257) per lane.
+    _mm_srli_epi16::<1>(_mm_add_epi16(sums, _mm_set1_epi16(1)))
+}
+
+/// Horizontally averages adjacent byte pairs of `a` and `b` into one byte vector.
+///
+/// Bytes 0..8 of the result are the rounded pair averages of `a`, bytes 8..16
+/// those of `b`; each is `(x[2i] + x[2i + 1] + 1) >> 1`.
+///
+/// # Safety
+///
+/// The caller must ensure the CPU supports SSSE3 (`_mm_maddubs_epi16`).
+#[inline(always)]
+pub(crate) unsafe fn _mm_havg_epu8(a: __m128i, b: __m128i) -> __m128i {
+    let ones = _mm_set1_epi8(1);
+    // 16-bit rounding bias; reusing the byte splat would add 0x0101 (257) per lane.
+    let round = _mm_set1_epi16(1);
+    let lo = _mm_srli_epi16::<1>(_mm_add_epi16(_mm_maddubs_epi16(a, ones), round));
+    let hi = _mm_srli_epi16::<1>(_mm_add_epi16(_mm_maddubs_epi16(b, ones), round));
+    // Averages are at most 255, so the unsigned saturating pack is lossless.
+    _mm_packus_epi16(lo, hi)
+}
+
 #[inline(always)]
 pub(crate) unsafe fn sse_div_by255(v: __m128i) -> __m128i {
     let addition = _mm_set1_epi16(127);
diff --git a/src/sse/yuv_to_yuy2.rs b/src/sse/yuv_to_yuy2.rs
index 1aa4f089..9ee6ef4d 100644
--- a/src/sse/yuv_to_yuy2.rs
+++ b/src/sse/yuv_to_yuy2.rs
@@ -26,6 +26,7 @@
  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
+use crate::sse::_mm_havg_epu8;
 use crate::sse::sse_support::{
     __mm128x4, _mm_combineh_epi8, _mm_combinel_epi8, _mm_gethigh_epi8, _mm_getlow_epi8,
     _mm_loadu_si128_x2, _mm_storeu_si128_x4, sse_interleave_rgba,
@@ -67,15 +68,23 @@ unsafe fn yuv_to_yuy2_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
     let mut _cx = nav.cx;
     let mut _uv_x = nav.uv_x;
     let mut _yuy2_x = nav.x;
+
+    let chroma_big_step_size = match chroma_subsampling {
+        YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 16,
+        YuvChromaSubsampling::Yuv444 => 32,
+    };
+
+    let chroma_small_step_size = match chroma_subsampling {
+        YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 8,
+        YuvChromaSubsampling::Yuv444 => 16,
+    };
+
     unsafe {
         #[rustfmt::skip]
         let v_shuffle = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14,
                                                 1, 3, 5, 7, 9, 11, 13, 15);
 
-        let max_x_16 = (width as usize / 2).saturating_sub(16);
-        let max_x_8 = (width as usize / 2).saturating_sub(8);
-
-        for x in (_yuy2_x..max_x_16).step_by(16) {
+        while _cx + 32 < width as usize {
             let u_pos = _uv_x;
             let v_pos = _uv_x;
             let y_pos = _cx;
@@ -88,8 +97,8 @@ unsafe fn yuv_to_yuy2_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
                 let full_u = _mm_loadu_si128_x2(u_plane.as_ptr().add(u_pos));
                 let full_v = _mm_loadu_si128_x2(v_plane.as_ptr().add(v_pos));
 
-                u_pixels = _mm_avg_epu8(full_u.0, full_u.1);
-                v_pixels = _mm_avg_epu8(full_v.0, full_v.1);
+                u_pixels = _mm_havg_epu8(full_u.0, full_u.1);
+                v_pixels = _mm_havg_epu8(full_v.0, full_v.1);
             } else {
                 u_pixels = _mm_loadu_si128(u_plane.as_ptr().add(u_pos) as *const __m128i);
                 v_pixels = _mm_loadu_si128(v_plane.as_ptr().add(v_pos) as *const __m128i);
@@ -108,25 +117,17 @@ unsafe fn yuv_to_yuy2_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
                 Yuy2Description::VYUY => __mm128x4(v_pixels, low_y, u_pixels, high_y),
             };
 
-            let dst_offset = x * 4;
+            let dst_offset = _cx * 2;
 
             let inverleaved = sse_interleave_rgba(storage.0, storage.1, storage.2, storage.3);
             let converted = __mm128x4(inverleaved.0, inverleaved.1, inverleaved.2, inverleaved.3);
 
             _mm_storeu_si128_x4(yuy2_store.as_mut_ptr().add(dst_offset), converted);
-
-            _yuy2_x = x;
-
-            if x + 16 < max_x_16 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 16,
-                    YuvChromaSubsampling::Yuv444 => 32,
-                };
-                _cx += 32;
-            }
+            _cx += 32;
+            _uv_x += chroma_big_step_size;
         }
 
-        for x in (_yuy2_x..max_x_8).step_by(8) {
+        while _cx + 16 < width as usize {
             let u_pos = _uv_x;
             let v_pos = _uv_x;
             let y_pos = _cx;
@@ -143,12 +144,12 @@ unsafe fn yuv_to_yuy2_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
 
                 let low_u = _mm_getlow_epi8(full_u);
                 let high_u = _mm_gethigh_epi8(full_u);
-                u_pixels = _mm_avg_epu8(low_u, high_u);
+                u_pixels = _mm_havg_epu8(low_u, high_u);
 
                 let low_v = _mm_getlow_epi8(full_v);
                 let high_v = _mm_gethigh_epi8(full_v);
 
-                v_pixels = _mm_avg_epu8(low_v, high_v);
+                v_pixels = _mm_havg_epu8(low_v, high_v);
             } else {
                 u_pixels = _mm_loadu_si64(u_plane.as_ptr().add(u_pos));
                 v_pixels = _mm_loadu_si64(v_plane.as_ptr().add(v_pos));
@@ -169,23 +170,18 @@ unsafe fn yuv_to_yuy2_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
             let inverleaved = sse_interleave_rgba(storage.0, storage.1, storage.2, storage.3);
             let converted = __mm128x4(inverleaved.0, inverleaved.1, inverleaved.2, inverleaved.3);
 
-            let dst_offset = x * 4;
+            let dst_offset = _cx * 2;
 
             let ptr = yuy2_store.as_mut_ptr().add(dst_offset);
 
             _mm_storeu_si128(ptr as *mut __m128i, converted.0);
             _mm_storeu_si128(ptr.add(16) as *mut __m128i, converted.1);
 
-            _yuy2_x = x;
-
-            if x + 8 < max_x_8 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 8,
-                    YuvChromaSubsampling::Yuv444 => 16,
-                };
-                _cx += 16;
-            }
+            _cx += 16;
+            _uv_x += chroma_small_step_size;
         }
+
+        _yuy2_x = _cx;
     }
 
     YuvToYuy2Navigation {
diff --git a/src/sse/yuy2_to_rgb.rs b/src/sse/yuy2_to_rgb.rs
index 9eac582b..e33d16dc 100644
--- a/src/sse/yuy2_to_rgb.rs
+++ b/src/sse/yuy2_to_rgb.rs
@@ -67,9 +67,6 @@ unsafe fn yuy2_to_rgb_sse_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
     let mut _yuy2_x = nav.x;
 
     unsafe {
-        let max_x_16 = (width as usize / 2).saturating_sub(16);
-        let max_x_8 = (width as usize / 2).saturating_sub(8);
-
         let y_corr = _mm_set1_epi8(range.bias_y as i8);
         let uv_corr = _mm_set1_epi16(range.bias_uv as i16);
         let v_luma_coeff = _mm_set1_epi16(transform.y_coef as i16);
@@ -82,8 +79,8 @@ unsafe fn yuy2_to_rgb_sse_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
 
         let zeros = _mm_setzero_si128();
 
-        for x in (_yuy2_x..max_x_16).step_by(16) {
-            let yuy2_offset = x * 4;
+        while _cx + 32 < width as usize {
+            let yuy2_offset = _cx * 2;
             let dst_pos = _cx * dst_chans.get_channels_count();
             let dst_ptr = rgb.as_mut_ptr().add(dst_pos);
 
@@ -328,14 +325,11 @@ unsafe fn yuy2_to_rgb_sse_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
                 }
             }
 
-            _yuy2_x = x;
-            if x + 16 < max_x_16 {
-                _cx += 32;
-            }
+            _cx += 32;
         }
 
-        for x in (_yuy2_x..max_x_8).step_by(8) {
-            let yuy2_offset = x * 4;
+        while _cx + 16 < width as usize {
+            let yuy2_offset = _cx * 2;
             let dst_pos = _cx * dst_chans.get_channels_count();
             let dst_ptr = rgb.as_mut_ptr().add(dst_pos);
 
@@ -472,11 +466,10 @@ unsafe fn yuy2_to_rgb_sse_impl<const DST_CHANNELS: u8, const YUY2_TARGET: usize>
                 }
             }
 
-            _yuy2_x = x;
-            if x + 8 < max_x_8 {
-                _cx += 16;
-            }
+            _cx += 16;
         }
+
+        _yuy2_x = _cx;
     }
 
     YuvToYuy2Navigation {
diff --git a/src/sse/yuy2_to_yuv.rs b/src/sse/yuy2_to_yuv.rs
index bb49217c..32dfa66e 100644
--- a/src/sse/yuy2_to_yuv.rs
+++ b/src/sse/yuy2_to_yuv.rs
@@ -66,16 +66,13 @@ unsafe fn yuy2_to_yuv_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
     let mut _yuy2_x = nav.x;
 
     unsafe {
-        let max_x_16 = (width as usize / 2).saturating_sub(16);
-        let max_x_8 = (width as usize / 2).saturating_sub(8);
-
-        for x in (_yuy2_x..max_x_16).step_by(16) {
-            let yuy2_offset = x * 4;
+        while _cx + 32 < width as usize {
+            let dst_offset = _cx * 2;
             let u_pos = _uv_x;
             let v_pos = _uv_x;
             let y_pos = _cx;
 
-            let yuy2_ptr = yuy2_store.as_ptr().add(yuy2_offset);
+            let yuy2_ptr = yuy2_store.as_ptr().add(dst_offset);
 
             let j0 = _mm_loadu_si128(yuy2_ptr as *const __m128i);
             let j1 = _mm_loadu_si128(yuy2_ptr.add(16) as *const __m128i);
@@ -133,23 +130,20 @@ unsafe fn yuy2_to_yuv_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
             _mm_storeu_si128(y_plane_ptr as *mut __m128i, y_first);
             _mm_storeu_si128(y_plane_ptr.add(16) as *mut __m128i, y_second);
 
-            _yuy2_x = x;
-            if x + 16 < max_x_16 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 16,
-                    YuvChromaSubsampling::Yuv444 => 32,
-                };
-                _cx += 32;
-            }
+            _uv_x += match chroma_subsampling {
+                YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 16,
+                YuvChromaSubsampling::Yuv444 => 32,
+            };
+            _cx += 32;
         }
 
-        for x in (_yuy2_x..max_x_8).step_by(8) {
-            let yuy2_offset = x * 4;
+        while _cx + 16 < width as usize {
+            let dst_offset = _cx * 2;
             let u_pos = _uv_x;
             let v_pos = _uv_x;
             let y_pos = _cx;
 
-            let yuy2_ptr = yuy2_store.as_ptr().add(yuy2_offset);
+            let yuy2_ptr = yuy2_store.as_ptr().add(dst_offset);
 
             let j0 = _mm_loadu_si128(yuy2_ptr as *const __m128i);
             let j1 = _mm_loadu_si128(yuy2_ptr.add(16) as *const __m128i);
@@ -203,15 +197,14 @@ unsafe fn yuy2_to_yuv_sse_impl<const SAMPLING: u8, const YUY2_TARGET: usize>(
                 );
             }
 
-            _yuy2_x = x;
-            if x + 8 < max_x_8 {
-                _uv_x += match chroma_subsampling {
-                    YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 8,
-                    YuvChromaSubsampling::Yuv444 => 16,
-                };
-                _cx += 16;
-            }
+            _uv_x += match chroma_subsampling {
+                YuvChromaSubsampling::Yuv420 | YuvChromaSubsampling::Yuv422 => 8,
+                YuvChromaSubsampling::Yuv444 => 16,
+            };
+            _cx += 16;
         }
+
+        _yuy2_x = _cx;
     }
 
     YuvToYuy2Navigation {
diff --git a/src/yuv_to_yuy2.rs b/src/yuv_to_yuy2.rs
index 7e3249e4..51c3d818 100644
--- a/src/yuv_to_yuy2.rs
+++ b/src/yuv_to_yuy2.rs
@@ -94,9 +94,9 @@ impl ProcessWideRow<u8> for u8 {
         let mut _processed = 0usize;
 
         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-        let mut _use_sse = is_x86_feature_detected!("sse4.1");
+        let _use_sse = is_x86_feature_detected!("sse4.1");
         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-        let mut _use_avx2 = is_x86_feature_detected!("avx2");
+        let _use_avx2 = is_x86_feature_detected!("avx2");
 
         #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
         {
@@ -128,13 +128,9 @@ impl ProcessWideRow<u8> for u8 {
         {
             let processed = yuv_to_yuy2_neon_impl::<SAMPLING, YUY2_TARGET>(
                 _y_src,
-                0,
                 _u_src,
-                0,
                 _v_src,
-                0,
                 _yuy2,
-                0,
                 _width as u32,
                 YuvToYuy2Navigation::new(0, 0, 0),
             );
@@ -332,7 +328,7 @@ pub(crate) fn yuv_to_yuy2_impl<
                 .zip(y_src.chunks_exact(2))
                 .zip(u_src.iter())
                 .zip(v_src.iter())
-                .skip(processed)
+                .skip(processed / 2)
             {
                 yuy2[yuy2_target.get_first_y_position()] = y_src[0];
                 yuy2[yuy2_target.get_second_y_position()] = y_src[1];
@@ -422,7 +418,7 @@ pub(crate) fn yuv_to_yuy2_impl<
                     .zip(y_src.chunks_exact(2))
                     .zip(u_src.iter())
                     .zip(v_src.iter())
-                    .skip(processed)
+                    .skip(processed / 2)
                 {
                     yuy2[yuy2_target.get_first_y_position()] = y_src[0];
                     yuy2[yuy2_target.get_second_y_position()] = y_src[1];
@@ -483,7 +479,7 @@ pub(crate) fn yuv_to_yuy2_impl<
                 .zip(rem_y.chunks_exact(2))
                 .zip(last_u.iter())
                 .zip(last_v.iter())
-                .skip(processed)
+                .skip(processed / 2)
             {
                 yuy2[yuy2_target.get_first_y_position()] = y_src[0];
                 yuy2[yuy2_target.get_second_y_position()] = y_src[1];
diff --git a/src/yuy2_to_rgb.rs b/src/yuy2_to_rgb.rs
index 471312b8..dbbe58fb 100644
--- a/src/yuy2_to_rgb.rs
+++ b/src/yuy2_to_rgb.rs
@@ -80,9 +80,9 @@ fn yuy2_to_rgb_impl<const DESTINATION_CHANNELS: u8, const YUY2_SOURCE: usize>(
     let bias_uv = range.bias_uv as i32;
 
     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    let mut _use_sse = std::arch::is_x86_feature_detected!("sse4.1");
+    let _use_sse = std::arch::is_x86_feature_detected!("sse4.1");
     #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
-    let mut _use_avx = std::arch::is_x86_feature_detected!("avx2");
+    let _use_avx = std::arch::is_x86_feature_detected!("avx2");
 
     let rgb_iter;
     let yuy2_iter;
@@ -159,7 +159,7 @@ fn yuy2_to_rgb_impl<const DESTINATION_CHANNELS: u8, const YUY2_SOURCE: usize>(
         for (rgb, yuy2) in rgb_store
             .chunks_exact_mut(2 * channels)
             .zip(yuy2_store.chunks_exact(4))
-            .skip(_cx)
+            .skip(_cx / 2)
         {
             let first_y = yuy2[yuy2_source.get_first_y_position()];
             let second_y = yuy2[yuy2_source.get_second_y_position()];