From fcc88722633d75dd219c1e5ea569ca1ac586913e Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Thu, 30 May 2024 13:02:16 -0500 Subject: [PATCH 01/10] added 64-bit counter support for ChaCha20Legacy, and a test --- Cargo.lock | 47 ++++++++++++++++++++++++--- chacha20/Cargo.toml | 1 + chacha20/src/backends/avx2.rs | 51 +++++++++++++++++++++-------- chacha20/src/backends/neon.rs | 24 +++++++++++--- chacha20/src/backends/soft.rs | 13 +++++++- chacha20/src/backends/sse2.rs | 33 ++++++++++++------- chacha20/src/lib.rs | 18 +++++------ chacha20/src/variants.rs | 61 +++++++++++++++++++++++++++++++++++ chacha20/tests/mod.rs | 42 ++++++++++++++++++++++++ 9 files changed, 246 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 40fd91e6..b524f0df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,12 +23,24 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chacha20" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f08493fa7707effc63254c66c6ea908675912493cd67952eda23c09fae2610b1" +dependencies = [ + "cfg-if", + "cipher 0.3.0", + "cpufeatures", +] + [[package]] name = "chacha20" version = "0.10.0-pre" dependencies = [ "cfg-if", - "cipher", + "chacha20 0.7.3", + "cipher 0.5.0-pre.4", "cpufeatures", "hex-literal", "rand_chacha", @@ -38,6 +50,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "cipher" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ee52072ec15386f770805afd189a01c8841be8696bed250fa2f13c4c0d6dfb7" +dependencies = [ + "generic-array", +] + [[package]] name = "cipher" version = "0.5.0-pre.4" @@ -70,6 +91,16 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.13" @@ -85,7 +116,7 @@ dependencies = [ name = "hc-256" version = "0.6.0-pre" dependencies = [ - "cipher", + "cipher 0.5.0-pre.4", "hex-literal", ] @@ -154,7 +185,7 @@ dependencies = [ name = "rabbit" version = "0.5.0-pre" dependencies = [ - "cipher", + "cipher 0.5.0-pre.4", "hex-literal", ] @@ -190,7 +221,7 @@ dependencies = [ name = "rc4" version = "0.2.0-pre" dependencies = [ - "cipher", + "cipher 0.5.0-pre.4", "hex-literal", ] @@ -205,7 +236,7 @@ name = "salsa20" version = "0.11.0-pre" dependencies = [ "cfg-if", - "cipher", + "cipher 0.5.0-pre.4", "hex-literal", ] @@ -263,6 +294,12 @@ version = "1.0.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index 1420ea5b..ed8bc31c 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -29,6 +29,7 @@ zeroize = { version = "1.8.1", optional = true } cpufeatures = "0.2" [dev-dependencies] +chacha20_0_7 = { package = "chacha20", version = "0.7.0", features = ["legacy"] } cipher = { version = "=0.5.0-pre.4", features = ["dev"] } 
hex-literal = "0.4" rand_chacha = "0.3.1" diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index d0f05d12..946fdd93 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -1,8 +1,8 @@ -use crate::Rounds; +use crate::{Rounds, Variant}; use core::marker::PhantomData; #[cfg(feature = "rng")] -use crate::{ChaChaCore, Variant}; +use crate::ChaChaCore; #[cfg(feature = "cipher")] use crate::{ @@ -33,10 +33,11 @@ const N: usize = PAR_BLOCKS / 2; #[inline] #[target_feature(enable = "avx2")] #[cfg(feature = "cipher")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamClosure, + V: Variant { let state_ptr = state.as_ptr() as *const __m128i; let v = [ @@ -45,21 +46,33 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); + if V::USES_U32_COUNTER { + c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); + } else { + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); + } let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); + if V::USES_U32_COUNTER { + c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); + } else { + c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); + } } - let mut backend = Backend:: { + let mut backend = Backend:: { v, ctr, _pd: PhantomData, + _variant: PhantomData }; f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; + if !V::USES_U32_COUNTER { + state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; + } } #[inline] @@ -83,10 +96,11 @@ where ctr[i] = c; c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); } - let mut backend = Backend:: { + let mut backend = Backend:: { v, ctr, _pd: PhantomData, + _variant: PhantomData }; backend.rng_gen_par_ks_blocks(buffer); @@ -94,30 +108,35 @@ where core.state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; } -struct Backend { +struct Backend { v: [__m256i; 3], ctr: [__m256i; N], _pd: PhantomData, + _variant: PhantomData } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } #[cfg(feature = "cipher")] -impl StreamBackend for Backend { +impl StreamBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { unsafe { let res = rounds::(&self.v, &self.ctr); for c in self.ctr.iter_mut() { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)); + if V::USES_U32_COUNTER { + *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)); + } else { + *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)); + } } let res0: [__m128i; 8] = core::mem::transmute(res[0]); @@ -136,7 +155,11 @@ impl StreamBackend for Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + if V::USES_U32_COUNTER { + *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); + } else { + *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); + } } let mut block_ptr = blocks.as_mut_ptr() as *mut __m128i; @@ -153,7 +176,7 @@ impl StreamBackend for 
Backend { } #[cfg(feature = "rng")] -impl Backend { +impl Backend { #[inline(always)] fn rng_gen_par_ks_blocks(&mut self, blocks: &mut [u32; 64]) { unsafe { diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index a4f0be5c..3a3d0cf3 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -3,11 +3,11 @@ //! Adapted from the Crypto++ `chacha_simd` implementation by Jack Lloyd and //! Jeffrey Walton (public domain). -use crate::{Rounds, STATE_WORDS}; +use crate::{Rounds, Variant, STATE_WORDS}; use core::{arch::aarch64::*, marker::PhantomData}; #[cfg(feature = "rand_core")] -use crate::{ChaChaCore, Variant}; +use crate::ChaChaCore; #[cfg(feature = "cipher")] use crate::chacha::Block; @@ -59,7 +59,16 @@ where f.call(&mut backend); - vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]); + if V::IS_32_BIT_COUNTER { + // handle 32-bit counter + vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]); + } else { + // handle 64-bit counter + vst1q_u64( + state.as_mut_ptr().offset(12) as *mut u64, + vreinterpretq_u64_u32(backend.state[3]), + ); + } } #[inline] @@ -113,7 +122,14 @@ impl StreamBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - self.state[3] = add64!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); + if V::IS_32_BIT_COUNTER { + self.state[3] = add64!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); + } else { + self.state[3] = vreinterpretq_u32_u64(vaddq_u64( + vreinterpretq_u64_u32(state3), + vld1q_u64([1, 0].as_ptr()), + )); + } } } diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index 9cd4234f..d6cf67c3 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -28,7 +28,18 @@ impl<'a, R: Rounds, V: Variant> StreamBackend for Backend<'a, R, V> { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let res = run_rounds::(&self.0.state); - self.0.state[12] = self.0.state[12].wrapping_add(1); + + if V::USES_U32_COUNTER { + self.0.state[12] = self.0.state[12].wrapping_add(1); + } else { + let no_carry = self.0.state[12].checked_add(1); + if let Some(v) = no_carry { + self.0.state[12] = v; + } else { + self.0.state[12] = 0; + self.0.state[13] = self.0.state[13].wrapping_add(1); + } + } for (chunk, val) in block.chunks_exact_mut(4).zip(res.iter()) { chunk.copy_from_slice(&val.to_le_bytes()); diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index 748c59c0..f580a2a3 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -1,7 +1,7 @@ -use crate::Rounds; +use crate::{Rounds, Variant}; #[cfg(feature = "rng")] -use crate::{ChaChaCore, Variant}; +use crate::ChaChaCore; #[cfg(feature = "cipher")] use crate::{STATE_WORDS, chacha::Block}; @@ -23,13 +23,14 @@ use core::arch::x86_64::*; #[inline] #[target_feature(enable = "sse2")] #[cfg(feature = "cipher")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamClosure, + V: Variant, { let state_ptr = state.as_ptr() as *const __m128i; - let mut backend = Backend:: { + let mut backend = Backend:: { v: [ _mm_loadu_si128(state_ptr.add(0)), _mm_loadu_si128(state_ptr.add(1)), @@ -37,35 +38,44 @@ where _mm_loadu_si128(state_ptr.add(3)), ], _pd: PhantomData, + _variant: PhantomData }; f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; + if !V::USES_U32_COUNTER { + state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; + } } -struct Backend { 
+struct Backend { v: [__m128i; 4], _pd: PhantomData, + _variant: PhantomData } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U1; } #[cfg(feature = "cipher")] -impl StreamBackend for Backend { +impl StreamBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { unsafe { let res = rounds::(&self.v); - self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); + if V::USES_U32_COUNTER { + self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); + } else { + self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)); + } let block_ptr = block.as_mut_ptr() as *mut __m128i; for i in 0..4 { @@ -84,7 +94,7 @@ where V: Variant { let state_ptr = core.state.as_ptr() as *const __m128i; - let mut backend = Backend:: { + let mut backend = Backend:: { v: [ _mm_loadu_si128(state_ptr.add(0)), _mm_loadu_si128(state_ptr.add(1)), @@ -92,6 +102,7 @@ where _mm_loadu_si128(state_ptr.add(3)), ], _pd: PhantomData, + _variant: PhantomData }; for i in 0..4 { @@ -102,7 +113,7 @@ where } #[cfg(feature = "rng")] -impl Backend { +impl Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut [u32]) { unsafe { diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index 73212dcc..273a9b33 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -267,16 +267,17 @@ impl ChaChaCore { #[cfg(feature = "cipher")] impl StreamCipherSeekCore for ChaChaCore { - type Counter = u32; + type Counter = V::Counter; #[inline(always)] fn get_block_pos(&self) -> Self::Counter { - self.state[12] + V::get_block_pos(&self.state[12..V::NONCE_INDEX]) } #[inline(always)] fn set_block_pos(&mut self, pos: Self::Counter) { - self.state[12] = pos + let block_pos_words = V::set_block_pos_helper(pos); + self.state[12..V::NONCE_INDEX].copy_from_slice(block_pos_words.as_ref()) } } @@ -284,8 +285,7 @@ impl StreamCipherSeekCore for ChaChaCore { impl StreamCipherCore for ChaChaCore { #[inline(always)] fn remaining_blocks(&self) -> Option { - let rem = u32::MAX - self.get_block_pos(); - rem.try_into().ok() + V::remaining_blocks(self.get_block_pos()) } fn process_with_backend(&mut self, f: impl cipher::StreamClosure) { @@ -296,21 +296,21 @@ impl StreamCipherCore for ChaChaCore { cfg_if! { if #[cfg(chacha20_force_avx2)] { unsafe { - backends::avx2::inner::(&mut self.state, f); + backends::avx2::inner::(&mut self.state, f); } } else if #[cfg(chacha20_force_sse2)] { unsafe { - backends::sse2::inner::(&mut self.state, f); + backends::sse2::inner::(&mut self.state, f); } } else { let (avx2_token, sse2_token) = self.tokens; if avx2_token.get() { unsafe { - backends::avx2::inner::(&mut self.state, f); + backends::avx2::inner::(&mut self.state, f); } } else if sse2_token.get() { unsafe { - backends::sse2::inner::(&mut self.state, f); + backends::sse2::inner::(&mut self.state, f); } } else { f.call(&mut backends::soft::Backend(self)); diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index 58043a75..a15bd3c5 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -2,10 +2,36 @@ //! //! To be revisited for the 64-bit counter. 
+#[cfg(feature = "cipher")] +/// A trait to restrict the counter for the cipher crate +pub trait VariantCounter: cipher::Counter {} +#[cfg(not(feature = "cipher"))] +pub trait VariantCounter {} + +impl VariantCounter for u32 {} + +#[cfg(feature = "legacy")] +impl VariantCounter for u64 {} + /// A trait that distinguishes some ChaCha variants pub trait Variant: Clone { /// the size of the Nonce in u32s const NONCE_INDEX: usize; + /// This const should be evaluated at compile time + const USES_U32_COUNTER: bool; + type Counter: VariantCounter; + type CounterWords: AsRef<[u32]>; + + /// Takes a slice of state[12..NONCE_INDEX] to convert it into + /// Self::Counter. + fn get_block_pos(counter_row: &[u32]) -> Self::Counter; + + /// Breaks down the Self::Counter type into a u32 array for setting the + /// block pos. + fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords; + + /// A helper method for calculating the remaining blocks using these types + fn remaining_blocks(block_pos: Self::Counter) -> Option; } #[derive(Clone)] @@ -13,6 +39,21 @@ pub trait Variant: Clone { pub struct Ietf(); impl Variant for Ietf { const NONCE_INDEX: usize = 13; + const USES_U32_COUNTER: bool = true; + type Counter = u32; + type CounterWords = [u32; 1]; + #[inline(always)] + fn get_block_pos(counter_row: &[u32]) -> Self::Counter { + counter_row[0] + } + #[inline(always)] + fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords { + [value] + } + #[inline(always)] + fn remaining_blocks(block_pos: Self::Counter) -> Option { + (u32::MAX - block_pos).try_into().ok() + } } #[derive(Clone)] @@ -22,4 +63,24 @@ pub struct Legacy(); #[cfg(feature = "legacy")] impl Variant for Legacy { const NONCE_INDEX: usize = 14; + const USES_U32_COUNTER: bool = false; + type Counter = u64; + type CounterWords = [u32; 2]; + #[inline(always)] + fn get_block_pos(counter_row: &[u32]) -> Self::Counter { + counter_row[0] as u64 | (u64::from(counter_row[1]) << 32) + } + #[inline(always)] + fn set_block_pos_helper(value: Self::Counter) -> Self::CounterWords { + [value as u32, (value >> 32) as u32] + } + #[inline(always)] + fn remaining_blocks(block_pos: Self::Counter) -> Option { + let remaining = u64::MAX - block_pos; + #[cfg(target_pointer_width = "32")] + if remaining > usize::MAX as u64 { + return None; + } + remaining.try_into().ok() + } } diff --git a/chacha20/tests/mod.rs b/chacha20/tests/mod.rs index 4e4aa33c..dce154f1 100644 --- a/chacha20/tests/mod.rs +++ b/chacha20/tests/mod.rs @@ -233,4 +233,46 @@ mod legacy { } } } + + /// Tests the 64-bit counter + #[test] + fn legacy_64_bit_counter() { + use cipher::StreamCipherSeekCore; + use chacha20_0_7::{ChaCha20Legacy as OgLegacy, LegacyNonce as OgLegacyNonce, cipher::{NewCipher, StreamCipher, StreamCipherSeek}}; + let mut cipher = ChaCha20Legacy::new(&KEY_LONG.into(), &LegacyNonce::from(IV_LONG)); + let mut og_cipher = OgLegacy::new(&KEY_LONG.into(), &OgLegacyNonce::from(IV_LONG)); + + const TEST_BLOCKS: usize = 5; + const TEST: [u8; 64 * TEST_BLOCKS] = [0u8; 64 * TEST_BLOCKS]; + let mut expected = TEST.clone(); + og_cipher.apply_keystream(&mut expected); + let mut result = TEST.clone(); + cipher.apply_keystream(&mut result); + assert_eq!(expected, result); + + const SEEK_POS: u64 = (u32::MAX - 10) as u64 * 64; + cipher.seek(SEEK_POS); + og_cipher.seek(SEEK_POS); + + let pos: u64 = cipher.current_pos(); + assert_eq!(pos, og_cipher.current_pos()); + let block_pos = cipher.get_core().get_block_pos(); + assert!(block_pos < u32::MAX as u64); + // Apply keystream blocks 
until some point after the u32 boundary + for i in 1..20 { + let mut expected = TEST.clone(); + og_cipher.apply_keystream(&mut expected); + let mut result = TEST.clone(); + cipher.apply_keystream(&mut result); + assert_eq!(expected, result); + let expected_block_pos = block_pos + i * TEST_BLOCKS as u64; + assert!(expected_block_pos == cipher.get_core().get_block_pos(), + "Block pos did not increment as expected; Expected block pos: {}\n actual block_pos: {}\n iteration: {}", + expected_block_pos, + cipher.get_core().get_block_pos(), + i + ); + } + assert!(cipher.get_core().get_block_pos() > u32::MAX as u64); + } } From aa22cb6370d63313f827db29631c6e6fe8dd75ae Mon Sep 17 00:00:00 2001 From: nstilt1 Date: Thu, 30 May 2024 14:38:21 -0500 Subject: [PATCH 02/10] failing neon tests currently --- chacha20/src/backends/neon.rs | 36 ++++++++++++++++++++++------------- chacha20/src/lib.rs | 2 +- chacha20/tests/mod.rs | 15 ++++++++++++++- 3 files changed, 38 insertions(+), 15 deletions(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 3a3d0cf3..4c1bbdb8 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -18,13 +18,14 @@ use cipher::{ BlockSizeUser, ParBlocks, ParBlocksSizeUser, StreamBackend, StreamClosure, }; -struct Backend { +struct Backend { state: [uint32x4_t; 4], ctrs: [uint32x4_t; 4], _pd: PhantomData, + _variant: PhantomData } -impl Backend { +impl Backend { #[inline] unsafe fn new(state: &mut [u32; STATE_WORDS]) -> Self { let state = [ @@ -39,10 +40,11 @@ impl Backend { vld1q_u32([3, 0, 0, 0].as_ptr()), vld1q_u32([4, 0, 0, 0].as_ptr()), ]; - Backend:: { + Backend:: { state, ctrs, _pd: PhantomData, + _variant: PhantomData, } } } @@ -50,16 +52,17 @@ impl Backend { #[inline] #[cfg(feature = "cipher")] #[target_feature(enable = "neon")] -pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) +pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamClosure, + V: Variant { - let mut backend = Backend::::new(state); + let mut backend = Backend::::new(state); f.call(&mut backend); - if V::IS_32_BIT_COUNTER { + if V::USES_U32_COUNTER { // handle 32-bit counter vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]); } else { @@ -81,7 +84,7 @@ where R: Rounds, V: Variant, { - let mut backend = Backend::::new(&mut core.state); + let mut backend = Backend::::new(&mut core.state); backend.write_par_ks_blocks(buffer); @@ -89,11 +92,11 @@ where } #[cfg(feature = "cipher")] -impl BlockSizeUser for Backend { +impl BlockSizeUser for Backend { type BlockSize = U64; } #[cfg(feature = "cipher")] -impl ParBlocksSizeUser for Backend { +impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } @@ -114,7 +117,7 @@ macro_rules! 
add_assign_vec { } #[cfg(feature = "cipher")] -impl StreamBackend for Backend { +impl StreamBackend for Backend { #[inline(always)] fn gen_ks_block(&mut self, block: &mut Block) { let state3 = self.state[3]; @@ -122,7 +125,7 @@ impl StreamBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - if V::IS_32_BIT_COUNTER { + if V::USES_U32_COUNTER { self.state[3] = add64!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); } else { self.state[3] = vreinterpretq_u32_u64(vaddq_u64( @@ -178,7 +181,14 @@ impl StreamBackend for Backend { ); } } - self.state[3] = add64!(self.state[3], self.ctrs[3]); + if V::USES_U32_COUNTER { + self.state[3] = add64!(self.state[3], self.ctrs[3]); + } else { + self.state[3] = vreinterpretq_u32_u64(vaddq_u64( + vreinterpretq_u64_u32(self.state[3]), + vld1q_u64([4, 0].as_ptr()), + )); + } } } } @@ -204,7 +214,7 @@ macro_rules! extract { }; } -impl Backend { +impl Backend { #[inline(always)] /// Generates `num_blocks` blocks and blindly writes them to `dest_ptr` /// diff --git a/chacha20/src/lib.rs b/chacha20/src/lib.rs index 273a9b33..2e1cb706 100644 --- a/chacha20/src/lib.rs +++ b/chacha20/src/lib.rs @@ -319,7 +319,7 @@ impl StreamCipherCore for ChaChaCore { } } else if #[cfg(all(chacha20_force_neon, target_arch = "aarch64", target_feature = "neon"))] { unsafe { - backends::neon::inner::(&mut self.state, f); + backends::neon::inner::(&mut self.state, f); } } else { f.call(&mut backends::soft::Backend(self)); diff --git a/chacha20/tests/mod.rs b/chacha20/tests/mod.rs index dce154f1..069a5429 100644 --- a/chacha20/tests/mod.rs +++ b/chacha20/tests/mod.rs @@ -264,7 +264,20 @@ mod legacy { og_cipher.apply_keystream(&mut expected); let mut result = TEST.clone(); cipher.apply_keystream(&mut result); - assert_eq!(expected, result); + if expected != result { + let mut index: usize = 0; + let mut expected_u8: u8 = 0; + let mut found_u8: u8 = 0; + for (i, (e, r)) in expected.iter().zip(result.iter()).enumerate() { + if e != r { + index = i; + expected_u8 = *e; + found_u8 = *r; + break; + } + }; + panic!("Index {} did not match;\n iteration: {}\n expected: {} != {}", index, i, expected_u8, found_u8); + } let expected_block_pos = block_pos + i * TEST_BLOCKS as u64; assert!(expected_block_pos == cipher.get_core().get_block_pos(), "Block pos did not increment as expected; Expected block pos: {}\n actual block_pos: {}\n iteration: {}", From 14312d8db4b733a17aeb1983ec981a0ebb90c4ae Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 7 Jun 2024 10:33:10 -0500 Subject: [PATCH 03/10] fixed neon code to pass 64-bit counter tests. Tested with multiple TEST_BLOCKS sizes, but 4 and 10 were failing, among others. 
Problem was the 64-bit addition was not being applied properly when adding the counter to the state prior to writing blocks to the dest --- chacha20/src/backends/neon.rs | 97 +++++++++++++++++++++++++---------- chacha20/tests/mod.rs | 26 +++++++--- 2 files changed, 88 insertions(+), 35 deletions(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 4c1bbdb8..a8350c77 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -22,7 +22,7 @@ struct Backend { state: [uint32x4_t; 4], ctrs: [uint32x4_t; 4], _pd: PhantomData, - _variant: PhantomData + _variant: PhantomData, } impl Backend { @@ -56,7 +56,7 @@ pub(crate) unsafe fn inner(state: &mut [u32; STATE_WORDS], f: F) where R: Rounds, F: StreamClosure, - V: Variant + V: Variant, { let mut backend = Backend::::new(state); @@ -66,7 +66,6 @@ where // handle 32-bit counter vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]); } else { - // handle 64-bit counter vst1q_u64( state.as_mut_ptr().offset(12) as *mut u64, vreinterpretq_u64_u32(backend.state[3]), @@ -116,6 +115,19 @@ macro_rules! add_assign_vec { }; } +macro_rules! add_counter { + // macro definition for when V::USES_U32_COUNTER is true + ($a:expr, $b:literal, $uses_u32_counter:expr) => { + match $uses_u32_counter { + true => add64!($a, vld1q_u32([$b, 0, 0, 0].as_ptr())), + false => vreinterpretq_u32_u64(vaddq_u64( + vreinterpretq_u64_u32($a), + vld1q_u64([$b, 0].as_ptr()), + )), + } + }; +} + #[cfg(feature = "cipher")] impl StreamBackend for Backend { #[inline(always)] @@ -125,14 +137,7 @@ impl StreamBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - if V::USES_U32_COUNTER { - self.state[3] = add64!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); - } else { - self.state[3] = vreinterpretq_u32_u64(vaddq_u64( - vreinterpretq_u64_u32(state3), - vld1q_u64([1, 0].as_ptr()), - )); - } + self.state[3] = add64!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); } } @@ -165,30 +170,40 @@ impl StreamBackend for Backend { double_quarter_round(&mut blocks); } - for block in 0..4 { - // add state to block - for state_row in 0..4 { + // write first block, with no special counter requirements + for state_row in 0..4 { + // add state + add_assign_vec!(blocks[0][state_row], self.state[state_row]); + // write + vst1q_u8( + dest[0] + .as_mut_ptr() + .offset((state_row as isize) << 4 as isize), + vreinterpretq_u8_u32(blocks[0][state_row as usize]), + ); + } + + // write blocks with adjusted counters + for block in 1..4 { + // add state with adjusted counter + for state_row in 0..3 { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } - if block > 0 { - blocks[block][3] = add64!(blocks[block][3], self.ctrs[block - 1]); - } - // write blocks to dest + add_assign_vec!( + blocks[block][3], + add64!(self.state[3], self.ctrs[block - 1]) + ); + + // write for state_row in 0..4 { vst1q_u8( - dest[block].as_mut_ptr().offset(state_row << 4), + dest[block].as_mut_ptr().offset(state_row << 4 as usize), vreinterpretq_u8_u32(blocks[block][state_row as usize]), ); } } - if V::USES_U32_COUNTER { - self.state[3] = add64!(self.state[3], self.ctrs[3]); - } else { - self.state[3] = vreinterpretq_u32_u64(vaddq_u64( - vreinterpretq_u64_u32(self.state[3]), - vld1q_u64([4, 0].as_ptr()), - )); - } + //self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]); + self.state[3] = add64!(self.state[3], self.ctrs[3]); } } } @@ -328,3 +343,31 @@ unsafe fn cols_to_rows(blocks: &mut [[uint32x4_t; 4]; 4]) { extract!(block[3], 1); } } + +#[cfg(test)] 
+mod tests { + use super::*; + + fn reg_to_arr(reg: uint32x4_t) -> [u32; 4] { + unsafe { + let result: [u32; 4] = core::mem::transmute_copy(®); + result + } + } + + #[test] + fn counter() { + unsafe { + let start: [u32; 4] = [0, 0, 0, 0]; + let mut reg = vld1q_u32(start.as_ptr()); + let one = vld1q_u32([1, 0, 0, 0].as_ptr()); + let result_add64 = add64!(reg, one); + assert_eq!(reg_to_arr(result_add64), [1, 0, 0, 0]); + + let max: [u32; 4] = [u32::MAX, 0, 0, 0]; + reg = vld1q_u32(max.as_ptr()); + let result_add64 = add64!(reg, one); + assert_eq!(reg_to_arr(result_add64), [0, 1, 0, 0]); + } + } +} diff --git a/chacha20/tests/mod.rs b/chacha20/tests/mod.rs index 069a5429..ae12b9fe 100644 --- a/chacha20/tests/mod.rs +++ b/chacha20/tests/mod.rs @@ -239,29 +239,37 @@ mod legacy { fn legacy_64_bit_counter() { use cipher::StreamCipherSeekCore; use chacha20_0_7::{ChaCha20Legacy as OgLegacy, LegacyNonce as OgLegacyNonce, cipher::{NewCipher, StreamCipher, StreamCipherSeek}}; - let mut cipher = ChaCha20Legacy::new(&KEY_LONG.into(), &LegacyNonce::from(IV_LONG)); - let mut og_cipher = OgLegacy::new(&KEY_LONG.into(), &OgLegacyNonce::from(IV_LONG)); + use rand_chacha::{ChaCha20Rng as OgRng, rand_core::{RngCore, SeedableRng}}; + let mut cipher = ChaCha20Legacy::new(&[0u8; 32].into(), &LegacyNonce::from([0u8; 8])); + let mut og_cipher = OgLegacy::new(&[0u8; 32].into(), &OgLegacyNonce::from([0u8; 8])); + //let mut rng = ChaCha20Rng + let mut rng = OgRng::from_seed([0u8; 32]); - const TEST_BLOCKS: usize = 5; + const TEST_BLOCKS: usize = 4; const TEST: [u8; 64 * TEST_BLOCKS] = [0u8; 64 * TEST_BLOCKS]; let mut expected = TEST.clone(); - og_cipher.apply_keystream(&mut expected); + rng.fill_bytes(&mut expected); + //og_cipher.apply_keystream(&mut expected); let mut result = TEST.clone(); cipher.apply_keystream(&mut result); assert_eq!(expected, result); const SEEK_POS: u64 = (u32::MAX - 10) as u64 * 64; cipher.seek(SEEK_POS); + rng.set_word_pos(SEEK_POS as u128 / 4); og_cipher.seek(SEEK_POS); let pos: u64 = cipher.current_pos(); - assert_eq!(pos, og_cipher.current_pos()); + //assert_eq!(pos, og_cipher.current_pos()); + assert_eq!(pos, rng.get_word_pos() as u64 * 4); let block_pos = cipher.get_core().get_block_pos(); assert!(block_pos < u32::MAX as u64); // Apply keystream blocks until some point after the u32 boundary - for i in 1..20 { + for i in 1..80 { + let starting_block_pos = cipher.get_core().get_block_pos() as i64 - u32::MAX as i64; let mut expected = TEST.clone(); - og_cipher.apply_keystream(&mut expected); + rng.fill_bytes(&mut expected); + //og_cipher.apply_keystream(&mut expected); let mut result = TEST.clone(); cipher.apply_keystream(&mut result); if expected != result { @@ -276,7 +284,7 @@ mod legacy { break; } }; - panic!("Index {} did not match;\n iteration: {}\n expected: {} != {}", index, i, expected_u8, found_u8); + panic!("Index {} did not match;\n iteration: {}\n expected: {} != {}\nstart block pos - u32::MAX: {}", index, i, expected_u8, found_u8, starting_block_pos); } let expected_block_pos = block_pos + i * TEST_BLOCKS as u64; assert!(expected_block_pos == cipher.get_core().get_block_pos(), @@ -286,6 +294,8 @@ mod legacy { i ); } + // this test assures us that the counter is in fact over u32::MAX, in + // case we change some of the parameters assert!(cipher.get_core().get_block_pos() > u32::MAX as u64); } } From f984d8eef4208cff8d24a8aaca14b623834db386 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 7 Jun 2024 10:34:45 -0500 Subject: [PATCH 04/10] removed unused macro and 
unnecessary test --- chacha20/src/backends/neon.rs | 41 ----------------------------------- 1 file changed, 41 deletions(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index a8350c77..e411a72d 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -115,19 +115,6 @@ macro_rules! add_assign_vec { }; } -macro_rules! add_counter { - // macro definition for when V::USES_U32_COUNTER is true - ($a:expr, $b:literal, $uses_u32_counter:expr) => { - match $uses_u32_counter { - true => add64!($a, vld1q_u32([$b, 0, 0, 0].as_ptr())), - false => vreinterpretq_u32_u64(vaddq_u64( - vreinterpretq_u64_u32($a), - vld1q_u64([$b, 0].as_ptr()), - )), - } - }; -} - #[cfg(feature = "cipher")] impl StreamBackend for Backend { #[inline(always)] @@ -343,31 +330,3 @@ unsafe fn cols_to_rows(blocks: &mut [[uint32x4_t; 4]; 4]) { extract!(block[3], 1); } } - -#[cfg(test)] -mod tests { - use super::*; - - fn reg_to_arr(reg: uint32x4_t) -> [u32; 4] { - unsafe { - let result: [u32; 4] = core::mem::transmute_copy(®); - result - } - } - - #[test] - fn counter() { - unsafe { - let start: [u32; 4] = [0, 0, 0, 0]; - let mut reg = vld1q_u32(start.as_ptr()); - let one = vld1q_u32([1, 0, 0, 0].as_ptr()); - let result_add64 = add64!(reg, one); - assert_eq!(reg_to_arr(result_add64), [1, 0, 0, 0]); - - let max: [u32; 4] = [u32::MAX, 0, 0, 0]; - reg = vld1q_u32(max.as_ptr()); - let result_add64 = add64!(reg, one); - assert_eq!(reg_to_arr(result_add64), [0, 1, 0, 0]); - } - } -} From dedf194eeabbc352f4f673ea840fe7fed7f16133 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 8 Jun 2024 08:15:46 -0500 Subject: [PATCH 05/10] adjusted legacy counter test to try different amounts of blocks --- chacha20/Cargo.toml | 1 - chacha20/tests/mod.rs | 46 +++++++++++++++++++++++++------------------ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/chacha20/Cargo.toml b/chacha20/Cargo.toml index ed8bc31c..1420ea5b 100644 --- a/chacha20/Cargo.toml +++ b/chacha20/Cargo.toml @@ -29,7 +29,6 @@ zeroize = { version = "1.8.1", optional = true } cpufeatures = "0.2" [dev-dependencies] -chacha20_0_7 = { package = "chacha20", version = "0.7.0", features = ["legacy"] } cipher = { version = "=0.5.0-pre.4", features = ["dev"] } hex-literal = "0.4" rand_chacha = "0.3.1" diff --git a/chacha20/tests/mod.rs b/chacha20/tests/mod.rs index ae12b9fe..21bf8e6f 100644 --- a/chacha20/tests/mod.rs +++ b/chacha20/tests/mod.rs @@ -234,43 +234,36 @@ mod legacy { } } - /// Tests the 64-bit counter - #[test] - fn legacy_64_bit_counter() { + /// Tests the 64-bit counter with a given amount of test blocks + fn legacy_counter_over_u32_max(test: &[u8; N]) { + assert!(N % 64 == 0, "N should be a multiple of 64"); use cipher::StreamCipherSeekCore; - use chacha20_0_7::{ChaCha20Legacy as OgLegacy, LegacyNonce as OgLegacyNonce, cipher::{NewCipher, StreamCipher, StreamCipherSeek}}; + // using rand_chacha v0.3 because it is already a dev-dependency, and + // it uses a 64-bit counter use rand_chacha::{ChaCha20Rng as OgRng, rand_core::{RngCore, SeedableRng}}; let mut cipher = ChaCha20Legacy::new(&[0u8; 32].into(), &LegacyNonce::from([0u8; 8])); - let mut og_cipher = OgLegacy::new(&[0u8; 32].into(), &OgLegacyNonce::from([0u8; 8])); - //let mut rng = ChaCha20Rng let mut rng = OgRng::from_seed([0u8; 32]); - const TEST_BLOCKS: usize = 4; - const TEST: [u8; 64 * TEST_BLOCKS] = [0u8; 64 * TEST_BLOCKS]; - let mut expected = TEST.clone(); + let mut expected = test.clone(); rng.fill_bytes(&mut expected); - 
//og_cipher.apply_keystream(&mut expected); - let mut result = TEST.clone(); + let mut result = test.clone(); cipher.apply_keystream(&mut result); assert_eq!(expected, result); const SEEK_POS: u64 = (u32::MAX - 10) as u64 * 64; cipher.seek(SEEK_POS); rng.set_word_pos(SEEK_POS as u128 / 4); - og_cipher.seek(SEEK_POS); let pos: u64 = cipher.current_pos(); - //assert_eq!(pos, og_cipher.current_pos()); assert_eq!(pos, rng.get_word_pos() as u64 * 4); let block_pos = cipher.get_core().get_block_pos(); assert!(block_pos < u32::MAX as u64); // Apply keystream blocks until some point after the u32 boundary - for i in 1..80 { + for i in 1..16 { let starting_block_pos = cipher.get_core().get_block_pos() as i64 - u32::MAX as i64; - let mut expected = TEST.clone(); + let mut expected = test.clone(); rng.fill_bytes(&mut expected); - //og_cipher.apply_keystream(&mut expected); - let mut result = TEST.clone(); + let mut result = test.clone(); cipher.apply_keystream(&mut result); if expected != result { let mut index: usize = 0; @@ -286,7 +279,7 @@ mod legacy { }; panic!("Index {} did not match;\n iteration: {}\n expected: {} != {}\nstart block pos - u32::MAX: {}", index, i, expected_u8, found_u8, starting_block_pos); } - let expected_block_pos = block_pos + i * TEST_BLOCKS as u64; + let expected_block_pos = block_pos + i * (test.len() / 64) as u64; assert!(expected_block_pos == cipher.get_core().get_block_pos(), "Block pos did not increment as expected; Expected block pos: {}\n actual block_pos: {}\n iteration: {}", expected_block_pos, @@ -296,6 +289,21 @@ mod legacy { } // this test assures us that the counter is in fact over u32::MAX, in // case we change some of the parameters - assert!(cipher.get_core().get_block_pos() > u32::MAX as u64); + assert!(cipher.get_core().get_block_pos() > u32::MAX as u64, "The 64-bit counter test did not surpass u32::MAX"); + } + + /// Runs the legacy_64_bit_counter test with different-sized arrays so that + /// both `gen_ks_block` and `gen_par_ks_blocks` are called with varying + /// starting positions. 
+ #[test] + fn legacy_64_bit_counter() { + legacy_counter_over_u32_max(&[0u8; 64 * 1]); + legacy_counter_over_u32_max(&[0u8; 64 * 2]); + legacy_counter_over_u32_max(&[0u8; 64 * 3]); + legacy_counter_over_u32_max(&[0u8; 64 * 4]); + legacy_counter_over_u32_max(&[0u8; 64 * 5]); + legacy_counter_over_u32_max(&[0u8; 64 * 6]); + legacy_counter_over_u32_max(&[0u8; 64 * 7]); + legacy_counter_over_u32_max(&[0u8; 64 * 8]); } } From 61acc545267cdb71b5b3251211ef13403cd6013d Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 8 Jun 2024 08:49:57 -0500 Subject: [PATCH 06/10] added test to see if it would fail, and it did --- chacha20/src/rng.rs | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/chacha20/src/rng.rs b/chacha20/src/rng.rs index 0209823d..67adbceb 100644 --- a/chacha20/src/rng.rs +++ b/chacha20/src/rng.rs @@ -1101,4 +1101,21 @@ pub(crate) mod tests { assert_eq!(rng1.next_u64(), rng2.next_u64()); } } + + #[test] + fn counter_wrapping() { + let mut rng = ChaChaRng::from_seed([0u8; 32]); + + // get first four blocks and word pos + let mut first_blocks = [0u8; 64 * 4]; + rng.fill_bytes(&mut first_blocks); + let word_pos = rng.get_word_pos(); + + // get first four blocks after wrapping + rng.set_block_pos(u32::MAX); + let mut result = [0u8; 64 * 5]; + rng.fill_bytes(&mut result); + assert_eq!(word_pos, rng.get_word_pos()); + assert_eq!(&first_blocks[0..64 * 4], &result[64..]); + } } From 66ae1089bf14f77cc729bf5669cc0fbb079c2d11 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Sat, 8 Jun 2024 08:54:38 -0500 Subject: [PATCH 07/10] turned add64! into add_counter! that accepts a bool; now it passes tests --- chacha20/src/backends/neon.rs | 44 +++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index e411a72d..a4c9f62b 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -99,12 +99,17 @@ impl ParBlocksSizeUser for Backend { type ParBlocksSize = U4; } -macro_rules! add64 { - ($a:expr, $b:expr) => { - vreinterpretq_u32_u64(vaddq_u64( - vreinterpretq_u64_u32($a), - vreinterpretq_u64_u32($b), - )) +/// Adds a counter row with either 32-bit or 64-bit addition +macro_rules! 
add_counter { + ($a:expr, $b:expr, $is_32_bit:expr) => { + if $is_32_bit { + vaddq_u32($a, $b) + } else { + vreinterpretq_u32_u64(vaddq_u64( + vreinterpretq_u64_u32($a), + vreinterpretq_u64_u32($b), + )) + } }; } @@ -124,7 +129,11 @@ impl StreamBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - self.state[3] = add64!(state3, vld1q_u32([1, 0, 0, 0].as_ptr())); + self.state[3] = add_counter!( + state3, + vld1q_u32([1, 0, 0, 0].as_ptr()), + V::USES_U32_COUNTER + ); } } @@ -137,19 +146,19 @@ impl StreamBackend for Backend { self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[0]), + add_counter!(self.state[3], self.ctrs[0], V::USES_U32_COUNTER), ], [ self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[1]), + add_counter!(self.state[3], self.ctrs[1], V::USES_U32_COUNTER), ], [ self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[2]), + add_counter!(self.state[3], self.ctrs[2], V::USES_U32_COUNTER), ], ]; @@ -178,7 +187,7 @@ impl StreamBackend for Backend { } add_assign_vec!( blocks[block][3], - add64!(self.state[3], self.ctrs[block - 1]) + add_counter!(self.state[3], self.ctrs[block - 1], V::USES_U32_COUNTER) ); // write @@ -190,7 +199,7 @@ impl StreamBackend for Backend { } } //self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]); - self.state[3] = add64!(self.state[3], self.ctrs[3]); + self.state[3] = add_counter!(self.state[3], self.ctrs[3], V::USES_U32_COUNTER); } } } @@ -233,19 +242,19 @@ impl Backend { self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[0]), + add_counter!(self.state[3], self.ctrs[0], V::USES_U32_COUNTER), ], [ self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[1]), + add_counter!(self.state[3], self.ctrs[1], V::USES_U32_COUNTER), ], [ self.state[0], self.state[1], self.state[2], - add64!(self.state[3], self.ctrs[2]), + add_counter!(self.state[3], self.ctrs[2], V::USES_U32_COUNTER), ], ]; @@ -260,7 +269,8 @@ impl Backend { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - blocks[block][3] = add64!(blocks[block][3], self.ctrs[block - 1]); + blocks[block][3] = + add_counter!(blocks[block][3], self.ctrs[block - 1], V::USES_U32_COUNTER); } // write blocks to buffer for state_row in 0..4 { @@ -271,7 +281,7 @@ impl Backend { } dest_ptr = dest_ptr.add(64); } - self.state[3] = add64!(self.state[3], self.ctrs[3]); + self.state[3] = add_counter!(self.state[3], self.ctrs[3], V::USES_U32_COUNTER); } } From bef4ccd990aacf607d106e1ab175a45a7ae667f3 Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 2 Aug 2024 12:00:36 -0500 Subject: [PATCH 08/10] removed Variant::USES_32_BIT_COUNTER; about to check neon code --- chacha20/src/backends/avx2.rs | 10 +++++----- chacha20/src/backends/neon.rs | 34 ++++++++++++++-------------------- chacha20/src/backends/soft.rs | 2 +- chacha20/src/backends/sse2.rs | 4 ++-- chacha20/src/variants.rs | 4 ---- 5 files changed, 22 insertions(+), 32 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index 946fdd93..eca6651d 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -46,7 +46,7 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - if V::USES_U32_COUNTER { + if core::mem::size_of::() == 4 { c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); } else { c = 
_mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); @@ -54,7 +54,7 @@ where let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - if V::USES_U32_COUNTER { + if core::mem::size_of::() == 4 { c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); } else { c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); @@ -70,7 +70,7 @@ where f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; - if !V::USES_U32_COUNTER { + if core::mem::size_of::() != 4 { state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; } } @@ -132,7 +132,7 @@ impl StreamBackend for Backend { unsafe { let res = rounds::(&self.v, &self.ctr); for c in self.ctr.iter_mut() { - if V::USES_U32_COUNTER { + if core::mem::size_of::() == 4 { *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)); } else { *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)); @@ -155,7 +155,7 @@ impl StreamBackend for Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - if V::USES_U32_COUNTER { + if core::mem::size_of::() == 4 { *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); } else { *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index a4c9f62b..97774a7e 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -62,7 +62,7 @@ where f.call(&mut backend); - if V::USES_U32_COUNTER { + if core::mem::size_of::() == 4 { // handle 32-bit counter vst1q_u32(state.as_mut_ptr().offset(12), backend.state[3]); } else { @@ -101,8 +101,8 @@ impl ParBlocksSizeUser for Backend { /// Adds a counter row with either 32-bit or 64-bit addition macro_rules! add_counter { - ($a:expr, $b:expr, $is_32_bit:expr) => { - if $is_32_bit { + ($a:expr, $b:expr, $variant:ty) => { + if core::mem::size_of::<$variant::Counter>() == 4 { vaddq_u32($a, $b) } else { vreinterpretq_u32_u64(vaddq_u64( @@ -129,11 +129,7 @@ impl StreamBackend for Backend { self.gen_par_ks_blocks(&mut par); *block = par[0]; unsafe { - self.state[3] = add_counter!( - state3, - vld1q_u32([1, 0, 0, 0].as_ptr()), - V::USES_U32_COUNTER - ); + self.state[3] = add_counter!(state3, vld1q_u32([1, 0, 0, 0].as_ptr()), V); } } @@ -146,19 +142,19 @@ impl StreamBackend for Backend { self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[0], V::USES_U32_COUNTER), + add_counter!(self.state[3], self.ctrs[0], V), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[1], V::USES_U32_COUNTER), + add_counter!(self.state[3], self.ctrs[1], V), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[2], V::USES_U32_COUNTER), + add_counter!(self.state[3], self.ctrs[2], V), ], ]; @@ -187,7 +183,7 @@ impl StreamBackend for Backend { } add_assign_vec!( blocks[block][3], - add_counter!(self.state[3], self.ctrs[block - 1], V::USES_U32_COUNTER) + add_counter!(self.state[3], self.ctrs[block - 1], V) ); // write @@ -198,8 +194,7 @@ impl StreamBackend for Backend { ); } } - //self.state[3] = vaddq_u32(self.state[3], self.ctrs[3]); - self.state[3] = add_counter!(self.state[3], self.ctrs[3], V::USES_U32_COUNTER); + self.state[3] = add_counter!(self.state[3], self.ctrs[3], V); } } } @@ -242,19 +237,19 @@ impl Backend { self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[0], V::USES_U32_COUNTER), + add_counter!(self.state[3], self.ctrs[0], V), ], [ self.state[0], self.state[1], self.state[2], - 
add_counter!(self.state[3], self.ctrs[1], V::USES_U32_COUNTER), + add_counter!(self.state[3], self.ctrs[1], V), ], [ self.state[0], self.state[1], self.state[2], - add_counter!(self.state[3], self.ctrs[2], V::USES_U32_COUNTER), + add_counter!(self.state[3], self.ctrs[2], V), ], ]; @@ -269,8 +264,7 @@ impl Backend { add_assign_vec!(blocks[block][state_row], self.state[state_row]); } if block > 0 { - blocks[block][3] = - add_counter!(blocks[block][3], self.ctrs[block - 1], V::USES_U32_COUNTER); + blocks[block][3] = add_counter!(blocks[block][3], self.ctrs[block - 1], V); } // write blocks to buffer for state_row in 0..4 { @@ -281,7 +275,7 @@ impl Backend { } dest_ptr = dest_ptr.add(64); } - self.state[3] = add_counter!(self.state[3], self.ctrs[3], V::USES_U32_COUNTER); + self.state[3] = add_counter!(self.state[3], self.ctrs[3], V); } } diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index d6cf67c3..546c7cd5 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -29,7 +29,7 @@ impl<'a, R: Rounds, V: Variant> StreamBackend for Backend<'a, R, V> { fn gen_ks_block(&mut self, block: &mut Block) { let res = run_rounds::(&self.0.state); - if V::USES_U32_COUNTER { + if core::mem::size_of::() == 4 { self.0.state[12] = self.0.state[12].wrapping_add(1); } else { let no_carry = self.0.state[12].checked_add(1); diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index f580a2a3..d23673e0 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -44,7 +44,7 @@ where f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; - if !V::USES_U32_COUNTER { + if core::mem::size_of::() != 4 { state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; } } @@ -71,7 +71,7 @@ impl StreamBackend for Backend { fn gen_ks_block(&mut self, block: &mut Block) { unsafe { let res = rounds::(&self.v); - if V::USES_U32_COUNTER { + if core::mem::size_of::() == 4 { self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); } else { self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1)); diff --git a/chacha20/src/variants.rs b/chacha20/src/variants.rs index a15bd3c5..d2f50c8a 100644 --- a/chacha20/src/variants.rs +++ b/chacha20/src/variants.rs @@ -17,8 +17,6 @@ impl VariantCounter for u64 {} pub trait Variant: Clone { /// the size of the Nonce in u32s const NONCE_INDEX: usize; - /// This const should be evaluated at compile time - const USES_U32_COUNTER: bool; type Counter: VariantCounter; type CounterWords: AsRef<[u32]>; @@ -39,7 +37,6 @@ pub trait Variant: Clone { pub struct Ietf(); impl Variant for Ietf { const NONCE_INDEX: usize = 13; - const USES_U32_COUNTER: bool = true; type Counter = u32; type CounterWords = [u32; 1]; #[inline(always)] @@ -63,7 +60,6 @@ pub struct Legacy(); #[cfg(feature = "legacy")] impl Variant for Legacy { const NONCE_INDEX: usize = 14; - const USES_U32_COUNTER: bool = false; type Counter = u64; type CounterWords = [u32; 2]; #[inline(always)] From 1064554d00353c28ecb5bf9df966505899c4d07e Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 2 Aug 2024 12:05:52 -0500 Subject: [PATCH 09/10] fixed neon code --- chacha20/src/backends/neon.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 97774a7e..803573be 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -102,7 +102,7 @@ impl ParBlocksSizeUser for Backend { /// Adds a counter row with either 32-bit or 64-bit addition 
macro_rules! add_counter { ($a:expr, $b:expr, $variant:ty) => { - if core::mem::size_of::<$variant::Counter>() == 4 { + if core::mem::size_of::<<$variant>::Counter>() == 4 { vaddq_u32($a, $b) } else { vreinterpretq_u32_u64(vaddq_u64( From 388017d6747141b897483131a7d5c17e5605d6ed Mon Sep 17 00:00:00 2001 From: Noah Stiltner Date: Fri, 2 Aug 2024 12:32:06 -0500 Subject: [PATCH 10/10] removed unnecessary qualifications --- chacha20/src/backends/avx2.rs | 11 ++++++----- chacha20/src/backends/neon.rs | 4 ++-- chacha20/src/backends/soft.rs | 3 ++- chacha20/src/backends/sse2.rs | 5 +++-- 4 files changed, 13 insertions(+), 10 deletions(-) diff --git a/chacha20/src/backends/avx2.rs b/chacha20/src/backends/avx2.rs index eca6651d..3628d818 100644 --- a/chacha20/src/backends/avx2.rs +++ b/chacha20/src/backends/avx2.rs @@ -1,5 +1,6 @@ use crate::{Rounds, Variant}; use core::marker::PhantomData; +use core::mem::size_of; #[cfg(feature = "rng")] use crate::ChaChaCore; @@ -46,7 +47,7 @@ where _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(2))), ]; let mut c = _mm256_broadcastsi128_si256(_mm_loadu_si128(state_ptr.add(3))); - if core::mem::size_of::() == 4 { + if size_of::() == 4 { c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 0)); } else { c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 1, 0, 0)); @@ -54,7 +55,7 @@ where let mut ctr = [c; N]; for i in 0..N { ctr[i] = c; - if core::mem::size_of::() == 4 { + if size_of::() == 4 { c = _mm256_add_epi32(c, _mm256_set_epi32(0, 0, 0, 2, 0, 0, 0, 2)); } else { c = _mm256_add_epi64(c, _mm256_set_epi64x(0, 2, 0, 2)); @@ -70,7 +71,7 @@ where f.call(&mut backend); state[12] = _mm256_extract_epi32(backend.ctr[0], 0) as u32; - if core::mem::size_of::() != 4 { + if size_of::() != 4 { state[13] = _mm256_extract_epi32(backend.ctr[0], 1) as u32; } } @@ -132,7 +133,7 @@ impl StreamBackend for Backend { unsafe { let res = rounds::(&self.v, &self.ctr); for c in self.ctr.iter_mut() { - if core::mem::size_of::() == 4 { + if size_of::() == 4 { *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, 1, 0, 0, 0, 1)); } else { *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, 1, 0, 1)); @@ -155,7 +156,7 @@ impl StreamBackend for Backend { let pb = PAR_BLOCKS as i32; for c in self.ctr.iter_mut() { - if core::mem::size_of::() == 4 { + if size_of::() == 4 { *c = _mm256_add_epi32(*c, _mm256_set_epi32(0, 0, 0, pb, 0, 0, 0, pb)); } else { *c = _mm256_add_epi64(*c, _mm256_set_epi64x(0, pb as i64, 0, pb as i64)); diff --git a/chacha20/src/backends/neon.rs b/chacha20/src/backends/neon.rs index 803573be..b661c3d4 100644 --- a/chacha20/src/backends/neon.rs +++ b/chacha20/src/backends/neon.rs @@ -4,7 +4,7 @@ //! Jeffrey Walton (public domain). use crate::{Rounds, Variant, STATE_WORDS}; -use core::{arch::aarch64::*, marker::PhantomData}; +use core::{arch::aarch64::*, marker::PhantomData, mem::size_of}; #[cfg(feature = "rand_core")] use crate::ChaChaCore; @@ -102,7 +102,7 @@ impl ParBlocksSizeUser for Backend { /// Adds a counter row with either 32-bit or 64-bit addition macro_rules! add_counter { ($a:expr, $b:expr, $variant:ty) => { - if core::mem::size_of::<<$variant>::Counter>() == 4 { + if size_of::<<$variant>::Counter>() == 4 { vaddq_u32($a, $b) } else { vreinterpretq_u32_u64(vaddq_u64( diff --git a/chacha20/src/backends/soft.rs b/chacha20/src/backends/soft.rs index 546c7cd5..dab5df0c 100644 --- a/chacha20/src/backends/soft.rs +++ b/chacha20/src/backends/soft.rs @@ -2,6 +2,7 @@ //! intrinsics. 
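
// A minimal standalone sketch of the dispatch idiom this commit switches to:
// comparing `size_of::<V::Counter>()` against 4 is a constant for each
// monomorphized Variant, so the optimizer can drop the untaken branch in every
// instantiation. Here the generic parameter `C` stands in for `V::Counter`,
// and the 64-bit arm mirrors the carry handling in the soft backend.
use core::mem::size_of;

fn bump_block_counter<C>(state: &mut [u32; 16]) {
    if size_of::<C>() == 4 {
        // 32-bit (IETF-style) counter: wrap within state[12] only.
        state[12] = state[12].wrapping_add(1);
    } else {
        // 64-bit (legacy-style) counter: carry from state[12] into state[13].
        let (lo, carry) = state[12].overflowing_add(1);
        state[12] = lo;
        if carry {
            state[13] = state[13].wrapping_add(1);
        }
    }
}

fn main() {
    let mut state = [0u32; 16];
    state[12] = u32::MAX;
    bump_block_counter::<u64>(&mut state);
    assert_eq!((state[12], state[13]), (0, 1));
}
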
use crate::{quarter_round, ChaChaCore, Rounds, Variant, STATE_WORDS}; +use core::mem::size_of; #[cfg(feature = "cipher")] use crate::chacha::Block; @@ -29,7 +30,7 @@ impl<'a, R: Rounds, V: Variant> StreamBackend for Backend<'a, R, V> { fn gen_ks_block(&mut self, block: &mut Block) { let res = run_rounds::(&self.0.state); - if core::mem::size_of::() == 4 { + if size_of::() == 4 { self.0.state[12] = self.0.state[12].wrapping_add(1); } else { let no_carry = self.0.state[12].checked_add(1); diff --git a/chacha20/src/backends/sse2.rs b/chacha20/src/backends/sse2.rs index d23673e0..48d18162 100644 --- a/chacha20/src/backends/sse2.rs +++ b/chacha20/src/backends/sse2.rs @@ -14,6 +14,7 @@ use cipher::{ ParBlocksSizeUser }; use core::marker::PhantomData; +use core::mem::size_of; #[cfg(target_arch = "x86")] use core::arch::x86::*; @@ -44,7 +45,7 @@ where f.call(&mut backend); state[12] = _mm_cvtsi128_si32(backend.v[3]) as u32; - if core::mem::size_of::() != 4 { + if size_of::() != 4 { state[13] = _mm_extract_epi32(backend.v[3], 1) as u32; } } @@ -71,7 +72,7 @@ impl StreamBackend for Backend { fn gen_ks_block(&mut self, block: &mut Block) { unsafe { let res = rounds::(&self.v); - if core::mem::size_of::() == 4 { + if size_of::() == 4 { self.v[3] = _mm_add_epi32(self.v[3], _mm_set_epi32(0, 0, 0, 1)); } else { self.v[3] = _mm_add_epi64(self.v[3], _mm_set_epi64x(0, 1));
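
// A hedged usage sketch of what the series enables end to end: seeking a
// ChaCha20Legacy cipher up to the old u32 block-position limit and streaming
// across it, mirroring the `legacy_64_bit_counter` test above. The re-export
// paths and init/seek traits (`KeyIvInit`, `StreamCipher`, `StreamCipherSeek`)
// are assumed from the cipher crate's usual prelude and may differ slightly in
// the 0.5.0-pre series.
use chacha20::{ChaCha20Legacy, LegacyNonce};
use chacha20::cipher::{KeyIvInit, StreamCipher, StreamCipherSeek, StreamCipherSeekCore};

fn main() {
    let mut cipher = ChaCha20Legacy::new(&[0u8; 32].into(), &LegacyNonce::from([0u8; 8]));

    // Seek to a byte offset a few blocks shy of block position u32::MAX...
    cipher.seek((u32::MAX - 10) as u64 * 64);

    // ...and keep generating keystream; with the 64-bit counter the block
    // position crosses the 32-bit boundary instead of wrapping back to zero.
    let mut buf = [0u8; 64 * 32];
    cipher.apply_keystream(&mut buf);
    assert!(cipher.get_core().get_block_pos() > u32::MAX as u64);
}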