diff --git a/Cargo.toml b/Cargo.toml index a9fbcfd09..9982ba2d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,7 +47,7 @@ wasm-bindgen = { version = "0.2.87", optional = true } # When using the hax toolchain, we have more dependencies. # This is only required when doing proofs. [target.'cfg(hax)'.dependencies] -hax-lib-macros = { version = "0.1.0-pre.1", git = "https://github.com/hacspec/hax" } +hax-lib-macros = { version = "0.1.0-pre.1", git = "https://github.com/hacspec/hax", branch = "main" } hax-lib = { version = "0.1.0-pre.1", git = "https://github.com/hacspec/hax/", branch = "main" } [target.'cfg(all(not(target_os = "windows"), target_arch = "x86_64", libjade))'.dependencies] @@ -74,3 +74,4 @@ rand = [] wasm = ["wasm-bindgen"] log = ["dep:log"] tests = [] # Expose functions for testing. +experimental = [] # Expose experimental APIs. diff --git a/hax-driver.py b/hax-driver.py index 3a1244798..d2aa6e198 100755 --- a/hax-driver.py +++ b/hax-driver.py @@ -127,7 +127,7 @@ def shell(command, expect=0, cwd=None, env={}): f"-** +libcrux::kem::kyber::** +!libcrux_platform::platform::* {exclude_sha3_implementations} -libcrux::**::types::index_impls::**", "fstar", "--interfaces", - "+* -libcrux::kem::kyber::types +!libcrux_platform::**", + "+* -libcrux::kem::kyber::types +!libcrux_platform::** +!libcrux::digest::**", ], cwd=".", env=hax_env, @@ -136,6 +136,7 @@ def shell(command, expect=0, cwd=None, env={}): # remove this when https://github.com/hacspec/hax/issues/465 is # closed) shell(["rm", "-f", "./sys/platform/proofs/fstar/extraction/*.fst"]) + elif options.kyber_specification: shell( cargo_hax_into diff --git a/kyber-c.yaml b/kyber-c.yaml index ccbb99d2b..cf2f2d91e 100644 --- a/kyber-c.yaml +++ b/kyber-c.yaml @@ -2,6 +2,8 @@ files: - name: libcrux_digest api: - [libcrux, digest] + include_in_h: + - '"libcrux_hacl_glue.h"' - name: libcrux_platform api: - [libcrux_platform] diff --git a/kyber-crate.sh b/kyber-crate.sh index bf4a8f419..289058737 100755 --- a/kyber-crate.sh +++ b/kyber-crate.sh @@ -90,5 +90,6 @@ if [[ -n "$HACL_PACKAGES_HOME" ]]; then cp internal/*.h $HACL_PACKAGES_HOME/libcrux/include/internal/ cp *.h $HACL_PACKAGES_HOME/libcrux/include cp *.c $HACL_PACKAGES_HOME/libcrux/src +else + echo "Please set HACL_PACKAGES_HOME to the hacl-packages directory to copy the code over" 1>&2 fi -echo "Please set HACL_PACKAGES_HOME to the hacl-packages directory to copy the code over" 1>&2 diff --git a/proofs/fstar/extraction-edited.patch b/proofs/fstar/extraction-edited.patch index 14508795a..ed1e0535b 100644 --- a/proofs/fstar/extraction-edited.patch +++ b/proofs/fstar/extraction-edited.patch @@ -1,6 +1,6 @@ diff -ruN extraction/BitVecEq.fst extraction-edited/BitVecEq.fst ---- extraction/BitVecEq.fst 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-edited/BitVecEq.fst 2024-03-12 10:45:44.812929749 +0100 +--- extraction/BitVecEq.fst 1970-01-01 01:00:00 ++++ extraction-edited/BitVecEq.fst 2024-03-13 11:03:50 @@ -0,0 +1,12 @@ +module BitVecEq + @@ -15,8 +15,8 @@ diff -ruN extraction/BitVecEq.fst extraction-edited/BitVecEq.fst + + diff -ruN extraction/BitVecEq.fsti extraction-edited/BitVecEq.fsti ---- extraction/BitVecEq.fsti 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-edited/BitVecEq.fsti 2024-03-12 10:45:44.794930280 +0100 +--- extraction/BitVecEq.fsti 1970-01-01 01:00:00 ++++ extraction-edited/BitVecEq.fsti 2024-03-13 11:03:50 @@ -0,0 +1,294 @@ +module BitVecEq +#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -312,62 +312,37 @@ diff -ruN extraction/BitVecEq.fsti 
extraction-edited/BitVecEq.fsti + (ensures int_arr_bitwise_eq_range arr1 d arr2 d (n_offset1 * d) (n_offset2 * d) bits) + = admit () +*) -diff -ruN extraction/Libcrux.Digest.fst extraction-edited/Libcrux.Digest.fst ---- extraction/Libcrux.Digest.fst 2024-03-12 10:45:44.760931283 +0100 -+++ extraction-edited/Libcrux.Digest.fst 1970-01-01 01:00:00.000000000 +0100 -@@ -1,48 +0,0 @@ --module Libcrux.Digest +diff -ruN extraction/Libcrux.Digest.Incremental_x4.fsti extraction-edited/Libcrux.Digest.Incremental_x4.fsti +--- extraction/Libcrux.Digest.Incremental_x4.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Digest.Incremental_x4.fsti 1970-01-01 01:00:00 +@@ -1,23 +0,0 @@ +-module Libcrux.Digest.Incremental_x4 -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" -open Core -open FStar.Mul - --let sha3_256_ (payload: t_Slice u8) = Libcrux.Hacl.Sha3.sha256 payload +-val t_Shake128StateX4:Type - --let sha3_512_ (payload: t_Slice u8) = Libcrux.Hacl.Sha3.sha512 payload +-val impl__Shake128StateX4__absorb_final +- (v_N: usize) +- (self: t_Shake128StateX4) +- (input: t_Array (t_Slice u8) v_N) +- : Prims.Pure t_Shake128StateX4 Prims.l_True (fun _ -> Prims.l_True) - --let shake128 (v_LEN: usize) (data: t_Slice u8) = Libcrux.Hacl.Sha3.shake128 v_LEN data +-val impl__Shake128StateX4__free_memory (self: t_Shake128StateX4) +- : Prims.Pure Prims.unit Prims.l_True (fun _ -> Prims.l_True) - --let shake256 (v_LEN: usize) (data: t_Slice u8) = Libcrux.Hacl.Sha3.shake256 v_LEN data +-val impl__Shake128StateX4__new: Prims.unit +- -> Prims.Pure t_Shake128StateX4 Prims.l_True (fun _ -> Prims.l_True) - --let shake128x4_portable (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) = -- let input_len:usize = Core.Slice.impl__len data0 in -- let _:Prims.unit = -- if true -- then -- let _:Prims.unit = -- if -- ~.((input_len =. (Core.Slice.impl__len data1 <: usize) <: bool) && -- (input_len =. (Core.Slice.impl__len data2 <: usize) <: bool) && -- (input_len =. (Core.Slice.impl__len data3 <: usize) <: bool) && -- (input_len <=. (cast (Core.Num.impl__u32__MAX <: u32) <: usize) <: bool) && -- (v_LEN <=. 
(cast (Core.Num.impl__u32__MAX <: u32) <: usize) <: bool)) -- then -- Rust_primitives.Hax.never_to_any (Core.Panicking.panic "assertion failed: input_len == data1.len() && input_len == data2.len() &&\\n input_len == data3.len() && input_len <= u32::MAX as usize &&\\n LEN <= u32::MAX as usize" -- -- <: -- Rust_primitives.Hax.t_Never) -- in -- () -- in -- let digest0:t_Array u8 v_LEN = Libcrux.Hacl.Sha3.shake128 v_LEN data0 in -- let digest1:t_Array u8 v_LEN = Libcrux.Hacl.Sha3.shake128 v_LEN data1 in -- let digest2:t_Array u8 v_LEN = Libcrux.Hacl.Sha3.shake128 v_LEN data2 in -- let digest3:t_Array u8 v_LEN = Libcrux.Hacl.Sha3.shake128 v_LEN data3 in -- digest0, digest1, digest2, digest3 -- <: -- (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) -- --let shake128x4_256_ (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) = -- shake128x4_portable v_LEN data0 data1 data2 data3 -- --let shake128x4 (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) = -- if Libcrux_platform.Platform.simd256_support () -- then shake128x4_256_ v_LEN data0 data1 data2 data3 -- else shake128x4_portable v_LEN data0 data1 data2 data3 +-val impl__Shake128StateX4__squeeze_blocks (v_N v_M: usize) (self: t_Shake128StateX4) +- : Prims.Pure (t_Shake128StateX4 & t_Array (t_Array u8 v_N) v_M) +- Prims.l_True +- (fun _ -> Prims.l_True) diff -ruN extraction/Libcrux.Digest.fsti extraction-edited/Libcrux.Digest.fsti ---- extraction/Libcrux.Digest.fsti 2024-03-12 10:45:44.730932168 +0100 -+++ extraction-edited/Libcrux.Digest.fsti 2024-03-12 10:45:44.826929336 +0100 -@@ -3,6 +3,11 @@ +--- extraction/Libcrux.Digest.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Digest.fsti 2024-03-13 11:03:50 +@@ -3,11 +3,29 @@ open Core open FStar.Mul @@ -379,31 +354,27 @@ diff -ruN extraction/Libcrux.Digest.fsti extraction-edited/Libcrux.Digest.fsti val sha3_256_ (payload: t_Slice u8) : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) -@@ -19,11 +24,6 @@ - : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) - Prims.l_True - (fun _ -> Prims.l_True) -- --val shake128x4_256_ (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) -- : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) -- Prims.l_True -- (fun _ -> Prims.l_True) + val sha3_512_ (payload: t_Slice u8) + : Prims.Pure (t_Array u8 (sz 64)) Prims.l_True (fun _ -> Prims.l_True) - val shake128x4 (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) - : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) -diff -ruN extraction/Libcrux.Kem.fst extraction-edited/Libcrux.Kem.fst ---- extraction/Libcrux.Kem.fst 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-edited/Libcrux.Kem.fst 2024-03-12 10:45:44.788930457 +0100 -@@ -0,0 +1,6 @@ -+module Libcrux.Kem -+#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" -+open Core -+open FStar.Mul ++val shake128 (v_LEN: usize) (data: t_Slice u8) ++ : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) + + val shake256 (v_LEN: usize) (data: t_Slice u8) + : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) ++ ++val shake128x4_portable (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) ++ : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) ++ Prims.l_True ++ (fun _ -> Prims.l_True) + ++val shake128x4 (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) ++ : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 
v_LEN) ++ Prims.l_True ++ (fun _ -> Prims.l_True) diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fst extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst ---- extraction/Libcrux.Kem.Kyber.Arithmetic.fst 2024-03-12 10:45:44.774930870 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst 2024-03-12 10:45:44.800930103 +0100 +--- extraction/Libcrux.Kem.Kyber.Arithmetic.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst 2024-03-13 11:03:50 @@ -1,81 +1,364 @@ module Libcrux.Kem.Kyber.Arithmetic -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -649,18 +620,18 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fst extraction-edited/Libcrux. + }; + res +#pop-options -+ -+let montgomery_multiply_sfe_by_fer fe fer = -+ montgomery_reduce (mul_i32_b fe fer) -let montgomery_multiply_fe_by_fer (fe fer: i32) = montgomery_reduce (fe *! fer <: i32) ++let montgomery_multiply_sfe_by_fer fe fer = ++ montgomery_reduce (mul_i32_b fe fer) -let to_standard_domain (mfe: i32) = - montgomery_reduce (mfe *! v_MONTGOMERY_R_SQUARED_MOD_FIELD_MODULUS <: i32) -+let to_standard_domain mfe = -+ montgomery_reduce (mul_i32_b mfe (v_MONTGOMERY_R_SQUARED_MOD_FIELD_MODULUS <: i32_b 1353)) -let to_unsigned_representative (fe: i32) = ++let to_standard_domain mfe = ++ montgomery_reduce (mul_i32_b mfe (v_MONTGOMERY_R_SQUARED_MOD_FIELD_MODULUS <: i32_b 1353)) ++ +let to_unsigned_representative fe = let _:Prims.unit = () <: Prims.unit in - cast (fe +! (Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS &. (fe >>! 31l <: i32) <: i32) <: i32) @@ -680,7 +651,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fst extraction-edited/Libcrux. + assert (v fe < 0 ==> v res == v fe + 3329); + assert (v fe >= 0 ==> v res == v fe); + res <: int_t_d u16_inttype 12 -+ + +-let add_to_ring_element (v_K: usize) (lhs rhs: t_PolynomialRingElement) = +let derefine_poly_b #b x = + let r = createi (sz 256) (fun i -> (x.f_coefficients.[i] <: i32)) in + {f_coefficients = r} @@ -692,8 +664,7 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fst extraction-edited/Libcrux. +let derefine_matrix_b #v_K #b x = + let r = createi v_K (fun i -> derefine_vector_b #v_K #b x.[i]) in + r - --let add_to_ring_element (v_K: usize) (lhs rhs: t_PolynomialRingElement) = ++ +let cast_poly_b #b1 #b2 x = + let r = createi (sz 256) (fun i -> (x.f_coefficients.[i] <: i32_b b2)) in + let res = {f_coefficients = r} in @@ -809,8 +780,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fst extraction-edited/Libcrux. + + diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti ---- extraction/Libcrux.Kem.Kyber.Arithmetic.fsti 2024-03-12 10:45:44.767931077 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti 2024-03-12 10:45:44.824929395 +0100 +--- extraction/Libcrux.Kem.Kyber.Arithmetic.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti 2024-03-13 11:03:50 @@ -3,10 +3,32 @@ open Core open FStar.Mul @@ -860,7 +831,10 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-edited/Libcrux -let v_MONTGOMERY_R: i32 = 1l <= 0 /\ v x < 3329 /\ (v x * v v_MONTGOMERY_R) % 3329 == 1 /\ x = 169l} + +let int_to_spec_fe (m:int) : Spec.Kyber.field_element = @@ -869,10 +843,7 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-edited/Libcrux + if m_v < 0 then + m_v + v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS + else m_v - --val get_n_least_significant_bits (n: u8) (value: u32) -- : Prims.Pure u32 -- (requires n =. 4uy || n =. 
5uy || n =. 10uy || n =. 11uy || n =. v_MONTGOMERY_SHIFT) ++ +let wf_fe_to_spec_fe (m: wfFieldElement): Spec.Kyber.field_element = + if v m < 0 + then v m + v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS @@ -945,27 +916,24 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-edited/Libcrux - <: - i32) && - result <=. ((3l *! Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) /! 2l <: i32)) -- ++ montgomery_post value result) + -val montgomery_multiply_fe_by_fer (fe fer: i32) - : Prims.Pure i32 Prims.l_True (fun _ -> Prims.l_True) -- + -val to_standard_domain (mfe: i32) : Prims.Pure i32 Prims.l_True (fun _ -> Prims.l_True) -- --val to_unsigned_representative (fe: i32) -- : Prims.Pure u16 -- (requires -- fe >=. (Core.Ops.Arith.Neg.neg Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) && -- fe <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS) -+ montgomery_post value result) -+ -+ +val montgomery_multiply_sfe_by_fer #b1 #b2 (fe:i32_b b1) (fer: i32_b b2) + : Pure (i32_b (nat_div_ceil (b1 * b2) (v v_MONTGOMERY_R) + 1665)) + (requires (b1 * b2 < pow2_31)) + (ensures (fun result -> + montgomery_post (mul_i32_b fe fer) (result))) + -+ + +-val to_unsigned_representative (fe: i32) +- : Prims.Pure u16 +- (requires +- fe >=. (Core.Ops.Arith.Neg.neg Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) && +- fe <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS) +val to_standard_domain #b (mfe: i32_b b) + : Pure (i32_b (nat_div_ceil (b * 1353) (v v_MONTGOMERY_R) + 1665)) + (requires (b * 1353 < pow2_31)) @@ -985,9 +953,57 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-edited/Libcrux -type t_PolynomialRingElement = { f_coefficients:t_Array i32 (sz 256) } +type t_PolynomialRingElement = { f_coefficients:t_Array (t_FieldElement) (sz 256) } -+ + +-let impl__PolynomialRingElement__ZERO: t_PolynomialRingElement = +- { f_coefficients = Rust_primitives.Hax.repeat 0l (sz 256) } <: t_PolynomialRingElement +type t_PolynomialRingElement_b b = { f_coefficients:t_Array (i32_b b) (sz 256) } -+ + +-val add_to_ring_element (v_K: usize) (lhs rhs: t_PolynomialRingElement) +- : Prims.Pure t_PolynomialRingElement +- (requires +- Hax_lib.v_forall (fun i -> +- let i:usize = i in +- Hax_lib.implies (i <. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT +- <: +- bool) +- (fun temp_0_ -> +- let _:Prims.unit = temp_0_ in +- ((Core.Num.impl__i32__abs (lhs.f_coefficients.[ i ] <: i32) <: i32) <=. +- (((cast (v_K <: usize) <: i32) -! 1l <: i32) *! +- Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS +- <: +- i32) +- <: +- bool) && +- ((Core.Num.impl__i32__abs (rhs.f_coefficients.[ i ] <: i32) <: i32) <=. +- Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS +- <: +- bool)) +- <: +- bool)) +- (ensures +- fun result -> +- let result:t_PolynomialRingElement = result in +- Hax_lib.v_forall (fun i -> +- let i:usize = i in +- Hax_lib.implies (i <. +- (Core.Slice.impl__len (Rust_primitives.unsize result.f_coefficients +- <: +- t_Slice i32) +- <: +- usize) +- <: +- bool) +- (fun temp_0_ -> +- let _:Prims.unit = temp_0_ in +- (Core.Num.impl__i32__abs (result.f_coefficients.[ i ] <: i32) <: i32) <=. +- ((cast (v_K <: usize) <: i32) *! Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS +- <: +- i32) +- <: +- bool) +- <: +- bool)) +type wfPolynomialRingElement = t_PolynomialRingElement_b (v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS - 1) + +val derefine_poly_b (#b1:nat) (x:t_PolynomialRingElement_b b1): @@ -1110,59 +1126,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-edited/Libcrux + (forall i. 
v result.f_coefficients.[i] == v lhs.f_coefficients.[i] + v rhs.f_coefficients.[i])) + + - --let impl__PolynomialRingElement__ZERO: t_PolynomialRingElement = -- { f_coefficients = Rust_primitives.Hax.repeat 0l (sz 256) } <: t_PolynomialRingElement - --val add_to_ring_element (v_K: usize) (lhs rhs: t_PolynomialRingElement) -- : Prims.Pure t_PolynomialRingElement -- (requires -- Hax_lib.v_forall (fun i -> -- let i:usize = i in -- Hax_lib.implies (i <. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT -- <: -- bool) -- (fun temp_0_ -> -- let _:Prims.unit = temp_0_ in -- ((Core.Num.impl__i32__abs (lhs.f_coefficients.[ i ] <: i32) <: i32) <=. -- (((cast (v_K <: usize) <: i32) -! 1l <: i32) *! -- Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS -- <: -- i32) -- <: -- bool) && -- ((Core.Num.impl__i32__abs (rhs.f_coefficients.[ i ] <: i32) <: i32) <=. -- Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS -- <: -- bool)) -- <: -- bool)) -- (ensures -- fun result -> -- let result:t_PolynomialRingElement = result in -- Hax_lib.v_forall (fun i -> -- let i:usize = i in -- Hax_lib.implies (i <. -- (Core.Slice.impl__len (Rust_primitives.unsize result.f_coefficients -- <: -- t_Slice i32) -- <: -- usize) -- <: -- bool) -- (fun temp_0_ -> -- let _:Prims.unit = temp_0_ in -- (Core.Num.impl__i32__abs (result.f_coefficients.[ i ] <: i32) <: i32) <=. -- ((cast (v_K <: usize) <: i32) *! Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS -- <: -- i32) -- <: -- bool) -- <: -- bool)) ++ ++ diff -ruN extraction/Libcrux.Kem.Kyber.Compress.fst extraction-edited/Libcrux.Kem.Kyber.Compress.fst ---- extraction/Libcrux.Kem.Kyber.Compress.fst 2024-03-12 10:45:44.761931254 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Compress.fst 2024-03-12 10:45:44.803930015 +0100 +--- extraction/Libcrux.Kem.Kyber.Compress.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Compress.fst 2024-03-13 11:03:50 @@ -1,39 +1,79 @@ module Libcrux.Kem.Kyber.Compress -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -1217,10 +1185,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Compress.fst extraction-edited/Libcrux.Ke + assert (v fe > 2496 ==> r1 = 0s); + assert (v res = v r1); + res -+ + +-let decompress_ciphertext_coefficient (coefficient_bits: u8) (fe: i32) = +let compress_ciphertext_coefficient coefficient_bits fe = -+ let _:Prims.unit = () <: Prims.unit in -+ let _:Prims.unit = () <: Prims.unit in + let _:Prims.unit = () <: Prims.unit in + let _:Prims.unit = () <: Prims.unit in + let compressed:u32 = (cast (fe <: u16) <: u32) < v result >= 0 /\ v result < 3329) diff -ruN extraction/Libcrux.Kem.Kyber.Constant_time_ops.fst extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fst ---- extraction/Libcrux.Kem.Kyber.Constant_time_ops.fst 2024-03-12 10:45:44.744931755 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fst 2024-03-12 10:45:44.813929720 +0100 +--- extraction/Libcrux.Kem.Kyber.Constant_time_ops.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fst 2024-03-13 11:03:50 @@ -4,56 +4,163 @@ open FStar.Mul @@ -1520,8 +1488,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Constant_time_ops.fst extraction-edited/L + ) +#pop-options diff -ruN extraction/Libcrux.Kem.Kyber.Constant_time_ops.fsti extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fsti ---- extraction/Libcrux.Kem.Kyber.Constant_time_ops.fsti 2024-03-12 10:45:44.765931136 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fsti 2024-03-12 10:45:44.823929425 +0100 +--- extraction/Libcrux.Kem.Kyber.Constant_time_ops.fsti 
2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fsti 2024-03-13 11:03:50 @@ -20,7 +20,8 @@ val compare_ciphertexts_in_constant_time (v_CIPHERTEXT_SIZE: usize) (lhs rhs: t_Slice u8) @@ -1552,453 +1520,292 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Constant_time_ops.fsti extraction-edited/ - result =. rhs <: bool)) + Hax_lib.implies (selector =. 0uy <: bool) (fun _ -> result =. lhs <: bool) && + Hax_lib.implies (selector <>. 0uy <: bool) (fun _ -> result =. rhs <: bool)) -diff -ruN extraction/Libcrux.Kem.Kyber.fst extraction-edited/Libcrux.Kem.Kyber.fst ---- extraction/Libcrux.Kem.Kyber.fst 2024-03-12 10:45:44.729932198 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.fst 2024-03-12 10:45:44.787930487 +0100 -@@ -1,12 +1,29 @@ - module Libcrux.Kem.Kyber --#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" -+#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" +diff -ruN extraction/Libcrux.Kem.Kyber.Constants.fsti extraction-edited/Libcrux.Kem.Kyber.Constants.fsti +--- extraction/Libcrux.Kem.Kyber.Constants.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Constants.fsti 2024-03-13 11:03:50 +@@ -17,4 +17,6 @@ + + let v_H_DIGEST_SIZE: usize = sz 32 + ++let v_REJECTION_SAMPLING_SEED_SIZE: usize = sz 168 *! sz 5 ++ + let v_SHARED_SECRET_SIZE: usize = sz 32 +diff -ruN extraction/Libcrux.Kem.Kyber.Hash_functions.fst extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst +--- extraction/Libcrux.Kem.Kyber.Hash_functions.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst 2024-03-13 11:03:50 +@@ -3,129 +3,114 @@ open Core open FStar.Mul --let serialize_kem_secret_key -+let update_at_range_lemma #n -+ (s: t_Slice 't) -+ (i: Core.Ops.Range.t_Range (int_t n) {(Core.Ops.Range.impl_index_range_slice 't n).f_index_pre s i}) -+ (x: t_Slice 't) -+ : Lemma -+ (requires (Seq.length x == v i.f_end - v i.f_start)) -+ (ensures ( -+ let s' = Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x in -+ let len = v i.f_start in -+ forall (i: nat). i < len ==> Seq.index s i == Seq.index s' i -+ )) -+ [SMTPat (Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x)] -+ = let s' = Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x in -+ let len = v i.f_start in -+ introduce forall (i:nat {i < len}). Seq.index s i == Seq.index s' i -+ with (assert ( Seq.index (Seq.slice s 0 len) i == Seq.index s i -+ /\ Seq.index (Seq.slice s' 0 len) i == Seq.index s' i )) -+ -+let serialize_kem_secret_key #p - (v_SERIALIZED_KEY_LEN: usize) -- (private_key public_key implicit_rejection_value: t_Slice u8) -- = -+ (private_key public_key implicit_rejection_value: t_Slice u8) = - let out:t_Array u8 v_SERIALIZED_KEY_LEN = Rust_primitives.Hax.repeat 0uy v_SERIALIZED_KEY_LEN in - let pointer:usize = sz 0 in - let out:t_Array u8 v_SERIALIZED_KEY_LEN = -@@ -55,6 +72,8 @@ - t_Slice u8) - in - let pointer:usize = pointer +! (Core.Slice.impl__len public_key <: usize) in -+ let h_public_key = (Rust_primitives.unsize (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) -+ <: t_Slice u8) in - let out:t_Array u8 v_SERIALIZED_KEY_LEN = - Rust_primitives.Hax.Monomorphized_update_at.update_at_range out - ({ -@@ -70,16 +89,7 @@ - pointer +! 
Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE <: usize - } - <: -- Core.Ops.Range.t_Range usize ] -- <: -- t_Slice u8) -- (Rust_primitives.unsize (Libcrux.Kem.Kyber.Hash_functions.v_H public_key -- <: -- t_Array u8 (sz 32)) -- <: -- t_Slice u8) -- <: -- t_Slice u8) -+ Core.Ops.Range.t_Range usize ]) h_public_key) - in - let pointer:usize = pointer +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE in - let out:t_Array u8 v_SERIALIZED_KEY_LEN = -@@ -106,14 +116,32 @@ - <: - t_Slice u8) - in -+ assert (Seq.slice out 0 (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p)) `Seq.equal` private_key); -+ assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p)) -+ (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p)) `Seq.equal` public_key); -+ assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! -+ Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p)) -+ (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! -+ Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! -+ Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)) -+ `Seq.equal` Libcrux.Kem.Kyber.Hash_functions.v_H public_key); -+ assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! -+ Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! -+ Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)) -+ (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! -+ Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! -+ Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE +! -+ Spec.Kyber.v_SHARED_SECRET_SIZE)) -+ == implicit_rejection_value); -+ lemma_slice_append_4 out private_key public_key (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) implicit_rejection_value; - out +-let v_G (input: t_Slice u8) = Libcrux.Digest.sha3_512_ input ++let v_G (input: t_Slice u8) = ++ let res = Libcrux.Digest.sha3_512_ input in ++ admit(); // We assume that sha3_512 correctly implements G ++ res --let decapsulate -+let decapsulate #p - (v_K v_SECRET_KEY_SIZE v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: - usize) - (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_SECRET_KEY_SIZE) -- (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) -- = -+ (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) = -+ let orig_secret_key = secret_key.f_value in - let ind_cpa_secret_key, secret_key:(t_Slice u8 & t_Slice u8) = - Libcrux.Kem.Kyber.Types.impl_12__split_at v_SECRET_KEY_SIZE secret_key v_CPA_SECRET_KEY_SIZE - in -@@ -123,8 +151,12 @@ - let ind_cpa_public_key_hash, implicit_rejection_value:(t_Slice u8 & t_Slice u8) = - Core.Slice.impl__split_at secret_key Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE - in -+ assert (ind_cpa_secret_key == slice orig_secret_key (sz 0) v_CPA_SECRET_KEY_SIZE); -+ assert (ind_cpa_public_key == slice orig_secret_key v_CPA_SECRET_KEY_SIZE (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE)); -+ assert (ind_cpa_public_key_hash == slice orig_secret_key (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE) (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)); -+ assert (implicit_rejection_value == slice orig_secret_key (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE +! 
Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE) (length orig_secret_key)); - let decrypted:t_Array u8 (sz 32) = -- Libcrux.Kem.Kyber.Ind_cpa.decrypt v_K -+ Libcrux.Kem.Kyber.Ind_cpa.decrypt #p v_K - v_CIPHERTEXT_SIZE - v_C1_SIZE - v_VECTOR_U_COMPRESSION_FACTOR -@@ -152,6 +184,9 @@ - <: - t_Slice u8) - in -+ lemma_slice_append to_hash decrypted ind_cpa_public_key_hash; -+ assert (decrypted == Spec.Kyber.ind_cpa_decrypt p ind_cpa_secret_key ciphertext.f_value); -+ assert (to_hash == concat decrypted ind_cpa_public_key_hash); - let hashed:t_Array u8 (sz 64) = - Libcrux.Kem.Kyber.Hash_functions.v_G (Rust_primitives.unsize to_hash <: t_Slice u8) - in -@@ -159,6 +194,10 @@ - Core.Slice.impl__split_at (Rust_primitives.unsize hashed <: t_Slice u8) - Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE - in -+ assert ((shared_secret,pseudorandomness) == split hashed Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE); -+ assert (length implicit_rejection_value = v_SECRET_KEY_SIZE -! v_CPA_SECRET_KEY_SIZE -! v_PUBLIC_KEY_SIZE -! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE); -+ assert (length implicit_rejection_value = Spec.Kyber.v_SHARED_SECRET_SIZE); -+ assert (Spec.Kyber.v_SHARED_SECRET_SIZE <=. Spec.Kyber.v_IMPLICIT_REJECTION_HASH_INPUT_SIZE p); - let (to_hash: t_Array u8 v_IMPLICIT_REJECTION_HASH_INPUT_SIZE):t_Array u8 - v_IMPLICIT_REJECTION_HASH_INPUT_SIZE = - Libcrux.Kem.Kyber.Ind_cpa.into_padded_array v_IMPLICIT_REJECTION_HASH_INPUT_SIZE -@@ -180,11 +219,14 @@ - <: - t_Slice u8) - in -+ lemma_slice_append to_hash implicit_rejection_value ciphertext.f_value; - let (implicit_rejection_shared_secret: t_Array u8 (sz 32)):t_Array u8 (sz 32) = - Libcrux.Kem.Kyber.Hash_functions.v_PRF (sz 32) (Rust_primitives.unsize to_hash <: t_Slice u8) - in -+ assert (implicit_rejection_shared_secret == Spec.Kyber.v_J to_hash); -+ assert (Seq.length ind_cpa_public_key == v v_PUBLIC_KEY_SIZE); - let expected_ciphertext:t_Array u8 v_CIPHERTEXT_SIZE = -- Libcrux.Kem.Kyber.Ind_cpa.encrypt v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE -+ Libcrux.Kem.Kyber.Ind_cpa.encrypt #p v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE - v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 - v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE ind_cpa_public_key decrypted - pseudorandomness -@@ -194,16 +236,18 @@ - (Core.Convert.f_as_ref ciphertext <: t_Slice u8) - (Rust_primitives.unsize expected_ciphertext <: t_Slice u8) - in -+ let res = - Libcrux.Kem.Kyber.Constant_time_ops.select_shared_secret_in_constant_time shared_secret - (Rust_primitives.unsize implicit_rejection_shared_secret <: t_Slice u8) - selector -+ in +-let v_H (input: t_Slice u8) = Libcrux.Digest.sha3_256_ input ++let v_H (input: t_Slice u8) = ++ let res = Libcrux.Digest.sha3_256_ input in ++ admit(); // We assume that sha3_512 correctly implements H + res --let encapsulate -+let encapsulate #p - (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: - usize) - (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) -- (randomness: t_Array u8 (sz 32)) -- = -+ (randomness: t_Array u8 (sz 32)) = - let (to_hash: t_Array u8 (sz 64)):t_Array u8 (sz 64) = - Libcrux.Kem.Kyber.Ind_cpa.into_padded_array (sz 64) - (Rust_primitives.unsize randomness <: t_Slice u8) -@@ -234,6 +278,10 @@ - <: - t_Slice u8) - in -+ assert (Seq.slice to_hash 0 (v 
Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE) == randomness); -+ lemma_slice_append to_hash randomness (Spec.Kyber.v_H public_key.f_value); -+ assert (to_hash == concat randomness (Spec.Kyber.v_H public_key.f_value)); -+ - let hashed:t_Array u8 (sz 64) = - Libcrux.Kem.Kyber.Hash_functions.v_G (Rust_primitives.unsize to_hash <: t_Slice u8) - in -@@ -242,7 +290,7 @@ - Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE - in - let ciphertext:t_Array u8 v_CIPHERTEXT_SIZE = -- Libcrux.Kem.Kyber.Ind_cpa.encrypt v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE -+ Libcrux.Kem.Kyber.Ind_cpa.encrypt #p v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE - v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN - v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE - (Rust_primitives.unsize (Libcrux.Kem.Kyber.Types.impl_18__as_slice v_PUBLIC_KEY_SIZE -@@ -252,28 +300,26 @@ - <: - t_Slice u8) randomness pseudorandomness - in -- let shared_secret_array:t_Array u8 (sz 32) = Rust_primitives.Hax.repeat 0uy (sz 32) in -- let shared_secret_array:t_Array u8 (sz 32) = -- Core.Slice.impl__copy_from_slice shared_secret_array shared_secret -- in -- Core.Convert.f_into ciphertext, shared_secret_array -+ Core.Convert.f_into ciphertext, -+ Core.Result.impl__unwrap (Core.Convert.f_try_into shared_secret -+ <: -+ Core.Result.t_Result (t_Array u8 (sz 32)) Core.Array.t_TryFromSliceError) - <: - (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) +-let v_PRF (v_LEN: usize) (input: t_Slice u8) = Libcrux.Digest.shake256 v_LEN input ++let v_PRF (v_LEN: usize) (input: t_Slice u8) = ++ let res = Libcrux.Digest.shake256 v_LEN input in ++ admit(); // We assume that sha3_512 correctly implements H ++ res --let validate_public_key -+#push-options "--z3rlimit 100" -+let validate_public_key #p - (v_K v_RANKED_BYTES_PER_RING_ELEMENT v_PUBLIC_KEY_SIZE: usize) - (public_key: t_Array u8 v_PUBLIC_KEY_SIZE) - = -- let pk:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -- Libcrux.Kem.Kyber.Ind_cpa.deserialize_public_key v_K -+ let pk:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = -+ Libcrux.Kem.Kyber.Ind_cpa.deserialize_public_key #p v_K - (public_key.[ { Core.Ops.Range.f_end = v_RANKED_BYTES_PER_RING_ELEMENT } - <: -- Core.Ops.Range.t_RangeTo usize ] +-let absorb (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) = +- let _:Prims.unit = +- if true ++let v_XOFx4 v_K (input: t_Array (t_Array u8 (sz 34)) v_K) = ++ assert (v v_K >= 2); ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.repeat (Rust_primitives.Hax.repeat 0uy (sz 840) <: t_Array u8 (sz 840)) v_K ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ if ~.(Libcrux_platform.Platform.simd256_support () <: bool) + then +- let _:Prims.unit = +- if ~.((v_K =. sz 2 <: bool) || (v_K =. sz 3 <: bool) || (v_K =. 
sz 4 <: bool)) +- then +- Rust_primitives.Hax.never_to_any (Core.Panicking.panic "assertion failed: K == 2 || K == 3 || K == 4" +- ++ Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ ++ Core.Ops.Range.f_start = sz 0; ++ Core.Ops.Range.f_end = v_K ++ } + <: +- Rust_primitives.Hax.t_Never) +- in +- () +- in +- let state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = +- Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__new () +- in +- let (data: t_Array (t_Slice u8) v_K):t_Array (t_Slice u8) v_K = +- Rust_primitives.Hax.repeat (Rust_primitives.unsize (let list = [0uy] in +- FStar.Pervasives.assert_norm (Prims.eq2 (List.Tot.length list) 1); +- Rust_primitives.Hax.array_of_list 1 list) - <: - t_Slice u8) -+ Core.Ops.Range.t_RangeTo usize ]) - in - let public_key_serialized:t_Array u8 v_PUBLIC_KEY_SIZE = -- Libcrux.Kem.Kyber.Ind_cpa.serialize_public_key v_K -+ Libcrux.Kem.Kyber.Ind_cpa.serialize_public_key #p v_K - v_RANKED_BYTES_PER_RING_ELEMENT - v_PUBLIC_KEY_SIZE - pk -@@ -284,12 +330,12 @@ - t_Slice u8) +- v_K +- in +- let data:t_Array (t_Slice u8) v_K = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ +- Core.Ops.Range.f_start = sz 0; +- Core.Ops.Range.f_end = v_K +- } +- <: +- Core.Ops.Range.t_Range usize) +- <: +- Core.Ops.Range.t_Range usize) +- data +- (fun data i -> +- let data:t_Array (t_Slice u8) v_K = data in +- let i:usize = i in +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize data +- i +- (Rust_primitives.unsize (input.[ i ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ Core.Ops.Range.t_Range usize) + <: +- t_Array (t_Slice u8) v_K) +- in +- let state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = +- Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__absorb_final v_K state data +- in +- state +- +-let free_state (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = +- let _:Prims.unit = Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__free_memory xof_state in +- () +- +-let squeeze_block (v_K: usize) (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = +- let tmp0, out1:(Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & +- t_Array (t_Array u8 (sz 168)) v_K) = +- Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__squeeze_blocks (sz 168) v_K xof_state +- in +- let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = tmp0 in +- let (output: t_Array (t_Array u8 (sz 168)) v_K):t_Array (t_Array u8 (sz 168)) v_K = out1 in +- let out:t_Array (t_Array u8 (sz 168)) v_K = +- Rust_primitives.Hax.repeat (Rust_primitives.Hax.repeat 0uy (sz 168) <: t_Array u8 (sz 168)) v_K +- in +- let out:t_Array (t_Array u8 (sz 168)) v_K = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ +- Core.Ops.Range.f_start = sz 0; +- Core.Ops.Range.f_end = v_K +- } ++ Core.Ops.Range.t_Range usize) ++ out ++ (fun out i -> ++ let out:t_Array (t_Array u8 (sz 840)) v_K = out in ++ let i:usize = i in ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out ++ i ++ (Libcrux.Digest.shake128 (sz 840) ++ (Rust_primitives.unsize (input.[ i ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ <: ++ t_Array u8 (sz 840)) + <: +- Core.Ops.Range.t_Range usize) +- <: +- Core.Ops.Range.t_Range usize) ++ t_Array (t_Array u8 (sz 840)) v_K) ++ else ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ match cast (v_K <: usize) <: u8 with ++ | 2uy -> ++ let d0, d1, _, _:(t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & ++ t_Array u8 (sz 840)) = ++ Libcrux.Digest.shake128x4 (sz 840) ++ 
(Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 1 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 1 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 0) d0 ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 1) d1 ++ in ++ out ++ | 3uy -> ++ assert (v (cast v_K <: u8) = 3); ++ let d0, d1, d2, _:(t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & ++ t_Array u8 (sz 840)) = ++ Libcrux.Digest.shake128x4 (sz 840) ++ (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 1 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 2 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 0) d0 ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 1) d1 ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 2) d2 ++ in ++ out ++ | 4uy -> ++ assert (v (cast v_K <: u8) = 4); ++ let d0, d1, d2, d3:(t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & ++ t_Array u8 (sz 840)) = ++ Libcrux.Digest.shake128x4 (sz 840) ++ (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 1 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 2 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ (Rust_primitives.unsize (input.[ sz 3 ] <: t_Array u8 (sz 34)) <: t_Slice u8) ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 0) d0 ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 1) d1 ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 2) d2 ++ in ++ let out:t_Array (t_Array u8 (sz 840)) v_K = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 3) d3 ++ in ++ out ++ | _ -> out ++ in + out +- (fun out i -> +- let out:t_Array (t_Array u8 (sz 168)) v_K = out in +- let i:usize = i in +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out +- i +- (output.[ i ] <: t_Array u8 (sz 168)) +- <: +- t_Array (t_Array u8 (sz 168)) v_K) in - public_key =. 
public_key_serialized -+#pop-options +- let hax_temp_output:t_Array (t_Array u8 (sz 168)) v_K = out in +- xof_state, hax_temp_output +- <: +- (Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & t_Array (t_Array u8 (sz 168)) v_K) +- +-let squeeze_three_blocks (v_K: usize) (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = +- let tmp0, out1:(Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & +- t_Array (t_Array u8 (sz 504)) v_K) = +- Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__squeeze_blocks (sz 504) v_K xof_state +- in +- let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = tmp0 in +- let (output: t_Array (t_Array u8 (sz 504)) v_K):t_Array (t_Array u8 (sz 504)) v_K = out1 in +- let out:t_Array (t_Array u8 (sz 504)) v_K = +- Rust_primitives.Hax.repeat (Rust_primitives.Hax.repeat 0uy (sz 504) <: t_Array u8 (sz 504)) v_K +- in +- let out:t_Array (t_Array u8 (sz 504)) v_K = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ +- Core.Ops.Range.f_start = sz 0; +- Core.Ops.Range.f_end = v_K +- } +- <: +- Core.Ops.Range.t_Range usize) +- <: +- Core.Ops.Range.t_Range usize) +- out +- (fun out i -> +- let out:t_Array (t_Array u8 (sz 504)) v_K = out in +- let i:usize = i in +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out +- i +- (output.[ i ] <: t_Array u8 (sz 504)) +- <: +- t_Array (t_Array u8 (sz 504)) v_K) +- in +- let hax_temp_output:t_Array (t_Array u8 (sz 504)) v_K = out in +- xof_state, hax_temp_output +- <: +- (Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & t_Array (t_Array u8 (sz 504)) v_K) ++ admit(); // We assume that shake128x4 correctly implements XOFx4 ++ out +diff -ruN extraction/Libcrux.Kem.Kyber.Hash_functions.fsti extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fsti +--- extraction/Libcrux.Kem.Kyber.Hash_functions.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fsti 2024-03-13 11:03:50 +@@ -3,33 +3,17 @@ + open Core + open FStar.Mul --let generate_keypair -+let generate_keypair #p - (v_K v_CPA_PRIVATE_KEY_SIZE v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE v_BYTES_PER_RING_ELEMENT v_ETA1 v_ETA1_RANDOMNESS_SIZE: - usize) -- (randomness: t_Array u8 (sz 64)) -- = -+ (randomness: t_Array u8 (sz 64)) = - let ind_cpa_keypair_randomness:t_Slice u8 = - randomness.[ { - Core.Ops.Range.f_start = sz 0; -@@ -307,7 +353,7 @@ - in - let ind_cpa_private_key, public_key:(t_Array u8 v_CPA_PRIVATE_KEY_SIZE & - t_Array u8 v_PUBLIC_KEY_SIZE) = -- Libcrux.Kem.Kyber.Ind_cpa.generate_keypair v_K -+ Libcrux.Kem.Kyber.Ind_cpa.generate_keypair #p v_K - v_CPA_PRIVATE_KEY_SIZE - v_PUBLIC_KEY_SIZE - v_BYTES_PER_RING_ELEMENT -@@ -316,7 +362,7 @@ - ind_cpa_keypair_randomness - in - let secret_key_serialized:t_Array u8 v_PRIVATE_KEY_SIZE = -- serialize_kem_secret_key v_PRIVATE_KEY_SIZE -+ serialize_kem_secret_key #p v_PRIVATE_KEY_SIZE - (Rust_primitives.unsize ind_cpa_private_key <: t_Slice u8) - (Rust_primitives.unsize public_key <: t_Slice u8) - implicit_rejection_value -@@ -329,3 +375,4 @@ - v_PUBLIC_KEY_SIZE - private_key - (Core.Convert.f_into public_key <: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) -+ -diff -ruN extraction/Libcrux.Kem.Kyber.fsti extraction-edited/Libcrux.Kem.Kyber.fsti ---- extraction/Libcrux.Kem.Kyber.fsti 2024-03-12 10:45:44.747931667 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.fsti 2024-03-12 10:45:44.818929572 +0100 -@@ -10,36 +10,84 @@ - Libcrux.Kem.Kyber.Constants.v_CPA_PKE_KEY_GENERATION_SEED_SIZE +! 
- Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE - --val serialize_kem_secret_key -+val serialize_kem_secret_key (#p:Spec.Kyber.params) - (v_SERIALIZED_KEY_LEN: usize) - (private_key public_key implicit_rejection_value: t_Slice u8) -- : Prims.Pure (t_Array u8 v_SERIALIZED_KEY_LEN) Prims.l_True (fun _ -> Prims.l_True) -+ : Pure (t_Array u8 v_SERIALIZED_KEY_LEN) -+ (requires (length private_key == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ -+ length public_key == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -+ length implicit_rejection_value == Spec.Kyber.v_SHARED_SECRET_SIZE /\ -+ v_SERIALIZED_KEY_LEN == Spec.Kyber.v_SECRET_KEY_SIZE p)) -+ (ensures (fun res -> res == -+ Seq.append private_key ( -+ Seq.append public_key ( -+ Seq.append (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) implicit_rejection_value)))) - --val decapsulate -+val decapsulate (#p:Spec.Kyber.params) - (v_K v_SECRET_KEY_SIZE v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: - usize) - (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_SECRET_KEY_SIZE) - (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) -- : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) -+ : Pure (t_Array u8 (sz 32)) -+ (requires ( p == (let open Spec.Kyber in {v_RANK = v_K; v_ETA1; v_ETA2; v_VECTOR_U_COMPRESSION_FACTOR; v_VECTOR_V_COMPRESSION_FACTOR}) /\ -+ Spec.Kyber.valid_params p /\ -+ v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ -+ v_ETA2_RANDOMNESS_SIZE == Spec.Kyber.v_ETA2_RANDOMNESS_SIZE p /\ -+ v_IMPLICIT_REJECTION_HASH_INPUT_SIZE == Spec.Kyber.v_IMPLICIT_REJECTION_HASH_INPUT_SIZE p /\ -+ v_SECRET_KEY_SIZE == Spec.Kyber.v_SECRET_KEY_SIZE p /\ -+ v_CPA_SECRET_KEY_SIZE == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ -+ v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -+ v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ -+ v_C1_SIZE == Spec.Kyber.v_C1_SIZE p /\ -+ v_C1_BLOCK_SIZE == Spec.Kyber.v_C1_BLOCK_SIZE p /\ -+ v_C2_SIZE == Spec.Kyber.v_C2_SIZE p /\ -+ v_T_AS_NTT_ENCODED_SIZE = Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p -+ )) -+ (ensures (fun res -> -+ res == Spec.Kyber.ind_cca_decapsulate p secret_key.f_value ciphertext.f_value)) - --val encapsulate -+val encapsulate (#p:Spec.Kyber.params) - (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: - usize) - (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) - (randomness: t_Array u8 (sz 32)) -- : Prims.Pure (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) -- Prims.l_True -- (fun _ -> Prims.l_True) -+ : Pure (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) -+ (requires (p == (let open Spec.Kyber in {v_RANK = v_K; v_ETA1; v_ETA2; v_VECTOR_U_COMPRESSION_FACTOR; v_VECTOR_V_COMPRESSION_FACTOR}) /\ -+ Spec.Kyber.valid_params p /\ -+ v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ -+ v_ETA2_RANDOMNESS_SIZE == Spec.Kyber.v_ETA2_RANDOMNESS_SIZE p /\ -+ v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -+ v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ -+ v_C1_SIZE == Spec.Kyber.v_C1_SIZE p 
/\ -+ v_C2_SIZE == Spec.Kyber.v_C2_SIZE p /\ -+ v_T_AS_NTT_ENCODED_SIZE = Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p /\ -+ v_VECTOR_U_BLOCK_LEN == Spec.Kyber.v_C1_BLOCK_SIZE p -+ )) - --val validate_public_key -+ (ensures (fun (ct,ss) -> -+ (ct.f_value,ss) == Spec.Kyber.ind_cca_encapsulate p public_key.f_value randomness)) -+ -+val validate_public_key (#p:Spec.Kyber.params) - (v_K v_RANKED_BYTES_PER_RING_ELEMENT v_PUBLIC_KEY_SIZE: usize) - (public_key: t_Array u8 v_PUBLIC_KEY_SIZE) -- : Prims.Pure bool Prims.l_True (fun _ -> Prims.l_True) -+ : Prims.Pure bool -+ (requires (v_K == p.v_RANK /\ -+ v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -+ v_RANKED_BYTES_PER_RING_ELEMENT == Spec.Kyber.v_RANKED_BYTES_PER_RING_ELEMENT p -+ )) -+ (ensures (fun _ -> Prims.l_True)) - --val generate_keypair -+val generate_keypair (#p:Spec.Kyber.params) - (v_K v_CPA_PRIVATE_KEY_SIZE v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE v_BYTES_PER_RING_ELEMENT v_ETA1 v_ETA1_RANDOMNESS_SIZE: - usize) - (randomness: t_Array u8 (sz 64)) -- : Prims.Pure (Libcrux.Kem.Kyber.Types.t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -- Prims.l_True -- (fun _ -> Prims.l_True) -+ : Pure (Libcrux.Kem.Kyber.Types.t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -+ (requires (v_K == p.v_RANK /\ v_ETA1 == p.v_ETA1 /\ -+ v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ -+ v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -+ v_CPA_PRIVATE_KEY_SIZE == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ -+ v_PRIVATE_KEY_SIZE == Spec.Kyber.v_SECRET_KEY_SIZE p /\ -+ v_BYTES_PER_RING_ELEMENT == Spec.Kyber.v_RANKED_BYTES_PER_RING_ELEMENT p -+ )) -+ (ensures (fun kp -> -+ (kp.f_sk.f_value,kp.f_pk.f_value) == Spec.Kyber.ind_cca_generate_keypair p randomness)) -diff -ruN extraction/Libcrux.Kem.Kyber.Hash_functions.fst extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst ---- extraction/Libcrux.Kem.Kyber.Hash_functions.fst 2024-03-12 10:45:44.769931018 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst 2024-03-12 10:45:44.802930044 +0100 -@@ -3,13 +3,23 @@ - open Core - open FStar.Mul - --let v_G (input: t_Slice u8) = Libcrux.Digest.sha3_512_ input -+let v_G (input: t_Slice u8) = -+ let res = Libcrux.Digest.sha3_512_ input in -+ admit(); // We assume that sha3_512 correctly implements G -+ res - --let v_H (input: t_Slice u8) = Libcrux.Digest.sha3_256_ input -+let v_H (input: t_Slice u8) = -+ let res = Libcrux.Digest.sha3_256_ input in -+ admit(); // We assume that sha3_512 correctly implements H -+ res - --let v_PRF (v_LEN: usize) (input: t_Slice u8) = Libcrux.Digest.shake256 v_LEN input -+let v_PRF (v_LEN: usize) (input: t_Slice u8) = -+ let res = Libcrux.Digest.shake256 v_LEN input in -+ admit(); // We assume that sha3_512 correctly implements H -+ res - --let v_XOFx4 (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) = -+let v_XOFx4 v_K (input: t_Array (t_Array u8 (sz 34)) v_K) = -+ assert (v v_K >= 2); - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.repeat (Rust_primitives.Hax.repeat 0uy (sz 840) <: t_Array u8 (sz 840)) v_K - in -@@ -56,6 +66,7 @@ - in - out - | 3uy -> -+ assert (v (cast v_K <: u8) = 3); - let d0, d1, d2, _:(t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & - t_Array u8 (sz 840)) = - Libcrux.Digest.shake128x4 (sz 840) -@@ -75,6 +86,7 @@ - in - out - | 4uy -> -+ assert (v (cast v_K <: u8) = 4); - let d0, d1, d2, d3:(t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & - t_Array u8 (sz 840)) = - Libcrux.Digest.shake128x4 (sz 840) 
-@@ -100,4 +112,5 @@ - in - out - in -- out -+ admit(); // We assume that shake128x4 correctly implements XOFx4 -+ out -diff -ruN extraction/Libcrux.Kem.Kyber.Hash_functions.fsti extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fsti ---- extraction/Libcrux.Kem.Kyber.Hash_functions.fsti 2024-03-12 10:45:44.743931785 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fsti 2024-03-12 10:45:44.827929307 +0100 -@@ -3,12 +3,17 @@ - open Core - open FStar.Mul - --val v_G (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 64)) Prims.l_True (fun _ -> Prims.l_True) +-let v_BLOCK_SIZE: usize = sz 168 +val v_G (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 64)) Prims.l_True + (ensures (fun res -> res == Spec.Kyber.v_G input)) --val v_H (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) +-val v_G (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 64)) Prims.l_True (fun _ -> Prims.l_True) +val v_H (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True + (ensures (fun res -> res == Spec.Kyber.v_H input)) +-val v_H (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) +- val v_PRF (v_LEN: usize) (input: t_Slice u8) - : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) - --val v_XOFx4 (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) -- : Prims.Pure (t_Array (t_Array u8 (sz 840)) v_K) Prims.l_True (fun _ -> Prims.l_True) +-let v_THREE_BLOCKS: usize = v_BLOCK_SIZE *! sz 3 +- +-val absorb (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) +- : Prims.Pure Libcrux.Digest.Incremental_x4.t_Shake128StateX4 +- Prims.l_True +- (fun _ -> Prims.l_True) +- +-val free_state (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) +- : Prims.Pure Prims.unit Prims.l_True (fun _ -> Prims.l_True) +- +-val squeeze_block (v_K: usize) (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) +- : Prims.Pure +- (Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & t_Array (t_Array u8 (sz 168)) v_K) +- Prims.l_True +- (fun _ -> Prims.l_True) +- +-val squeeze_three_blocks (v_K: usize) (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) +- : Prims.Pure +- (Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & t_Array (t_Array u8 (sz 504)) v_K) +- Prims.l_True +- (fun _ -> Prims.l_True) + : Prims.Pure (t_Array u8 v_LEN) Prims.l_True + (ensures (fun res -> res == Spec.Kyber.v_PRF v_LEN input)) + @@ -2007,8 +1814,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Hash_functions.fsti extraction-edited/Lib + (ensures (fun res -> + (forall i. 
i < v v_K ==> Seq.index res i == Spec.Kyber.v_XOF (sz 840) (Seq.index input i)))) diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst ---- extraction/Libcrux.Kem.Kyber.Ind_cpa.fst 2024-03-12 10:45:44.771930959 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst 2024-03-12 10:45:44.817929602 +0100 +--- extraction/Libcrux.Kem.Kyber.Ind_cpa.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst 2024-03-13 11:03:50 @@ -1,5 +1,5 @@ module Libcrux.Kem.Kyber.Ind_cpa -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -2037,15 +1844,14 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem - = - let error_1_:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -- in ++ (prf_input: t_Array u8 (sz 33)) domain_separator = ++ let error_1_:t_Array (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (pow2 (v v_ETA2) - 1)) v_K = ++ Rust_primitives.Hax.repeat (etaZero (sz (pow2 (v v_ETA2) - 1))) v_K + in - let domain_separator, error_1_, prf_input:(u8 & - t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & - t_Array u8 (sz 33)) = - Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ -+ (prf_input: t_Array u8 (sz 33)) domain_separator = -+ let error_1_:t_Array (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (pow2 (v v_ETA2) - 1)) v_K = -+ Rust_primitives.Hax.repeat (etaZero (sz (pow2 (v v_ETA2) - 1))) v_K -+ in + let orig_domain_separator = domain_separator in + [@ inline_let] + let inv : inv_t v_K v_ETA2 = fun (acc:acc_t v_K v_ETA2) (i:usize) -> @@ -2114,14 +1920,13 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem - = - let re_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -- in -- let domain_separator, prf_input, re_as_ntt:(u8 & t_Array u8 (sz 33) & -- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) = -- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + (prf_input: t_Array u8 (sz 33)) domain_separator = + let re_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = + Rust_primitives.Hax.repeat (wfZero) v_K -+ in + in +- let domain_separator, prf_input, re_as_ntt:(u8 & t_Array u8 (sz 33) & +- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + let orig_domain_separator = domain_separator in + [@ inline_let] + let inv: (u8 & t_Array u8 (sz 33) & t_Array (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) v_K) -> usize -> Type = fun acc i -> @@ -2268,18 +2073,17 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem - = - let u_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -- in -- let u_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate -- (Core.Slice.impl__chunks_exact (Rust_primitives.unsize ciphertext <: t_Slice u8) -- ((Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! 
+#push-options "--split_queries always" +let deserialize_then_decompress_u (#p:Spec.Kyber.params) + (v_K v_CIPHERTEXT_SIZE v_VECTOR_U_ENCODED_SIZE v_U_COMPRESSION_FACTOR: usize) + (ciphertext: t_Array u8 v_CIPHERTEXT_SIZE) = + let u_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = + Rust_primitives.Hax.repeat wfZero v_K -+ in + in +- let u_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate +- (Core.Slice.impl__chunks_exact (Rust_primitives.unsize ciphertext <: t_Slice u8) +- ((Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! + let acc_t1 = t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K in + [@ inline_let] + let inv = fun (acc:acc_t1) (i:usize) -> True in @@ -2341,7 +2145,12 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem -let deserialize_public_key (v_K: usize) (public_key: t_Slice u8) = - let tt_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -- in ++#push-options "--z3rlimit 200" ++let deserialize_public_key (#p:Spec.Kyber.params) ++ (v_K: usize) (public_key: t_Slice u8) = ++ let tt_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat wfZero v_K + in - let tt_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate - (Core.Slice.impl__chunks_exact public_key @@ -2352,12 +2161,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem - Core.Iter.Adapters.Enumerate.t_Enumerate (Core.Slice.Iter.t_ChunksExact u8)) - <: - Core.Iter.Adapters.Enumerate.t_Enumerate (Core.Slice.Iter.t_ChunksExact u8)) -+#push-options "--z3rlimit 200" -+let deserialize_public_key (#p:Spec.Kyber.params) -+ (v_K: usize) (public_key: t_Slice u8) = -+ let tt_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat wfZero v_K -+ in + let acc_t = t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K in + [@ inline_let] + let inv = fun (acc:acc_t) (i:usize) -> True in @@ -2385,11 +2188,18 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem + t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) in - tt_as_ntt -- ++ admit(); //P-F ++ tt_as_ntt ++#pop-options + -let deserialize_secret_key (v_K: usize) (secret_key: t_Slice u8) = - let secret_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -- in ++#push-options "--split_queries always" ++let deserialize_secret_key (#p:Spec.Kyber.params) (v_K: usize) (secret_key: t_Slice u8) = ++ let secret_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat wfZero v_K + in - let secret_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate - (Core.Slice.impl__chunks_exact secret_key @@ -2400,15 +2210,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem - Core.Iter.Adapters.Enumerate.t_Enumerate 
(Core.Slice.Iter.t_ChunksExact u8)) - <: - Core.Iter.Adapters.Enumerate.t_Enumerate (Core.Slice.Iter.t_ChunksExact u8)) -+ admit(); //P-F -+ tt_as_ntt -+#pop-options -+ -+#push-options "--split_queries always" -+let deserialize_secret_key (#p:Spec.Kyber.params) (v_K: usize) (secret_key: t_Slice u8) = -+ let secret_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat wfZero v_K -+ in + let acc_t = t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K in + [@ inline_let] + let inv = fun (acc:acc_t) (i:usize) -> True in @@ -2482,15 +2283,14 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem + Libcrux.Kem.Kyber.Matrix.compute_message #p v_K v secret_as_ntt u_as_ntt in - Libcrux.Kem.Kyber.Serialize.compress_then_serialize_message message -- ++ let res = Libcrux.Kem.Kyber.Serialize.compress_then_serialize_message message in ++ res ++#pop-options + -let encrypt - (v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_LEN v_C2_LEN v_U_COMPRESSION_FACTOR v_V_COMPRESSION_FACTOR v_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: - usize) - (public_key: t_Slice u8) -+ let res = Libcrux.Kem.Kyber.Serialize.compress_then_serialize_message message in -+ res -+#pop-options -+ +#push-options "--z3rlimit 200" +let encrypt #p + v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_LEN v_C2_LEN v_U_COMPRESSION_FACTOR v_V_COMPRESSION_FACTOR v_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE @@ -2719,8 +2519,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-edited/Libcrux.Kem + res + diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti ---- extraction/Libcrux.Kem.Kyber.Ind_cpa.fsti 2024-03-12 10:45:44.737931962 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti 2024-03-12 10:45:44.829929248 +0100 +--- extraction/Libcrux.Kem.Kyber.Ind_cpa.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti 2024-03-13 11:03:50 @@ -1,80 +1,151 @@ module Libcrux.Kem.Kyber.Ind_cpa -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -2791,16 +2591,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-edited/Libcrux.Ke - : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) - Prims.l_True - (fun _ -> Prims.l_True) -- --val deserialize_public_key (v_K: usize) (public_key: t_Slice u8) -- : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) -- Prims.l_True -- (fun _ -> Prims.l_True) -- --val deserialize_secret_key (v_K: usize) (secret_key: t_Slice u8) -- : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) -- Prims.l_True -- (fun _ -> Prims.l_True) + : Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) + (requires v_K == p.v_RANK /\ + v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ @@ -2809,7 +2599,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-edited/Libcrux.Ke + (ensures fun res -> + Libcrux.Kem.Kyber.Arithmetic.to_spec_vector_b #p res == + Spec.Kyber.(vector_ntt (decode_then_decompress_u p (Seq.slice ciphertext 0 (v (Spec.Kyber.v_C1_SIZE p)))))) -+ + +-val deserialize_public_key (v_K: usize) (public_key: t_Slice u8) +- : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) +- Prims.l_True +- (fun _ -> Prims.l_True) +val deserialize_public_key (#p:Spec.Kyber.params) + (v_K: usize) (public_key: t_Array u8 
(Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p)) + : Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) @@ -2828,6 +2622,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-edited/Libcrux.Ke + Spec.Kyber.vector_decode_12 #p secret_key) + +-val deserialize_secret_key (v_K: usize) (secret_key: t_Slice u8) +- : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) +- Prims.l_True +- (fun _ -> Prims.l_True) +- -val decrypt +val decrypt (#p:Spec.Kyber.params) (v_K v_CIPHERTEXT_SIZE v_VECTOR_U_ENCODED_SIZE v_U_COMPRESSION_FACTOR v_V_COMPRESSION_FACTOR: @@ -2844,9 +2643,9 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-edited/Libcrux.Ke + v_V_COMPRESSION_FACTOR == p.v_VECTOR_V_COMPRESSION_FACTOR)) + (ensures (fun res -> + res == Spec.Kyber.ind_cpa_decrypt p secret_key ciphertext)) -+ -val encrypt ++ +val encrypt (#p:Spec.Kyber.params) (v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_LEN v_C2_LEN v_U_COMPRESSION_FACTOR v_V_COMPRESSION_FACTOR v_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: usize) @@ -2922,8 +2721,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-edited/Libcrux.Ke + + diff -ruN extraction/Libcrux.Kem.Kyber.Kyber1024.fst extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fst ---- extraction/Libcrux.Kem.Kyber.Kyber1024.fst 2024-03-12 10:45:44.756931401 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fst 2024-03-12 10:45:44.793930310 +0100 +--- extraction/Libcrux.Kem.Kyber.Kyber1024.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fst 2024-03-13 11:03:50 @@ -7,19 +7,19 @@ (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey (sz 3168)) (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext (sz 1568)) @@ -2957,8 +2756,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Kyber1024.fst extraction-edited/Libcrux.K (sz 3168) (sz 1568) diff -ruN extraction/Libcrux.Kem.Kyber.Kyber512.fst extraction-edited/Libcrux.Kem.Kyber.Kyber512.fst ---- extraction/Libcrux.Kem.Kyber.Kyber512.fst 2024-03-12 10:45:44.764931165 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Kyber512.fst 2024-03-12 10:45:44.783930605 +0100 +--- extraction/Libcrux.Kem.Kyber.Kyber512.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Kyber512.fst 2024-03-13 11:03:50 @@ -7,19 +7,19 @@ (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey (sz 1632)) (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext (sz 768)) @@ -2992,8 +2791,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Kyber512.fst extraction-edited/Libcrux.Ke (sz 1632) (sz 800) diff -ruN extraction/Libcrux.Kem.Kyber.Kyber768.fst extraction-edited/Libcrux.Kem.Kyber.Kyber768.fst ---- extraction/Libcrux.Kem.Kyber.Kyber768.fst 2024-03-12 10:45:44.772930929 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Kyber768.fst 2024-03-12 10:45:44.780930693 +0100 +--- extraction/Libcrux.Kem.Kyber.Kyber768.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Kyber768.fst 2024-03-13 11:03:50 @@ -7,19 +7,19 @@ (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey (sz 2400)) (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext (sz 1088)) @@ -3027,8 +2826,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Kyber768.fst extraction-edited/Libcrux.Ke (sz 2400) (sz 1184) diff -ruN extraction/Libcrux.Kem.Kyber.Kyber768.fsti extraction-edited/Libcrux.Kem.Kyber.Kyber768.fsti ---- extraction/Libcrux.Kem.Kyber.Kyber768.fsti 2024-03-12 10:45:44.749931608 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Kyber768.fsti 2024-03-12 10:45:44.807929897 +0100 
+--- extraction/Libcrux.Kem.Kyber.Kyber768.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Kyber768.fsti 2024-03-13 11:03:50 @@ -74,14 +74,15 @@ val decapsulate (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey (sz 2400)) @@ -3054,8 +2853,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Kyber768.fsti extraction-edited/Libcrux.K - (fun _ -> Prims.l_True) + (ensures (fun kp -> (kp.f_sk.f_value,kp.f_pk.f_value) == Spec.Kyber.kyber768_generate_keypair randomness)) diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem.Kyber.Matrix.fst ---- extraction/Libcrux.Kem.Kyber.Matrix.fst 2024-03-12 10:45:44.746931696 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Matrix.fst 2024-03-12 10:45:44.791930369 +0100 +--- extraction/Libcrux.Kem.Kyber.Matrix.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Matrix.fst 2024-03-13 11:03:50 @@ -3,192 +3,188 @@ open Core open FStar.Mul @@ -3067,7 +2866,18 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. - = - let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -- in ++open Libcrux.Kem.Kyber.Arithmetic ++ ++let op_Array_Access (x:Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) (i:usize{v i < 256}): i32 = ++ x.f_coefficients.[i] ++ ++ ++#push-options "--ifuel 0 --z3rlimit 700" ++let compute_As_plus_e v_K matrix_A s_as_ntt error_as_ntt = ++ let wfZero: wfPolynomialRingElement = (Libcrux.Kem.Kyber.Arithmetic.cast_poly_b #1 #3328 Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO) in ++ let result:t_Array wfPolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat wfZero v_K + in - let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate - (Core.Slice.impl__iter (Rust_primitives.unsize matrix_A @@ -3083,18 +2893,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. - <: - Core.Iter.Adapters.Enumerate.t_Enumerate - (Core.Slice.Iter.t_Iter (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K))) -+open Libcrux.Kem.Kyber.Arithmetic -+ -+let op_Array_Access (x:Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) (i:usize{v i < 256}): i32 = -+ x.f_coefficients.[i] -+ -+ -+#push-options "--ifuel 0 --z3rlimit 700" -+let compute_As_plus_e v_K matrix_A s_as_ntt error_as_ntt = -+ let wfZero: wfPolynomialRingElement = (Libcrux.Kem.Kyber.Arithmetic.cast_poly_b #1 #3328 Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO) in -+ let result:t_Array wfPolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat wfZero v_K -+ in + [@ inline_let] + let inv0 = fun (acc:t_Array wfPolynomialRingElement v_K) (i:usize) -> + (v i <= v v_K) /\ @@ -3264,8 +3062,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. } <: - Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement))) -- in -- result + Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K*3328)) in + assert ((result.[i]).f_coefficients.[j] == resultij); + assert(inv2 result (j +! sz 1)); @@ -3277,7 +3073,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. + assert (forall (j:usize). (v j >= v i + 1 /\ v j < v v_K) ==> derefine_poly_b result.[j] == derefine_poly_b orig_result.[j]); + assume (inv0 result (i +! 
sz 1)); + result) -+ in + in +- result + admit(); //P-F + result +#pop-options @@ -3293,15 +3090,15 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. +let compute_message #p v_K m_v secret_as_ntt u_as_ntt = + let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) = + Libcrux.Kem.Kyber.Arithmetic.cast_poly_b Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO -+ in + in +- let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + let acc_t = Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) in + [@ inline_let] + let inv = fun (acc:acc_t) (i:usize) -> + (v i <= v v_K) /\ + (poly_range #(v v_K * 3328) acc (v i * 3328)) - in -- let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = -- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ ++ in + let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) = + Rust_primitives.Iterators.foldi_range #_ #acc_t #inv { Core.Ops.Range.f_start = sz 0; @@ -3420,14 +3217,13 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. - = - let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = - Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO -- in -- let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = -- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ +#push-options "--ifuel 0 --z3rlimit 100" +let compute_ring_element_v v_K tt_as_ntt r_as_ntt error_2_ message = + let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) = + Libcrux.Kem.Kyber.Arithmetic.cast_poly_b Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO -+ in + in +- let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + [@ inline_let] + let inv = fun (acc:t_PolynomialRingElement_b (v v_K * 3328)) (i:usize) -> + (v i <= 256) /\ @@ -3548,7 +3344,12 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. - = - let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -- in ++ (a_as_ntt: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) v_K) ++ (r_as_ntt error_1_: t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) = ++ let wfZero: wfPolynomialRingElement = (Libcrux.Kem.Kyber.Arithmetic.cast_poly_b #1 #3328 Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO) in ++ let result:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat wfZero v_K + in - let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = - Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate - (Core.Slice.impl__iter (Rust_primitives.unsize a_as_ntt @@ -3564,12 +3365,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. 
- <: - Core.Iter.Adapters.Enumerate.t_Enumerate - (Core.Slice.Iter.t_Iter (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K))) -+ (a_as_ntt: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) v_K) -+ (r_as_ntt error_1_: t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) = -+ let wfZero: wfPolynomialRingElement = (Libcrux.Kem.Kyber.Arithmetic.cast_poly_b #1 #3328 Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO) in -+ let result:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat wfZero v_K -+ in + let acc_t = t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K in + [@ inline_let] + let inv0 = fun (acc:t_Array wfPolynomialRingElement v_K) (i:usize) -> @@ -3660,7 +3455,16 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. - (result.[ i ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) - <: - Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -- in ++ (Libcrux.Kem.Kyber.Ntt.invert_ntt_montgomery v_K resulti) ++ in ++ [@ inline_let] ++ let inv2 = fun (acc:t_Array (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (64*v v_K * 3328)) v_K) (inner:usize) -> ++ (v inner <= 256) /\ ++ (forall (j:usize). (v j < v i /\ v j < v v_K) ==> acc.[j] == orig_result_cast.[j]) /\ ++ (forall (j:usize). (v j > v i /\ v j < v v_K) ==> acc.[j] == orig_result_cast.[j]) /\ ++ (forall (j:usize). (v j < v inner) ==> (i32_range (acc.[i] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (64*v v_K * 3328)).f_coefficients.[j] 3328)) ++ // And all indexes above v inner are unchanged from result1 + in - Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ - Core.Ops.Range.f_start = sz 0; - Core.Ops.Range.f_end @@ -3671,16 +3475,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. - Core.Ops.Range.t_Range usize) - <: - Core.Ops.Range.t_Range usize) -+ (Libcrux.Kem.Kyber.Ntt.invert_ntt_montgomery v_K resulti) -+ in -+ [@ inline_let] -+ let inv2 = fun (acc:t_Array (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (64*v v_K * 3328)) v_K) (inner:usize) -> -+ (v inner <= 256) /\ -+ (forall (j:usize). (v j < v i /\ v j < v v_K) ==> acc.[j] == orig_result_cast.[j]) /\ -+ (forall (j:usize). (v j > v i /\ v j < v v_K) ==> acc.[j] == orig_result_cast.[j]) /\ -+ (forall (j:usize). (v j < v inner) ==> (i32_range (acc.[i] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (64*v v_K * 3328)).f_coefficients.[j] 3328)) -+ // And all indexes above v inner are unchanged from result1 -+ in + assert (forall (j:usize). (v j < v i /\ v j < v v_K) ==> result.[j] == orig_result_cast.[j]); + assert (forall (j:usize). (v j > v i /\ v j < v v_K) ==> result.[j] == orig_result_cast.[j]); + assert (inv2 result (sz 0)); @@ -3790,7 +3584,18 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. 
v_A_transpose in let i:usize = i in -@@ -496,11 +482,11 @@ +@@ -482,8 +468,8 @@ + in + seeds) + in +- let sampled:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = +- Libcrux.Kem.Kyber.Sampling.sample_from_xof v_K seeds ++ let xof_bytes:t_Array (t_Array u8 (sz 840)) v_K = ++ Libcrux.Kem.Kyber.Hash_functions.v_XOFx4 v_K seeds + in + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + Core.Ops.Range.f_start = sz 0; +@@ -496,40 +482,46 @@ v_A_transpose (fun v_A_transpose j -> let v_A_transpose:t_Array @@ -3799,12 +3604,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. v_A_transpose in let j:usize = j in -- let sampled:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = + let sampled:Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement = - Libcrux.Kem.Kyber.Sampling.sample_from_uniform_distribution (xof_bytes.[ j ] - <: - t_Array u8 (sz 840)) -@@ -508,33 +494,34 @@ ++ Libcrux.Kem.Kyber.Sampling.sample_from_uniform_distribution (xof_bytes.[ j ] ++ <: ++ t_Array u8 (sz 840)) ++ in if transpose then let v_A_transpose:t_Array @@ -3818,7 +3622,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. - t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) + t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) i - sampled +- (sampled.[ j ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) ++ sampled <: - t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) + t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) @@ -3836,7 +3641,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. - t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) + t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) j - sampled +- (sampled.[ j ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) ++ sampled <: - t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) + t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) @@ -3846,8 +3652,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fst extraction-edited/Libcrux.Kem. 
+ admit(); //P-F v_A_transpose diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fsti extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti ---- extraction/Libcrux.Kem.Kyber.Matrix.fsti 2024-03-12 10:45:44.758931342 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti 2024-03-12 10:45:44.834929100 +0100 +--- extraction/Libcrux.Kem.Kyber.Matrix.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti 2024-03-13 11:03:50 @@ -3,39 +3,71 @@ open Core open FStar.Mul @@ -3873,9 +3679,9 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fsti extraction-edited/Libcrux.Kem + (to_spec_matrix_b #p matrix_A) + (to_spec_vector_b #p s_as_ntt) + (to_spec_vector_b #p error_as_ntt)) -+ -val compute_message ++ +val compute_message (#p:Spec.Kyber.params) (v_K: usize) - (v: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) @@ -3924,11 +3730,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fsti extraction-edited/Libcrux.Kem - : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) - Prims.l_True - (fun _ -> Prims.l_True) -- --val sample_matrix_A (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) -- : Prims.Pure (t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) -- Prims.l_True -- (fun _ -> Prims.l_True) + (a_as_ntt: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) v_K) + (r_as_ntt error_1_: t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) + : Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) @@ -3939,7 +3740,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fsti extraction-edited/Libcrux.Kem + let e_spec = Libcrux.Kem.Kyber.Arithmetic.to_spec_vector_b #p error_1_ in + let res_spec = Libcrux.Kem.Kyber.Arithmetic.to_spec_vector_b #p res in + res_spec == Spec.Kyber.(vector_add (vector_inv_ntt (matrix_vector_mul a_spec r_spec)) e_spec)) -+ + +-val sample_matrix_A (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) +- : Prims.Pure (t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) +- Prims.l_True +- (fun _ -> Prims.l_True) + + +val sample_matrix_A (#p:Spec.Kyber.params) (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) @@ -3950,8 +3755,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Matrix.fsti extraction-edited/Libcrux.Kem + if transpose then Libcrux.Kem.Kyber.Arithmetic.to_spec_matrix_b #p res == matrix_A + else Libcrux.Kem.Kyber.Arithmetic.to_spec_matrix_b #p res == Spec.Kyber.matrix_transpose matrix_A) diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fst extraction-edited/Libcrux.Kem.Kyber.Ntt.fst ---- extraction/Libcrux.Kem.Kyber.Ntt.fst 2024-03-12 10:45:44.735932021 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Ntt.fst 2024-03-12 10:45:44.820929513 +0100 +--- extraction/Libcrux.Kem.Kyber.Ntt.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Ntt.fst 2024-03-13 11:03:50 @@ -1,56 +1,130 @@ module Libcrux.Kem.Kyber.Ntt -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -3967,13 +3772,12 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fst extraction-edited/Libcrux.Kem.Kyb - Libcrux.Kem.Kyber.Arithmetic.montgomery_reduce ((a0 *! b1 <: i32) +! (a1 *! 
b0 <: i32) <: i32) - <: - (i32 & i32) -- + -let invert_ntt_at_layer - (zeta_i: usize) - (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) - (layer: usize) - = -+ +let v_ZETAS_TIMES_MONTGOMERY_R = + let list : list (i32_b 1664) = + [ @@ -4618,9 +4422,9 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fst extraction-edited/Libcrux.Kem.Kyb + let re:Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement = down_cast_poly_b #(6*3328+11207) #3328 re in + re +#pop-options -+ -let ntt_multiply (lhs rhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) = ++ +#push-options "--z3rlimit 100" +let ntt_multiply lhs rhs = let _:Prims.unit = () <: Prims.unit in @@ -4862,8 +4666,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fst extraction-edited/Libcrux.Kem.Kyb + down_cast_poly_b #(8*3328) #3328 re +#pop-options diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fsti extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti ---- extraction/Libcrux.Kem.Kyber.Ntt.fsti 2024-03-12 10:45:44.768931047 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti 2024-03-12 10:45:44.808929867 +0100 +--- extraction/Libcrux.Kem.Kyber.Ntt.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti 2024-03-13 11:03:50 @@ -2,223 +2,80 @@ #set-options "--fuel 0 --ifuel 1 --z3rlimit 15" open Core @@ -4888,34 +4692,20 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fsti extraction-edited/Libcrux.Kem.Ky - in - FStar.Pervasives.assert_norm (Prims.eq2 (List.Tot.length list) 128); - Rust_primitives.Hax.array_of_list 128 list -- ++val v_ZETAS_TIMES_MONTGOMERY_R: x:t_Array (i32_b 1664) (sz 128){v (x.[sz 1] <: i32) == -758} + -val ntt_multiply_binomials: (i32 & i32) -> (i32 & i32) -> zeta: i32 - -> Prims.Pure (i32 & i32) Prims.l_True (fun _ -> Prims.l_True) -- ++val ntt_multiply_binomials (a:wfFieldElement&wfFieldElement) (b: wfFieldElement&wfFieldElement) (zeta: i32_b 1664) : ++ Pure (wfFieldElement & wfFieldElement) ++ (requires True) ++ (ensures (fun _ -> True)) + -val invert_ntt_at_layer - (zeta_i: usize) - (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) - (layer: usize) - : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -- Prims.l_True -- (fun _ -> Prims.l_True) -- --val invert_ntt_montgomery (v_K: usize) (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -- : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement -- Prims.l_True -- (fun _ -> Prims.l_True) -+val v_ZETAS_TIMES_MONTGOMERY_R: x:t_Array (i32_b 1664) (sz 128){v (x.[sz 1] <: i32) == -758} - --val ntt_at_layer -- (zeta_i: usize) -- (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -- (layer v__initial_coefficient_bound: usize) -- : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -+val ntt_multiply_binomials (a:wfFieldElement&wfFieldElement) (b: wfFieldElement&wfFieldElement) (zeta: i32_b 1664) : -+ Pure (wfFieldElement & wfFieldElement) -+ (requires True) -+ (ensures (fun _ -> True)) -+ +val invert_ntt_at_layer (#v_K:usize{v v_K >= 1 /\ v v_K <= 4}) + (#b:nat{b <= v v_K * 3328 * 64}) + (zeta_i: usize{v zeta_i >= 1 /\ v zeta_i <= 128}) @@ -4929,15 +4719,21 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fsti extraction-edited/Libcrux.Kem.Ky - (fun _ -> Prims.l_True) + (fun x -> let (zeta_fin,re) = x in v zeta_fin == pow2 (7 - v layer)) --val ntt_at_layer_3_ -- (zeta_i: usize) -- (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -- (layer: usize) -- : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +-val invert_ntt_montgomery (v_K: usize) (re: 
Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement +- Prims.l_True +- (fun _ -> Prims.l_True) +val invert_ntt_montgomery (v_K: usize{v v_K >= 1 /\ v v_K <= 4}) + (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328)) + : Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (64 * v v_K * 3328) -+ + +-val ntt_at_layer +- (zeta_i: usize) +- (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- (layer v__initial_coefficient_bound: usize) +- : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- Prims.l_True +- (fun _ -> Prims.l_True) +val ntt_at_layer + (#b:nat{b <= 31175}) + (zeta_i: usize{v zeta_i < 128}) @@ -4949,7 +4745,12 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fsti extraction-edited/Libcrux.Kem.Ky + : Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (3328+b)) + (requires True) + (ensures fun (zeta_i, result) -> v zeta_i == pow2 (8 - v layer) - 1) -+ + +-val ntt_at_layer_3_ +- (zeta_i: usize) +- (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- (layer: usize) +- : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +val ntt_at_layer_3_ (#b:nat) + (zeta_i: usize{v zeta_i < 128}) + (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b b) @@ -4977,7 +4778,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fsti extraction-edited/Libcrux.Kem.Ky + : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (3328+b)) Prims.l_True - (fun _ -> Prims.l_True) -- ++ (ensures fun (zeta_i,result) -> v zeta_i == pow2 (8 - v layer) - 1) + -val ntt_binomially_sampled_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) - : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement - (requires @@ -5031,7 +4833,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fsti extraction-edited/Libcrux.Kem.Ky - bool) - <: - bool)) -- ++val ntt_binomially_sampled_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 7) ++ : Prims.Pure (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) ++ (requires True) ++ (ensures (fun _ -> True)) + -val ntt_multiply (lhs rhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) - : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement - (requires @@ -5082,13 +4888,7 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fsti extraction-edited/Libcrux.Kem.Ky - bool) - <: - bool)) -+ (ensures fun (zeta_i,result) -> v zeta_i == pow2 (8 - v layer) - 1) - -+val ntt_binomially_sampled_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 7) -+ : Prims.Pure (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) -+ (requires True) -+ (ensures (fun _ -> True)) -+ +- +val ntt_multiply (lhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) + (rhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) + : Prims.Pure (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) @@ -5156,19 +4956,18 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Ntt.fsti extraction-edited/Libcrux.Kem.Ky + (ensures fun _ -> True) + diff -ruN extraction/Libcrux.Kem.Kyber.Sampling.fst extraction-edited/Libcrux.Kem.Kyber.Sampling.fst ---- extraction/Libcrux.Kem.Kyber.Sampling.fst 2024-03-12 10:45:44.738931932 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Sampling.fst 2024-03-12 10:45:44.831929189 +0100 -@@ -3,27 +3,34 @@ +--- extraction/Libcrux.Kem.Kyber.Sampling.fst 2024-03-13 11:03:50 ++++ 
extraction-edited/Libcrux.Kem.Kyber.Sampling.fst 2024-03-13 11:03:50 +@@ -3,22 +3,34 @@ open Core open FStar.Mul --let rejection_sampling_panic_with_diagnostic (_: Prims.unit) = +let rejection_sampling_panic_with_diagnostic () : Prims.unit = + admit(); // This should never be reachable - Rust_primitives.Hax.never_to_any (Core.Panicking.panic "explicit panic" - <: - Rust_primitives.Hax.t_Never) - ++ Rust_primitives.Hax.never_to_any (Core.Panicking.panic "explicit panic" ++ <: ++ Rust_primitives.Hax.t_Never) ++ +#push-options "--ifuel 0 --z3rlimit 100" let sample_from_binomial_distribution_2_ (randomness: t_Slice u8) = - let (sampled: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement):Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement @@ -5206,7 +5005,7 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Sampling.fst extraction-edited/Libcrux.Ke let (random_bits_as_u32: u32):u32 = (((cast (byte_chunk.[ sz 0 ] <: u8) <: u32) |. ((cast (byte_chunk.[ sz 1 ] <: u8) <: u32) <>! outcome_set <: u32) &. 7ul <: u32) <: i32 -@@ -128,8 +173,22 @@ +@@ -123,8 +173,22 @@ <: i32 in @@ -5397,7 +5196,7 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Sampling.fst extraction-edited/Libcrux.Ke { sampled with Libcrux.Kem.Kyber.Arithmetic.f_coefficients -@@ -140,15 +199,18 @@ +@@ -135,15 +199,18 @@ (outcome_1_ -! outcome_2_ <: i32) } <: @@ -5417,60 +5216,79 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Sampling.fst extraction-edited/Libcrux.Ke match cast (v_ETA <: usize) <: u32 with | 2ul -> sample_from_binomial_distribution_2_ randomness | 3ul -> sample_from_binomial_distribution_3_ randomness -@@ -158,46 +220,62 @@ +@@ -153,227 +220,131 @@ <: Rust_primitives.Hax.t_Never) +-let sample_from_uniform_distribution_next +- (v_K v_N: usize) +- (randomness: t_Array (t_Array u8 v_N) v_K) +- (sampled_coefficients: t_Array usize v_K) +- (out: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) +- = +- let done:bool = true in +- let done, out, sampled_coefficients:(bool & +- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ +- Core.Ops.Range.f_start = sz 0; +- Core.Ops.Range.f_end = v_K +- } +- <: +- Core.Ops.Range.t_Range usize) +- <: +- Core.Ops.Range.t_Range usize) +#push-options "--z3rlimit 50" - let sample_from_uniform_distribution (randomness: t_Array u8 (sz 840)) = - let (sampled_coefficients: usize):usize = sz 0 in -- let (out: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement):Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement ++let sample_from_uniform_distribution (randomness: t_Array u8 (sz 840)) = ++ let (sampled_coefficients: usize):usize = sz 0 in + let (out: Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement):Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement - = -- Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO ++ = + Libcrux.Kem.Kyber.Arithmetic.cast_poly_b Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO - in - let done:bool = false in -- let done, out, sampled_coefficients:(bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & ++ in ++ let done:bool = false in + let acc_t = (bool & Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize) in + [@ inline_let] + let inv = fun (acc:acc_t) -> True in + let sl : t_Slice u8 = randomness in + let chunk_len = sz 3 in + let done, out, sampled_coefficients:(bool & Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & - usize) = -- Core.Iter.Traits.Iterator.f_fold 
(Core.Iter.Traits.Collect.f_into_iter (Core.Slice.impl__chunks ( -- Rust_primitives.unsize randomness <: t_Slice u8) -- (sz 3) -- <: -- Core.Slice.Iter.t_Chunks u8) -- <: -- Core.Slice.Iter.t_Chunks u8) ++ usize) = + Rust_primitives.Iterators.fold_chunks_exact #u8 #acc_t #inv + sl + chunk_len (done, out, sampled_coefficients <: -- (bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize)) +- (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & t_Array usize v_K +- )) +- (fun temp_0_ i -> + (bool & Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize)) - (fun temp_0_ bytes -> ++ (fun temp_0_ bytes -> let done, out, sampled_coefficients:(bool & -- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & +- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) = + Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & - usize) = ++ usize) = temp_0_ in -- let bytes:t_Slice u8 = bytes in +- let i:usize = i in +- let out, sampled_coefficients:(t_Array +- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) = +- Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Slice.impl__chunks +- (Rust_primitives.unsize (randomness.[ i ] <: t_Array u8 v_N) <: t_Slice u8) +- (sz 3) +- <: +- Core.Slice.Iter.t_Chunks u8) + let bytes:t_Array u8 chunk_len = bytes in - if ~.done <: bool - then - let b1:i32 = cast (bytes.[ sz 0 ] <: u8) <: i32 in - let b2:i32 = cast (bytes.[ sz 1 ] <: u8) <: i32 in - let b3:i32 = cast (bytes.[ sz 2 ] <: u8) <: i32 in ++ if ~.done <: bool ++ then ++ let b1:i32 = cast (bytes.[ sz 0 ] <: u8) <: i32 in ++ let b2:i32 = cast (bytes.[ sz 1 ] <: u8) <: i32 in ++ let b3:i32 = cast (bytes.[ sz 2 ] <: u8) <: i32 in + assert(v b1 >= 0 /\ v b1 < pow2 8); + assert(v b2 >= 0 /\ v b2 < pow2 8); + assert(v b3 >= 0 /\ v b3 < pow2 8); - let d1:i32 = ((b2 &. 15l <: i32) <= v b1); + assert (v d1 >= 0); - let d2:i32 = (b3 <>! 4l <: i32) in -- let out, sampled_coefficients:(Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & ++ let d2:i32 = (b3 <>! 4l <: i32) in + logor_lemma (b3 <>! 4l <: i32); + assert (v d2 >= v b3 * pow2 4); + assert (v d2 >= 0); + let out, sampled_coefficients:(Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & - usize) = - if - d1 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS && - sampled_coefficients <. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT - then -- let out:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = ++ usize) = ++ if ++ d1 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS && ++ sampled_coefficients <. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT ++ then + let out:Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement = - { - out with - Libcrux.Kem.Kyber.Arithmetic.f_coefficients -@@ -208,23 +286,23 @@ - d1 - } - <: -- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement ++ { ++ out with ++ Libcrux.Kem.Kyber.Arithmetic.f_coefficients ++ = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out ++ .Libcrux.Kem.Kyber.Arithmetic.f_coefficients ++ sampled_coefficients ++ d1 ++ } ++ <: + Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement - in - out, sampled_coefficients +! sz 1 ++ in ++ out, sampled_coefficients +! 
sz 1 <: -- (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize) +- Core.Slice.Iter.t_Chunks u8) +- (out, sampled_coefficients + (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize) - else - out, sampled_coefficients ++ else ++ out, sampled_coefficients <: -- (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize) +- (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K)) +- (fun temp_0_ bytes -> +- let out, sampled_coefficients:(t_Array +- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) = +- temp_0_ +- in +- let bytes:t_Slice u8 = bytes in +- let b1:i32 = cast (bytes.[ sz 0 ] <: u8) <: i32 in +- let b2:i32 = cast (bytes.[ sz 1 ] <: u8) <: i32 in +- let b3:i32 = cast (bytes.[ sz 2 ] <: u8) <: i32 in +- let d1:i32 = ((b2 &. 15l <: i32) <>! 4l <: i32) in +- let out, sampled_coefficients:(t_Array +- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) = +- if +- d1 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS && +- (sampled_coefficients.[ i ] <: usize) <. +- Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT +- then +- let out:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out +- i +- ({ +- (out.[ i ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) with +- Libcrux.Kem.Kyber.Arithmetic.f_coefficients +- = +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize (out.[ i ] +- <: +- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- .Libcrux.Kem.Kyber.Arithmetic.f_coefficients +- (sampled_coefficients.[ i ] <: usize) +- d1 +- <: +- t_Array i32 (sz 256) +- } +- <: +- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- in +- out, +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize sampled_coefficients +- i +- ((sampled_coefficients.[ i ] <: usize) +! sz 1 <: usize) +- <: +- (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) +- else +- out, sampled_coefficients +- <: +- (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) +- in +- if +- d2 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS && +- (sampled_coefficients.[ i ] <: usize) <. +- Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT +- then +- let out:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out +- i +- ({ +- (out.[ i ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) with +- Libcrux.Kem.Kyber.Arithmetic.f_coefficients +- = +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize (out.[ i ] +- <: +- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- .Libcrux.Kem.Kyber.Arithmetic.f_coefficients +- (sampled_coefficients.[ i ] <: usize) +- d2 +- <: +- t_Array i32 (sz 256) +- } +- <: +- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- in +- let sampled_coefficients:t_Array usize v_K = +- Rust_primitives.Hax.Monomorphized_update_at.update_at_usize sampled_coefficients +- i +- ((sampled_coefficients.[ i ] <: usize) +! sz 1 <: usize) +- in +- out, sampled_coefficients +- <: +- (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) +- else +- out, sampled_coefficients +- <: +- (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K)) +- in +- if +- (sampled_coefficients.[ i ] <: usize) <. 
+- Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT +- then +- false, out, sampled_coefficients +- <: +- (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K) + (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize) - in -- let out, sampled_coefficients:(Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & ++ in + let out, sampled_coefficients:(Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & - usize) = - if - d2 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS && - sampled_coefficients <. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT - then -- let out:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = ++ usize) = ++ if ++ d2 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS && ++ sampled_coefficients <. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT ++ then + let out:Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement = - { - out with - Libcrux.Kem.Kyber.Arithmetic.f_coefficients -@@ -235,31 +313,31 @@ - d2 - } - <: -- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement ++ { ++ out with ++ Libcrux.Kem.Kyber.Arithmetic.f_coefficients ++ = ++ Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out ++ .Libcrux.Kem.Kyber.Arithmetic.f_coefficients ++ sampled_coefficients ++ d2 ++ } ++ <: + Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement - in - let sampled_coefficients:usize = sampled_coefficients +! sz 1 in - out, sampled_coefficients - <: -- (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize) ++ in ++ let sampled_coefficients:usize = sampled_coefficients +! sz 1 in ++ out, sampled_coefficients ++ <: + (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize) - else - out, sampled_coefficients - <: -- (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize) ++ else ++ out, sampled_coefficients ++ <: + (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize) - in - if sampled_coefficients =. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT - then - let done:bool = true in - done, out, sampled_coefficients - <: -- (bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize) ++ in ++ if sampled_coefficients =. 
Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT ++ then ++ let done:bool = true in ++ done, out, sampled_coefficients ++ <: + (bool & Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize) - else - done, out, sampled_coefficients - <: -- (bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize) ++ else ++ done, out, sampled_coefficients ++ <: + (bool & Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize) else done, out, sampled_coefficients <: -- (bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize)) +- (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K)) + (bool & Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement & usize)) in - let _:Prims.unit = - if ~.done -@@ -268,4 +346,5 @@ - () +- let hax_temp_output:bool = done in +- sampled_coefficients, out, hax_temp_output +- <: +- (t_Array usize v_K & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & bool) +- +-let sample_from_xof (v_K: usize) (seeds: t_Array (t_Array u8 (sz 34)) v_K) = +- let (sampled_coefficients: t_Array usize v_K):t_Array usize v_K = +- Rust_primitives.Hax.repeat (sz 0) v_K ++ let _:Prims.unit = ++ if ~.done ++ then ++ let _:Prims.unit = rejection_sampling_panic_with_diagnostic () in ++ () in +- let (out: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K):t_Array +- Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = +- Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K +- in +- let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = +- Libcrux.Kem.Kyber.Hash_functions.absorb v_K seeds +- in +- let tmp0, out1:(Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & +- t_Array (t_Array u8 (sz 504)) v_K) = +- Libcrux.Kem.Kyber.Hash_functions.squeeze_three_blocks v_K xof_state +- in +- let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = tmp0 in +- let randomness:t_Array (t_Array u8 (sz 504)) v_K = out1 in +- let tmp0, tmp1, out1:(t_Array usize v_K & +- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- bool) = +- sample_from_uniform_distribution_next v_K (sz 504) randomness sampled_coefficients out +- in +- let sampled_coefficients:t_Array usize v_K = tmp0 in +- let out:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = tmp1 in +- let done:bool = out1 in +- let done, out, sampled_coefficients, xof_state:(bool & +- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K & +- Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = +- Rust_primitives.f_while_loop (fun temp_0_ -> +- let done, out, sampled_coefficients, xof_state:(bool & +- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K & +- Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = +- temp_0_ +- in +- ~.done <: bool) +- (done, out, sampled_coefficients, xof_state +- <: +- (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & t_Array usize v_K & +- Libcrux.Digest.Incremental_x4.t_Shake128StateX4)) +- (fun temp_0_ -> +- let done, out, sampled_coefficients, xof_state:(bool & +- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K & +- Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = +- temp_0_ +- in +- let tmp0, out1:(Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & +- t_Array (t_Array u8 (sz 168)) v_K) = +- Libcrux.Kem.Kyber.Hash_functions.squeeze_block v_K xof_state +- in +- let 
xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = tmp0 in +- let randomness:t_Array (t_Array u8 (sz 168)) v_K = out1 in +- let tmp0, tmp1, out1:(t_Array usize v_K & +- t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- bool) = +- sample_from_uniform_distribution_next v_K (sz 168) randomness sampled_coefficients out +- in +- let sampled_coefficients:t_Array usize v_K = tmp0 in +- let out:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = tmp1 in +- let done:bool = out1 in +- done, out, sampled_coefficients, xof_state +- <: +- (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & +- t_Array usize v_K & +- Libcrux.Digest.Incremental_x4.t_Shake128StateX4)) +- in +- let _:Prims.unit = Libcrux.Kem.Kyber.Hash_functions.free_state xof_state in let _:Prims.unit = () <: Prims.unit in - out + out +#pop-options diff -ruN extraction/Libcrux.Kem.Kyber.Sampling.fsti extraction-edited/Libcrux.Kem.Kyber.Sampling.fsti ---- extraction/Libcrux.Kem.Kyber.Sampling.fsti 2024-03-12 10:45:44.741931844 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Sampling.fsti 2024-03-12 10:45:44.821929484 +0100 -@@ -3,77 +3,37 @@ +--- extraction/Libcrux.Kem.Kyber.Sampling.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Sampling.fsti 2024-03-13 11:03:50 +@@ -3,84 +3,37 @@ open Core open FStar.Mul --val rejection_sampling_panic_with_diagnostic: Prims.unit -- -> Prims.Pure Prims.unit Prims.l_True (fun _ -> Prims.l_True) +open Libcrux.Kem.Kyber.Arithmetic - ++ val sample_from_binomial_distribution_2_ (randomness: t_Slice u8) - : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement + : Prims.Pure (t_PolynomialRingElement_b 3) @@ -5662,18 +5650,29 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Sampling.fsti extraction-edited/Libcrux.K + Libcrux.Kem.Kyber.Arithmetic.to_spec_poly_b result == + Spec.Kyber.sample_poly_binomial v_ETA randomness) - val sample_from_uniform_distribution (randomness: t_Array u8 (sz 840)) -- : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement +-val sample_from_uniform_distribution_next +- (v_K v_N: usize) +- (randomness: t_Array (t_Array u8 v_N) v_K) +- (sampled_coefficients: t_Array usize v_K) +- (out: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) +- : Prims.Pure +- (t_Array usize v_K & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & bool) - Prims.l_True - (fun _ -> Prims.l_True) +- +-val sample_from_xof (v_K: usize) (seeds: t_Array (t_Array u8 (sz 34)) v_K) +- : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) +- Prims.l_True +- (fun _ -> Prims.l_True) ++val sample_from_uniform_distribution (randomness: t_Array u8 (sz 840)) + : Pure Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement + (requires True) + (ensures fun _ -> True) +// (ensures fun result -> (forall i. 
v (result.f_coefficients.[i]) >= 0)) + diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fst extraction-edited/Libcrux.Kem.Kyber.Serialize.fst ---- extraction/Libcrux.Kem.Kyber.Serialize.fst 2024-03-12 10:45:44.751931549 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Serialize.fst 2024-03-12 10:45:44.810929808 +0100 +--- extraction/Libcrux.Kem.Kyber.Serialize.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Serialize.fst 2024-03-13 11:03:50 @@ -1,8 +1,15 @@ module Libcrux.Kem.Kyber.Serialize -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -5812,14 +5811,13 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fst extraction-edited/Libcrux.K let coefficient1:i32 = cast (byte &. 15uy <: u8) <: i32 in let coefficient2:i32 = cast ((byte >>! 4l <: u8) &. 15uy <: u8) <: i32 in - coefficient1, coefficient2 <: (i32 & i32) -- --let decompress_coefficients_5_ (byte1 byte2 byte3 byte4 byte5: i32) = + lemma_get_bit_bounded' coefficient1 4; + lemma_get_bit_bounded' coefficient2 4; + bit_vec_equal_intro_principle (); + coefficient1, coefficient2 +#pop-options -+ + +-let decompress_coefficients_5_ (byte1 byte2 byte3 byte4 byte5: i32) = +#push-options "--z3rlimit 400" +[@@"opaque_to_smt"] +let decompress_coefficients_5_ byte1 byte2 byte3 byte4 byte5 = @@ -7150,8 +7148,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fst extraction-edited/Libcrux.K +#pop-options + diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti ---- extraction/Libcrux.Kem.Kyber.Serialize.fsti 2024-03-12 10:45:44.733932080 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti 2024-03-12 10:45:44.815929661 +0100 +--- extraction/Libcrux.Kem.Kyber.Serialize.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti 2024-03-13 11:03:50 @@ -2,118 +2,188 @@ #set-options "--fuel 0 --ifuel 1 --z3rlimit 15" open Core @@ -7176,16 +7174,15 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. : Prims.Pure (u8 & u8 & u8 & u8 & u8 & u8 & u8 & u8 & u8 & u8 & u8) - Prims.l_True - (fun _ -> Prims.l_True) -- --val compress_coefficients_3_ (coefficient1 coefficient2: u16) -- : Prims.Pure (u8 & u8 & u8) Prims.l_True (fun _ -> Prims.l_True) + (requires True) + (ensures fun tuple -> + int_t_array_bitwise_eq' + (create8 (coefficient1, coefficient2, coefficient3, coefficient4, coefficient5, coefficient6, coefficient7, coefficient8)) 11 + (create11 tuple) 8 + ) -+ + +-val compress_coefficients_3_ (coefficient1 coefficient2: u16) +- : Prims.Pure (u8 & u8 & u8) Prims.l_True (fun _ -> Prims.l_True) +val compress_coefficients_3_ (coefficient1 coefficient2: int_t_d u16_inttype 12) + : Prims.Pure (u8 & u8 & u8) + (requires True) @@ -7199,9 +7196,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. - (coefficient2 coefficient1 coefficient4 coefficient3 coefficient5 coefficient7 coefficient6 coefficient8: - u8) - : Prims.Pure (u8 & u8 & u8 & u8 & u8) Prims.l_True (fun _ -> Prims.l_True) -- --val decompress_coefficients_10_ (byte2 byte1 byte3 byte4 byte5: i32) -- : Prims.Pure (i32 & i32 & i32 & i32) Prims.l_True (fun _ -> Prims.l_True) + (coefficient2 coefficient1 coefficient4 coefficient3 coefficient5 coefficient7 coefficient6 coefficient8: int_t_d u8_inttype 5) + : Prims.Pure (u8 & u8 & u8 & u8 & u8) + (requires True) @@ -7210,7 +7204,9 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. 
+ (create8 (coefficient1, coefficient2, coefficient3, coefficient4, coefficient5, coefficient6, coefficient7, coefficient8)) 5 + (create5 tuple) 8 + ) -+ + +-val decompress_coefficients_10_ (byte2 byte1 byte3 byte4 byte5: i32) +- : Prims.Pure (i32 & i32 & i32 & i32) Prims.l_True (fun _ -> Prims.l_True) +private unfold type i32_d = int_t_d i32_inttype +val decompress_coefficients_10_ (byte2 byte1 byte3 byte4 byte5: int_t_d i32_inttype 8) + : Prims.Pure (i32_d 10 & i32_d 10 & i32_d 10 & i32_d 10) @@ -7237,11 +7233,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. val decompress_coefficients_4_ (byte: u8) - : Prims.Pure (i32 & i32) Prims.l_True (fun _ -> Prims.l_True) -- --val decompress_coefficients_5_ (byte1 byte2 byte3 byte4 byte5: i32) -- : Prims.Pure (i32 & i32 & i32 & i32 & i32 & i32 & i32 & i32) -- Prims.l_True -- (fun _ -> Prims.l_True) + : Prims.Pure (i32_d 4 & i32_d 4) + (requires True) + (ensures fun (r1, r2) -> @@ -7249,7 +7240,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. + (create1 byte) 8 + (create2 #i32 (r1, r2)) 4 + ) -+ + +-val decompress_coefficients_5_ (byte1 byte2 byte3 byte4 byte5: i32) +- : Prims.Pure (i32 & i32 & i32 & i32 & i32 & i32 & i32 & i32) +- Prims.l_True +- (fun _ -> Prims.l_True) +val decompress_coefficients_5_ (byte1 byte2 byte3 byte4 byte5: int_t_d i32_inttype 8) + : Prims.Pure (i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5) + (requires True) @@ -7299,11 +7294,6 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. - (v_COMPRESSION_FACTOR v_OUT_LEN: usize) - (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) - : Prims.Pure (t_Array u8 v_OUT_LEN) Prims.l_True (fun _ -> Prims.l_True) -- --val compress_then_serialize_ring_element_v -- (v_COMPRESSION_FACTOR v_OUT_LEN: usize) -- (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -- : Prims.Pure (t_Array u8 v_OUT_LEN) Prims.l_True (fun _ -> Prims.l_True) + (#p:Spec.Kyber.params) + (v_COMPRESSION_FACTOR: usize {v v_COMPRESSION_FACTOR == 10 \/ v v_COMPRESSION_FACTOR == 11}) + (v_OUT_LEN: usize { v v_OUT_LEN = 32 * v v_COMPRESSION_FACTOR }) @@ -7321,6 +7311,11 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. + Spec.Kyber.compress_then_encode_v p + (Libcrux.Kem.Kyber.Arithmetic.to_spec_poly_b re))) +-val compress_then_serialize_ring_element_v +- (v_COMPRESSION_FACTOR v_OUT_LEN: usize) +- (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- : Prims.Pure (t_Array u8 v_OUT_LEN) Prims.l_True (fun _ -> Prims.l_True) +- -val deserialize_then_decompress_10_ (serialized: t_Slice u8) - : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement +val deserialize_then_decompress_10_ (serialized: t_Slice u8 {Seq.length serialized == 320}) @@ -7394,13 +7389,12 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. 
- : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement - Prims.l_True - (fun _ -> Prims.l_True) -- --val serialize_uncompressed_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -- : Prims.Pure (t_Array u8 (sz 384)) Prims.l_True (fun _ -> Prims.l_True) + : Pure (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) + (requires (length serialized == Spec.Kyber.v_BYTES_PER_RING_ELEMENT)) + (ensures fun _ -> True) -+ + +-val serialize_uncompressed_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) +- : Prims.Pure (t_Array u8 (sz 384)) Prims.l_True (fun _ -> Prims.l_True) +val serialize_uncompressed_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) + : Pure (t_Array u8 (sz 384)) + (requires True) @@ -7409,8 +7403,8 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Serialize.fsti extraction-edited/Libcrux. + int_t_array_bitwise_eq res 8 coefficients 12 + )) diff -ruN extraction/Libcrux.Kem.Kyber.Types.fst extraction-edited/Libcrux.Kem.Kyber.Types.fst ---- extraction/Libcrux.Kem.Kyber.Types.fst 2024-03-12 10:45:44.740931873 +0100 -+++ extraction-edited/Libcrux.Kem.Kyber.Types.fst 2024-03-12 10:45:44.796930221 +0100 +--- extraction/Libcrux.Kem.Kyber.Types.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.Types.fst 2024-03-13 11:03:50 @@ -40,13 +40,41 @@ f_from = fun (value: t_MlKemCiphertext v_SIZE) -> value.f_value } @@ -7441,163 +7435,547 @@ diff -ruN extraction/Libcrux.Kem.Kyber.Types.fst extraction-edited/Libcrux.Kem.K + Core.Result.t_Result (t_MlKemCiphertext v_SIZE) Core.Array.t_TryFromSliceError + } + - let impl_6__as_slice (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) : t_Array u8 v_SIZE = - self.f_value - - let impl_6__len (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) : usize = v_SIZE - - let impl_6__split_at (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) (mid: usize) -- : (t_Slice u8 & t_Slice u8) = -+ : Pure (t_Slice u8 & t_Slice u8) -+ (requires (mid <=. v_SIZE)) -+ (ensures (fun (x,y) -> Seq.length x == v mid /\ Seq.length y == v (v_SIZE -! mid))) = - Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid - - type t_MlKemPrivateKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } -@@ -86,15 +114,53 @@ - f_from = fun (value: t_MlKemPrivateKey v_SIZE) -> value.f_value - } + let impl_6__as_slice (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) : t_Array u8 v_SIZE = + self.f_value + + let impl_6__len (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) : usize = v_SIZE + + let impl_6__split_at (v_SIZE: usize) (self: t_MlKemCiphertext v_SIZE) (mid: usize) +- : (t_Slice u8 & t_Slice u8) = ++ : Pure (t_Slice u8 & t_Slice u8) ++ (requires (mid <=. v_SIZE)) ++ (ensures (fun (x,y) -> Seq.length x == v mid /\ Seq.length y == v (v_SIZE -! 
mid))) = + Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid + + type t_MlKemPrivateKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } +@@ -86,15 +114,53 @@ + f_from = fun (value: t_MlKemPrivateKey v_SIZE) -> value.f_value + } + ++[@@ FStar.Tactics.Typeclasses.tcinstance] ++let impl_11 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemPrivateKey v_SIZE) (t_Slice u8) = ++ { ++ f_Error = Core.Array.t_TryFromSliceError; ++ f_try_from_pre = (fun (value: t_Slice u8) -> true); ++ f_try_from_post ++ = ++ (fun ++ (value: t_Slice u8) ++ (out: Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError) ++ -> ++ true); ++ f_try_from ++ = ++ fun (value: t_Slice u8) -> ++ match Core.Convert.f_try_into value with ++ | Core.Result.Result_Ok value -> ++ Core.Result.Result_Ok ({ f_value = value } <: t_MlKemPrivateKey v_SIZE) ++ <: ++ Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError ++ | Core.Result.Result_Err e -> ++ Core.Result.Result_Err e ++ <: ++ Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError ++ } ++ + let impl_12__as_slice (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) : t_Array u8 v_SIZE = + self.f_value + + let impl_12__len (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) : usize = v_SIZE + + let impl_12__split_at (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) (mid: usize) +- : (t_Slice u8 & t_Slice u8) = ++ : Pure (t_Slice u8 & t_Slice u8) ++ (requires (mid <=. v_SIZE)) ++ (ensures (fun (x,y) -> Seq.length x == v mid /\ Seq.length y == v (v_SIZE -! mid))) = + Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid + ++ ++ ++ ++ ++ ++ ++ ++ ++ ++ + type t_MlKemPublicKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } + + [@@ FStar.Tactics.Typeclasses.tcinstance] +@@ -132,68 +198,7 @@ + f_from = fun (value: t_MlKemPublicKey v_SIZE) -> value.f_value + } + +-let impl_18__as_slice (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) : t_Array u8 v_SIZE = +- self.f_value +- +-let impl_18__len (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) : usize = v_SIZE +- +-let impl_18__split_at (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) (mid: usize) +- : (t_Slice u8 & t_Slice u8) = +- Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid +- + [@@ FStar.Tactics.Typeclasses.tcinstance] +-let impl_5 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemCiphertext v_SIZE) (t_Slice u8) = +- { +- f_Error = Core.Array.t_TryFromSliceError; +- f_try_from_pre = (fun (value: t_Slice u8) -> true); +- f_try_from_post +- = +- (fun +- (value: t_Slice u8) +- (out: Core.Result.t_Result (t_MlKemCiphertext v_SIZE) Core.Array.t_TryFromSliceError) +- -> +- true); +- f_try_from +- = +- fun (value: t_Slice u8) -> +- match Core.Convert.f_try_into value with +- | Core.Result.Result_Ok value -> +- Core.Result.Result_Ok ({ f_value = value } <: t_MlKemCiphertext v_SIZE) +- <: +- Core.Result.t_Result (t_MlKemCiphertext v_SIZE) Core.Array.t_TryFromSliceError +- | Core.Result.Result_Err e -> +- Core.Result.Result_Err e +- <: +- Core.Result.t_Result (t_MlKemCiphertext v_SIZE) Core.Array.t_TryFromSliceError +- } +- +-[@@ FStar.Tactics.Typeclasses.tcinstance] +-let impl_11 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemPrivateKey v_SIZE) (t_Slice u8) = +- { +- f_Error = Core.Array.t_TryFromSliceError; +- f_try_from_pre = (fun (value: t_Slice u8) -> true); +- f_try_from_post +- = +- (fun +- (value: t_Slice u8) +- (out: Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) 
Core.Array.t_TryFromSliceError) +- -> +- true); +- f_try_from +- = +- fun (value: t_Slice u8) -> +- match Core.Convert.f_try_into value with +- | Core.Result.Result_Ok value -> +- Core.Result.Result_Ok ({ f_value = value } <: t_MlKemPrivateKey v_SIZE) +- <: +- Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError +- | Core.Result.Result_Err e -> +- Core.Result.Result_Err e +- <: +- Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError +- } +- +-[@@ FStar.Tactics.Typeclasses.tcinstance] + let impl_17 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemPublicKey v_SIZE) (t_Slice u8) = + { + f_Error = Core.Array.t_TryFromSliceError; +@@ -218,6 +223,17 @@ + <: + Core.Result.t_Result (t_MlKemPublicKey v_SIZE) Core.Array.t_TryFromSliceError + } ++ ++let impl_18__as_slice (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) : t_Array u8 v_SIZE = ++ self.f_value ++ ++let impl_18__len (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) : usize = v_SIZE ++ ++let impl_18__split_at (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) (mid: usize) ++ : Pure (t_Slice u8 & t_Slice u8) ++ (requires (mid <=. v_SIZE)) ++ (ensures (fun (x,y) -> Seq.length x == v mid /\ Seq.length y == v (v_SIZE -! mid))) = ++ Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid + + type t_MlKemKeyPair (v_PRIVATE_KEY_SIZE: usize) (v_PUBLIC_KEY_SIZE: usize) = { + f_sk:t_MlKemPrivateKey v_PRIVATE_KEY_SIZE; +diff -ruN extraction/Libcrux.Kem.Kyber.fst extraction-edited/Libcrux.Kem.Kyber.fst +--- extraction/Libcrux.Kem.Kyber.fst 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.fst 2024-03-13 11:03:50 +@@ -1,12 +1,29 @@ + module Libcrux.Kem.Kyber +-#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" ++#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" + open Core + open FStar.Mul + +-let serialize_kem_secret_key ++let update_at_range_lemma #n ++ (s: t_Slice 't) ++ (i: Core.Ops.Range.t_Range (int_t n) {(Core.Ops.Range.impl_index_range_slice 't n).f_index_pre s i}) ++ (x: t_Slice 't) ++ : Lemma ++ (requires (Seq.length x == v i.f_end - v i.f_start)) ++ (ensures ( ++ let s' = Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x in ++ let len = v i.f_start in ++ forall (i: nat). i < len ==> Seq.index s i == Seq.index s' i ++ )) ++ [SMTPat (Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x)] ++ = let s' = Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x in ++ let len = v i.f_start in ++ introduce forall (i:nat {i < len}). Seq.index s i == Seq.index s' i ++ with (assert ( Seq.index (Seq.slice s 0 len) i == Seq.index s i ++ /\ Seq.index (Seq.slice s' 0 len) i == Seq.index s' i )) ++ ++let serialize_kem_secret_key #p + (v_SERIALIZED_KEY_LEN: usize) +- (private_key public_key implicit_rejection_value: t_Slice u8) +- = ++ (private_key public_key implicit_rejection_value: t_Slice u8) = + let out:t_Array u8 v_SERIALIZED_KEY_LEN = Rust_primitives.Hax.repeat 0uy v_SERIALIZED_KEY_LEN in + let pointer:usize = sz 0 in + let out:t_Array u8 v_SERIALIZED_KEY_LEN = +@@ -55,6 +72,8 @@ + t_Slice u8) + in + let pointer:usize = pointer +! (Core.Slice.impl__len public_key <: usize) in ++ let h_public_key = (Rust_primitives.unsize (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) ++ <: t_Slice u8) in + let out:t_Array u8 v_SERIALIZED_KEY_LEN = + Rust_primitives.Hax.Monomorphized_update_at.update_at_range out + ({ +@@ -70,16 +89,7 @@ + pointer +! 
Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE <: usize + } + <: +- Core.Ops.Range.t_Range usize ] +- <: +- t_Slice u8) +- (Rust_primitives.unsize (Libcrux.Kem.Kyber.Hash_functions.v_H public_key +- <: +- t_Array u8 (sz 32)) +- <: +- t_Slice u8) +- <: +- t_Slice u8) ++ Core.Ops.Range.t_Range usize ]) h_public_key) + in + let pointer:usize = pointer +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE in + let out:t_Array u8 v_SERIALIZED_KEY_LEN = +@@ -106,14 +116,32 @@ + <: + t_Slice u8) + in ++ assert (Seq.slice out 0 (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p)) `Seq.equal` private_key); ++ assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p)) ++ (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p)) `Seq.equal` public_key); ++ assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! ++ Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p)) ++ (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! ++ Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! ++ Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)) ++ `Seq.equal` Libcrux.Kem.Kyber.Hash_functions.v_H public_key); ++ assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! ++ Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! ++ Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)) ++ (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! ++ Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! ++ Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE +! ++ Spec.Kyber.v_SHARED_SECRET_SIZE)) ++ == implicit_rejection_value); ++ lemma_slice_append_4 out private_key public_key (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) implicit_rejection_value; + out + +-let decapsulate ++let decapsulate #p + (v_K v_SECRET_KEY_SIZE v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: + usize) + (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_SECRET_KEY_SIZE) +- (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) +- = ++ (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) = ++ let orig_secret_key = secret_key.f_value in + let ind_cpa_secret_key, secret_key:(t_Slice u8 & t_Slice u8) = + Libcrux.Kem.Kyber.Types.impl_12__split_at v_SECRET_KEY_SIZE secret_key v_CPA_SECRET_KEY_SIZE + in +@@ -123,8 +151,12 @@ + let ind_cpa_public_key_hash, implicit_rejection_value:(t_Slice u8 & t_Slice u8) = + Core.Slice.impl__split_at secret_key Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE + in ++ assert (ind_cpa_secret_key == slice orig_secret_key (sz 0) v_CPA_SECRET_KEY_SIZE); ++ assert (ind_cpa_public_key == slice orig_secret_key v_CPA_SECRET_KEY_SIZE (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE)); ++ assert (ind_cpa_public_key_hash == slice orig_secret_key (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE) (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)); ++ assert (implicit_rejection_value == slice orig_secret_key (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE +! 
Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE) (length orig_secret_key)); + let decrypted:t_Array u8 (sz 32) = +- Libcrux.Kem.Kyber.Ind_cpa.decrypt v_K ++ Libcrux.Kem.Kyber.Ind_cpa.decrypt #p v_K + v_CIPHERTEXT_SIZE + v_C1_SIZE + v_VECTOR_U_COMPRESSION_FACTOR +@@ -152,6 +184,9 @@ + <: + t_Slice u8) + in ++ lemma_slice_append to_hash decrypted ind_cpa_public_key_hash; ++ assert (decrypted == Spec.Kyber.ind_cpa_decrypt p ind_cpa_secret_key ciphertext.f_value); ++ assert (to_hash == concat decrypted ind_cpa_public_key_hash); + let hashed:t_Array u8 (sz 64) = + Libcrux.Kem.Kyber.Hash_functions.v_G (Rust_primitives.unsize to_hash <: t_Slice u8) + in +@@ -159,6 +194,10 @@ + Core.Slice.impl__split_at (Rust_primitives.unsize hashed <: t_Slice u8) + Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE + in ++ assert ((shared_secret,pseudorandomness) == split hashed Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE); ++ assert (length implicit_rejection_value = v_SECRET_KEY_SIZE -! v_CPA_SECRET_KEY_SIZE -! v_PUBLIC_KEY_SIZE -! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE); ++ assert (length implicit_rejection_value = Spec.Kyber.v_SHARED_SECRET_SIZE); ++ assert (Spec.Kyber.v_SHARED_SECRET_SIZE <=. Spec.Kyber.v_IMPLICIT_REJECTION_HASH_INPUT_SIZE p); + let (to_hash: t_Array u8 v_IMPLICIT_REJECTION_HASH_INPUT_SIZE):t_Array u8 + v_IMPLICIT_REJECTION_HASH_INPUT_SIZE = + Libcrux.Kem.Kyber.Ind_cpa.into_padded_array v_IMPLICIT_REJECTION_HASH_INPUT_SIZE +@@ -180,11 +219,14 @@ + <: + t_Slice u8) + in ++ lemma_slice_append to_hash implicit_rejection_value ciphertext.f_value; + let (implicit_rejection_shared_secret: t_Array u8 (sz 32)):t_Array u8 (sz 32) = + Libcrux.Kem.Kyber.Hash_functions.v_PRF (sz 32) (Rust_primitives.unsize to_hash <: t_Slice u8) + in ++ assert (implicit_rejection_shared_secret == Spec.Kyber.v_J to_hash); ++ assert (Seq.length ind_cpa_public_key == v v_PUBLIC_KEY_SIZE); + let expected_ciphertext:t_Array u8 v_CIPHERTEXT_SIZE = +- Libcrux.Kem.Kyber.Ind_cpa.encrypt v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE ++ Libcrux.Kem.Kyber.Ind_cpa.encrypt #p v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE + v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 + v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE ind_cpa_public_key decrypted + pseudorandomness +@@ -194,16 +236,18 @@ + (Core.Convert.f_as_ref ciphertext <: t_Slice u8) + (Rust_primitives.unsize expected_ciphertext <: t_Slice u8) + in ++ let res = + Libcrux.Kem.Kyber.Constant_time_ops.select_shared_secret_in_constant_time shared_secret + (Rust_primitives.unsize implicit_rejection_shared_secret <: t_Slice u8) + selector ++ in ++ res + +-let encapsulate ++let encapsulate #p + (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: + usize) + (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) +- (randomness: t_Array u8 (sz 32)) +- = ++ (randomness: t_Array u8 (sz 32)) = + let (to_hash: t_Array u8 (sz 64)):t_Array u8 (sz 64) = + Libcrux.Kem.Kyber.Ind_cpa.into_padded_array (sz 64) + (Rust_primitives.unsize randomness <: t_Slice u8) +@@ -234,6 +278,10 @@ + <: + t_Slice u8) + in ++ assert (Seq.slice to_hash 0 (v Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE) == randomness); ++ lemma_slice_append to_hash randomness (Spec.Kyber.v_H public_key.f_value); ++ assert (to_hash == concat randomness (Spec.Kyber.v_H 
public_key.f_value)); ++ + let hashed:t_Array u8 (sz 64) = + Libcrux.Kem.Kyber.Hash_functions.v_G (Rust_primitives.unsize to_hash <: t_Slice u8) + in +@@ -242,7 +290,7 @@ + Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE + in + let ciphertext:t_Array u8 v_CIPHERTEXT_SIZE = +- Libcrux.Kem.Kyber.Ind_cpa.encrypt v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE ++ Libcrux.Kem.Kyber.Ind_cpa.encrypt #p v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE + v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN + v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE + (Rust_primitives.unsize (Libcrux.Kem.Kyber.Types.impl_18__as_slice v_PUBLIC_KEY_SIZE +@@ -252,28 +300,26 @@ + <: + t_Slice u8) randomness pseudorandomness + in +- let shared_secret_array:t_Array u8 (sz 32) = Rust_primitives.Hax.repeat 0uy (sz 32) in +- let shared_secret_array:t_Array u8 (sz 32) = +- Core.Slice.impl__copy_from_slice shared_secret_array shared_secret +- in +- Core.Convert.f_into ciphertext, shared_secret_array ++ Core.Convert.f_into ciphertext, ++ Core.Result.impl__unwrap (Core.Convert.f_try_into shared_secret ++ <: ++ Core.Result.t_Result (t_Array u8 (sz 32)) Core.Array.t_TryFromSliceError) + <: + (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) + +-let validate_public_key ++#push-options "--z3rlimit 100" ++let validate_public_key #p + (v_K v_RANKED_BYTES_PER_RING_ELEMENT v_PUBLIC_KEY_SIZE: usize) + (public_key: t_Array u8 v_PUBLIC_KEY_SIZE) + = +- let pk:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = +- Libcrux.Kem.Kyber.Ind_cpa.deserialize_public_key v_K ++ let pk:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = ++ Libcrux.Kem.Kyber.Ind_cpa.deserialize_public_key #p v_K + (public_key.[ { Core.Ops.Range.f_end = v_RANKED_BYTES_PER_RING_ELEMENT } + <: +- Core.Ops.Range.t_RangeTo usize ] +- <: +- t_Slice u8) ++ Core.Ops.Range.t_RangeTo usize ]) + in + let public_key_serialized:t_Array u8 v_PUBLIC_KEY_SIZE = +- Libcrux.Kem.Kyber.Ind_cpa.serialize_public_key v_K ++ Libcrux.Kem.Kyber.Ind_cpa.serialize_public_key #p v_K + v_RANKED_BYTES_PER_RING_ELEMENT + v_PUBLIC_KEY_SIZE + pk +@@ -284,12 +330,12 @@ + t_Slice u8) + in + public_key =. 
public_key_serialized ++#pop-options + +-let generate_keypair ++let generate_keypair #p + (v_K v_CPA_PRIVATE_KEY_SIZE v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE v_BYTES_PER_RING_ELEMENT v_ETA1 v_ETA1_RANDOMNESS_SIZE: + usize) +- (randomness: t_Array u8 (sz 64)) +- = ++ (randomness: t_Array u8 (sz 64)) = + let ind_cpa_keypair_randomness:t_Slice u8 = + randomness.[ { + Core.Ops.Range.f_start = sz 0; +@@ -307,7 +353,7 @@ + in + let ind_cpa_private_key, public_key:(t_Array u8 v_CPA_PRIVATE_KEY_SIZE & + t_Array u8 v_PUBLIC_KEY_SIZE) = +- Libcrux.Kem.Kyber.Ind_cpa.generate_keypair v_K ++ Libcrux.Kem.Kyber.Ind_cpa.generate_keypair #p v_K + v_CPA_PRIVATE_KEY_SIZE + v_PUBLIC_KEY_SIZE + v_BYTES_PER_RING_ELEMENT +@@ -316,7 +362,7 @@ + ind_cpa_keypair_randomness + in + let secret_key_serialized:t_Array u8 v_PRIVATE_KEY_SIZE = +- serialize_kem_secret_key v_PRIVATE_KEY_SIZE ++ serialize_kem_secret_key #p v_PRIVATE_KEY_SIZE + (Rust_primitives.unsize ind_cpa_private_key <: t_Slice u8) + (Rust_primitives.unsize public_key <: t_Slice u8) + implicit_rejection_value +@@ -329,3 +375,4 @@ + v_PUBLIC_KEY_SIZE + private_key + (Core.Convert.f_into public_key <: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) ++ +diff -ruN extraction/Libcrux.Kem.Kyber.fsti extraction-edited/Libcrux.Kem.Kyber.fsti +--- extraction/Libcrux.Kem.Kyber.fsti 2024-03-13 11:03:50 ++++ extraction-edited/Libcrux.Kem.Kyber.fsti 2024-03-13 11:03:50 +@@ -10,36 +10,84 @@ + Libcrux.Kem.Kyber.Constants.v_CPA_PKE_KEY_GENERATION_SEED_SIZE +! + Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE -+[@@ FStar.Tactics.Typeclasses.tcinstance] -+let impl_11 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemPrivateKey v_SIZE) (t_Slice u8) = -+ { -+ f_Error = Core.Array.t_TryFromSliceError; -+ f_try_from_pre = (fun (value: t_Slice u8) -> true); -+ f_try_from_post -+ = -+ (fun -+ (value: t_Slice u8) -+ (out: Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError) -+ -> -+ true); -+ f_try_from -+ = -+ fun (value: t_Slice u8) -> -+ match Core.Convert.f_try_into value with -+ | Core.Result.Result_Ok value -> -+ Core.Result.Result_Ok ({ f_value = value } <: t_MlKemPrivateKey v_SIZE) -+ <: -+ Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError -+ | Core.Result.Result_Err e -> -+ Core.Result.Result_Err e -+ <: -+ Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError -+ } -+ - let impl_12__as_slice (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) : t_Array u8 v_SIZE = - self.f_value +-val serialize_kem_secret_key ++val serialize_kem_secret_key (#p:Spec.Kyber.params) + (v_SERIALIZED_KEY_LEN: usize) + (private_key public_key implicit_rejection_value: t_Slice u8) +- : Prims.Pure (t_Array u8 v_SERIALIZED_KEY_LEN) Prims.l_True (fun _ -> Prims.l_True) ++ : Pure (t_Array u8 v_SERIALIZED_KEY_LEN) ++ (requires (length private_key == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ ++ length public_key == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ ++ length implicit_rejection_value == Spec.Kyber.v_SHARED_SECRET_SIZE /\ ++ v_SERIALIZED_KEY_LEN == Spec.Kyber.v_SECRET_KEY_SIZE p)) ++ (ensures (fun res -> res == ++ Seq.append private_key ( ++ Seq.append public_key ( ++ Seq.append (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) implicit_rejection_value)))) - let impl_12__len (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) : usize = v_SIZE +-val decapsulate ++val decapsulate (#p:Spec.Kyber.params) + (v_K v_SECRET_KEY_SIZE v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE 
v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: + usize) + (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_SECRET_KEY_SIZE) + (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) +- : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) ++ : Pure (t_Array u8 (sz 32)) ++ (requires ( p == (let open Spec.Kyber in {v_RANK = v_K; v_ETA1; v_ETA2; v_VECTOR_U_COMPRESSION_FACTOR; v_VECTOR_V_COMPRESSION_FACTOR}) /\ ++ Spec.Kyber.valid_params p /\ ++ v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ ++ v_ETA2_RANDOMNESS_SIZE == Spec.Kyber.v_ETA2_RANDOMNESS_SIZE p /\ ++ v_IMPLICIT_REJECTION_HASH_INPUT_SIZE == Spec.Kyber.v_IMPLICIT_REJECTION_HASH_INPUT_SIZE p /\ ++ v_SECRET_KEY_SIZE == Spec.Kyber.v_SECRET_KEY_SIZE p /\ ++ v_CPA_SECRET_KEY_SIZE == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ ++ v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ ++ v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ ++ v_C1_SIZE == Spec.Kyber.v_C1_SIZE p /\ ++ v_C1_BLOCK_SIZE == Spec.Kyber.v_C1_BLOCK_SIZE p /\ ++ v_C2_SIZE == Spec.Kyber.v_C2_SIZE p /\ ++ v_T_AS_NTT_ENCODED_SIZE = Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p ++ )) ++ (ensures (fun res -> ++ res == Spec.Kyber.ind_cca_decapsulate p secret_key.f_value ciphertext.f_value)) - let impl_12__split_at (v_SIZE: usize) (self: t_MlKemPrivateKey v_SIZE) (mid: usize) -- : (t_Slice u8 & t_Slice u8) = -+ : Pure (t_Slice u8 & t_Slice u8) -+ (requires (mid <=. v_SIZE)) -+ (ensures (fun (x,y) -> Seq.length x == v mid /\ Seq.length y == v (v_SIZE -! mid))) = - Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid +-val encapsulate ++val encapsulate (#p:Spec.Kyber.params) + (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: + usize) + (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) + (randomness: t_Array u8 (sz 32)) +- : Prims.Pure (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) +- Prims.l_True +- (fun _ -> Prims.l_True) ++ : Pure (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) ++ (requires (p == (let open Spec.Kyber in {v_RANK = v_K; v_ETA1; v_ETA2; v_VECTOR_U_COMPRESSION_FACTOR; v_VECTOR_V_COMPRESSION_FACTOR}) /\ ++ Spec.Kyber.valid_params p /\ ++ v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ ++ v_ETA2_RANDOMNESS_SIZE == Spec.Kyber.v_ETA2_RANDOMNESS_SIZE p /\ ++ v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ ++ v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ ++ v_C1_SIZE == Spec.Kyber.v_C1_SIZE p /\ ++ v_C2_SIZE == Spec.Kyber.v_C2_SIZE p /\ ++ v_T_AS_NTT_ENCODED_SIZE = Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p /\ ++ v_VECTOR_U_BLOCK_LEN == Spec.Kyber.v_C1_BLOCK_SIZE p ++ )) +-val validate_public_key ++ (ensures (fun (ct,ss) -> ++ (ct.f_value,ss) == Spec.Kyber.ind_cca_encapsulate p public_key.f_value randomness)) + -+ -+ -+ -+ -+ -+ -+ -+ -+ - type t_MlKemPublicKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } - - [@@ FStar.Tactics.Typeclasses.tcinstance] -@@ -132,67 +198,6 @@ - f_from = fun (value: t_MlKemPublicKey v_SIZE) -> value.f_value - } - --let impl_18__as_slice (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) : 
t_Array u8 v_SIZE = -- self.f_value -- --let impl_18__len (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) : usize = v_SIZE -- --let impl_18__split_at (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) (mid: usize) -- : (t_Slice u8 & t_Slice u8) = -- Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid -- --[@@ FStar.Tactics.Typeclasses.tcinstance] --let impl_5 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemCiphertext v_SIZE) (t_Slice u8) = -- { -- f_Error = Core.Array.t_TryFromSliceError; -- f_try_from_pre = (fun (value: t_Slice u8) -> true); -- f_try_from_post -- = -- (fun -- (value: t_Slice u8) -- (out: Core.Result.t_Result (t_MlKemCiphertext v_SIZE) Core.Array.t_TryFromSliceError) -- -> -- true); -- f_try_from -- = -- fun (value: t_Slice u8) -> -- match Core.Convert.f_try_into value with -- | Core.Result.Result_Ok value -> -- Core.Result.Result_Ok ({ f_value = value } <: t_MlKemCiphertext v_SIZE) -- <: -- Core.Result.t_Result (t_MlKemCiphertext v_SIZE) Core.Array.t_TryFromSliceError -- | Core.Result.Result_Err e -> -- Core.Result.Result_Err e -- <: -- Core.Result.t_Result (t_MlKemCiphertext v_SIZE) Core.Array.t_TryFromSliceError -- } -- --[@@ FStar.Tactics.Typeclasses.tcinstance] --let impl_11 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemPrivateKey v_SIZE) (t_Slice u8) = -- { -- f_Error = Core.Array.t_TryFromSliceError; -- f_try_from_pre = (fun (value: t_Slice u8) -> true); -- f_try_from_post -- = -- (fun -- (value: t_Slice u8) -- (out: Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError) -- -> -- true); -- f_try_from -- = -- fun (value: t_Slice u8) -> -- match Core.Convert.f_try_into value with -- | Core.Result.Result_Ok value -> -- Core.Result.Result_Ok ({ f_value = value } <: t_MlKemPrivateKey v_SIZE) -- <: -- Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError -- | Core.Result.Result_Err e -> -- Core.Result.Result_Err e -- <: -- Core.Result.t_Result (t_MlKemPrivateKey v_SIZE) Core.Array.t_TryFromSliceError -- } -- - [@@ FStar.Tactics.Typeclasses.tcinstance] - let impl_17 (v_SIZE: usize) : Core.Convert.t_TryFrom (t_MlKemPublicKey v_SIZE) (t_Slice u8) = - { -@@ -219,6 +224,17 @@ - Core.Result.t_Result (t_MlKemPublicKey v_SIZE) Core.Array.t_TryFromSliceError - } ++val validate_public_key (#p:Spec.Kyber.params) + (v_K v_RANKED_BYTES_PER_RING_ELEMENT v_PUBLIC_KEY_SIZE: usize) + (public_key: t_Array u8 v_PUBLIC_KEY_SIZE) +- : Prims.Pure bool Prims.l_True (fun _ -> Prims.l_True) ++ : Prims.Pure bool ++ (requires (v_K == p.v_RANK /\ ++ v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ ++ v_RANKED_BYTES_PER_RING_ELEMENT == Spec.Kyber.v_RANKED_BYTES_PER_RING_ELEMENT p ++ )) ++ (ensures (fun _ -> Prims.l_True)) -+let impl_18__as_slice (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) : t_Array u8 v_SIZE = -+ self.f_value -+ -+let impl_18__len (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) : usize = v_SIZE +-val generate_keypair ++val generate_keypair (#p:Spec.Kyber.params) + (v_K v_CPA_PRIVATE_KEY_SIZE v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE v_BYTES_PER_RING_ELEMENT v_ETA1 v_ETA1_RANDOMNESS_SIZE: + usize) + (randomness: t_Array u8 (sz 64)) +- : Prims.Pure (Libcrux.Kem.Kyber.Types.t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) +- Prims.l_True +- (fun _ -> Prims.l_True) ++ : Pure (Libcrux.Kem.Kyber.Types.t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) ++ (requires (v_K == p.v_RANK /\ v_ETA1 == p.v_ETA1 /\ ++ v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ ++ 
v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ ++ v_CPA_PRIVATE_KEY_SIZE == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ ++ v_PRIVATE_KEY_SIZE == Spec.Kyber.v_SECRET_KEY_SIZE p /\ ++ v_BYTES_PER_RING_ELEMENT == Spec.Kyber.v_RANKED_BYTES_PER_RING_ELEMENT p ++ )) ++ (ensures (fun kp -> ++ (kp.f_sk.f_value,kp.f_pk.f_value) == Spec.Kyber.ind_cca_generate_keypair p randomness)) +diff -ruN extraction/Libcrux.Kem.fst extraction-edited/Libcrux.Kem.fst +--- extraction/Libcrux.Kem.fst 1970-01-01 01:00:00 ++++ extraction-edited/Libcrux.Kem.fst 2024-03-13 11:03:50 +@@ -0,0 +1,6 @@ ++module Libcrux.Kem ++#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" ++open Core ++open FStar.Mul + -+let impl_18__split_at (v_SIZE: usize) (self: t_MlKemPublicKey v_SIZE) (mid: usize) -+ : Pure (t_Slice u8 & t_Slice u8) -+ (requires (mid <=. v_SIZE)) -+ (ensures (fun (x,y) -> Seq.length x == v mid /\ Seq.length y == v (v_SIZE -! mid))) = -+ Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid + - type t_MlKemKeyPair (v_PRIVATE_KEY_SIZE: usize) (v_PUBLIC_KEY_SIZE: usize) = { - f_sk:t_MlKemPrivateKey v_PRIVATE_KEY_SIZE; - f_pk:t_MlKemPublicKey v_PUBLIC_KEY_SIZE diff -ruN extraction/Libcrux_platform.Platform.fsti extraction-edited/Libcrux_platform.Platform.fsti ---- extraction/Libcrux_platform.Platform.fsti 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-edited/Libcrux_platform.Platform.fsti 2024-03-12 10:45:44.782930634 +0100 +--- extraction/Libcrux_platform.Platform.fsti 1970-01-01 01:00:00 ++++ extraction-edited/Libcrux_platform.Platform.fsti 2024-03-13 11:03:50 @@ -0,0 +1,20 @@ +module Libcrux_platform.Platform +#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -7620,8 +7998,8 @@ diff -ruN extraction/Libcrux_platform.Platform.fsti extraction-edited/Libcrux_pl + +val simd128_support: Prims.unit -> Prims.Pure bool Prims.l_True (fun _ -> Prims.l_True) diff -ruN extraction/MkSeq.fst extraction-edited/MkSeq.fst ---- extraction/MkSeq.fst 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-edited/MkSeq.fst 2024-03-12 10:45:44.778930752 +0100 +--- extraction/MkSeq.fst 1970-01-01 01:00:00 ++++ extraction-edited/MkSeq.fst 2024-03-13 11:03:50 @@ -0,0 +1,91 @@ +module MkSeq +open Core @@ -7715,8 +8093,8 @@ diff -ruN extraction/MkSeq.fst extraction-edited/MkSeq.fst + +%splice[] (init 13 (fun i -> create_gen_tac (i + 1))) diff -ruN extraction/Spec.Kyber.fst extraction-edited/Spec.Kyber.fst ---- extraction/Spec.Kyber.fst 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-edited/Spec.Kyber.fst 2024-03-12 10:45:44.805929956 +0100 +--- extraction/Spec.Kyber.fst 1970-01-01 01:00:00 ++++ extraction-edited/Spec.Kyber.fst 2024-03-13 11:03:50 @@ -0,0 +1,435 @@ +module Spec.Kyber +#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" diff --git a/proofs/fstar/extraction-secret-independent.patch b/proofs/fstar/extraction-secret-independent.patch index e59a2bffa..04c93cee2 100644 --- a/proofs/fstar/extraction-secret-independent.patch +++ b/proofs/fstar/extraction-secret-independent.patch @@ -1,6 +1,6 @@ diff -ruN extraction-edited/BitVecEq.fst extraction-secret-independent/BitVecEq.fst ---- extraction-edited/BitVecEq.fst 2024-03-12 10:45:44.812929749 +0100 -+++ extraction-secret-independent/BitVecEq.fst 1970-01-01 01:00:00.000000000 +0100 +--- extraction-edited/BitVecEq.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/BitVecEq.fst 1970-01-01 01:00:00 @@ -1,12 +0,0 @@ -module BitVecEq - @@ -15,8 +15,8 @@ diff -ruN extraction-edited/BitVecEq.fst extraction-secret-independent/BitVecEq. 
- - diff -ruN extraction-edited/BitVecEq.fsti extraction-secret-independent/BitVecEq.fsti ---- extraction-edited/BitVecEq.fsti 2024-03-12 10:45:44.794930280 +0100 -+++ extraction-secret-independent/BitVecEq.fsti 1970-01-01 01:00:00.000000000 +0100 +--- extraction-edited/BitVecEq.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/BitVecEq.fsti 1970-01-01 01:00:00 @@ -1,294 +0,0 @@ -module BitVecEq -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -313,8 +313,8 @@ diff -ruN extraction-edited/BitVecEq.fsti extraction-secret-independent/BitVecEq - = admit () -*) diff -ruN extraction-edited/Libcrux.Digest.fsti extraction-secret-independent/Libcrux.Digest.fsti ---- extraction-edited/Libcrux.Digest.fsti 2024-03-12 10:45:44.826929336 +0100 -+++ extraction-secret-independent/Libcrux.Digest.fsti 2024-03-12 10:45:44.845928775 +0100 +--- extraction-edited/Libcrux.Digest.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Digest.fsti 2024-03-13 11:03:50 @@ -1,31 +1,41 @@ module Libcrux.Digest #set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -326,28 +326,6 @@ diff -ruN extraction-edited/Libcrux.Digest.fsti extraction-secret-independent/Li - : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) - Prims.l_True - (fun _ -> Prims.l_True) -- --val sha3_256_ (payload: t_Slice u8) -- : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) -- --val sha3_512_ (payload: t_Slice u8) -- : Prims.Pure (t_Array u8 (sz 64)) Prims.l_True (fun _ -> Prims.l_True) -- --val shake128 (v_LEN: usize) (data: t_Slice u8) -- : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) -- --val shake256 (v_LEN: usize) (data: t_Slice u8) -- : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) -- --val shake128x4_portable (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) -- : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) -- Prims.l_True -- (fun _ -> Prims.l_True) -- --val shake128x4 (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) -- : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) -- Prims.l_True -- (fun _ -> Prims.l_True) +type t_Algorithm = + | Algorithm_Sha1 : t_Algorithm + | Algorithm_Sha224 : t_Algorithm @@ -360,7 +338,9 @@ diff -ruN extraction-edited/Libcrux.Digest.fsti extraction-secret-independent/Li + | Algorithm_Sha3_256_ : t_Algorithm + | Algorithm_Sha3_384_ : t_Algorithm + | Algorithm_Sha3_512_ : t_Algorithm -+ + +-val sha3_256_ (payload: t_Slice u8) +- : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) +let digest_size (mode: t_Algorithm) : usize = + match mode with + | Algorithm_Sha1 -> sz 20 @@ -374,19 +354,33 @@ diff -ruN extraction-edited/Libcrux.Digest.fsti extraction-secret-independent/Li + | Algorithm_Sha3_256_ -> sz 32 + | Algorithm_Sha3_384_ -> sz 48 + | Algorithm_Sha3_512_ -> sz 64 -+ + +-val sha3_512_ (payload: t_Slice u8) +- : Prims.Pure (t_Array u8 (sz 64)) Prims.l_True (fun _ -> Prims.l_True) +val sha3_256_ (payload: t_Slice u8) : t_Array u8 (sz 32) -+ + +-val shake128 (v_LEN: usize) (data: t_Slice u8) +- : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) +val sha3_512_ (payload: t_Slice u8) : t_Array u8 (sz 64) -+ + +-val shake256 (v_LEN: usize) (data: t_Slice u8) +- : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) +val shake128 (v_LEN: usize) (data: t_Slice u8) : t_Array u8 v_LEN -+ + +-val shake128x4_portable (v_LEN: usize) (data0 data1 data2 data3: t_Slice 
u8) +- : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) +- Prims.l_True +- (fun _ -> Prims.l_True) +val shake128x4 (v_LEN: usize) (data0: t_Slice u8) (data1: t_Slice u8) (data2: t_Slice u8) (data3: t_Slice u8): (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) -+ + +-val shake128x4 (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) +- : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) +- Prims.l_True +- (fun _ -> Prims.l_True) +val shake256 (v_LEN: usize) (data: t_Slice u8) : t_Array u8 v_LEN diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst extraction-secret-independent/Libcrux.Kem.Kyber.Arithmetic.fst ---- extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst 2024-03-12 10:45:44.800930103 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Arithmetic.fst 2024-03-12 10:45:44.846928746 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Arithmetic.fst 2024-03-13 11:03:50 @@ -1,364 +1,81 @@ module Libcrux.Kem.Kyber.Arithmetic -#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" @@ -628,20 +622,20 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst extraction-secret-i - }; - res -#pop-options -- --let montgomery_multiply_sfe_by_fer fe fer = -- montgomery_reduce (mul_i32_b fe fer) + let c:i32 = k_times_modulus >>! v_MONTGOMERY_SHIFT in + let value_high:i32 = value >>! v_MONTGOMERY_SHIFT in + value_high -! c +-let montgomery_multiply_sfe_by_fer fe fer = +- montgomery_reduce (mul_i32_b fe fer) +let montgomery_multiply_sfe_by_fer (fe fer: i32) = montgomery_reduce (fe *! fer <: i32) --let to_standard_domain mfe = -- montgomery_reduce (mul_i32_b mfe (v_MONTGOMERY_R_SQUARED_MOD_FIELD_MODULUS <: i32_b 1353)) +let to_standard_domain (mfe: i32) = + montgomery_reduce (mfe *! v_MONTGOMERY_R_SQUARED_MOD_FIELD_MODULUS <: i32) +-let to_standard_domain mfe = +- montgomery_reduce (mul_i32_b mfe (v_MONTGOMERY_R_SQUARED_MOD_FIELD_MODULUS <: i32_b 1353)) +- -let to_unsigned_representative fe = +let to_unsigned_representative (fe: i32) = let _:Prims.unit = () <: Prims.unit in @@ -659,7 +653,10 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst extraction-secret-i - assert (v fe < 0 ==> v res == v fe + 3329); - assert (v fe >= 0 ==> v res == v fe); - res <: int_t_d u16_inttype 12 -- ++ cast (fe +! (Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS &. (fe >>! 31l <: i32) <: i32) <: i32) ++ <: ++ u16 + -let derefine_poly_b #b x = - let r = createi (sz 256) (fun i -> (x.f_coefficients.[i] <: i32)) in - {f_coefficients = r} @@ -671,10 +668,7 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst extraction-secret-i -let derefine_matrix_b #v_K #b x = - let r = createi v_K (fun i -> derefine_vector_b #v_K #b x.[i]) in - r -+ cast (fe +! (Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS &. (fe >>! 
31l <: i32) <: i32) <: i32) -+ <: -+ u16 - +- -let cast_poly_b #b1 #b2 x = - let r = createi (sz 256) (fun i -> (x.f_coefficients.[i] <: i32_b b2)) in - let res = {f_coefficients = r} in @@ -791,8 +785,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fst extraction-secret-i - - diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Arithmetic.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti 2024-03-12 10:45:44.824929395 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Arithmetic.fsti 2024-03-12 10:45:44.869928067 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Arithmetic.fsti 2024-03-13 11:03:50 @@ -3,32 +3,10 @@ open Core open FStar.Mul @@ -841,13 +835,13 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret- let v_MONTGOMERY_R_SQUARED_MOD_FIELD_MODULUS: i32 = 1353l -let v_MONTGOMERY_SHIFT: u8 = 16uy -- --val v_MONTGOMERY_R: x:i32{v x = pow2 16 /\ x = 65536l} +let v_MONTGOMERY_SHIFT: pub_u8 = 16uy --val v_MONTGOMERY_R_INV: x:i32{v x >= 0 /\ v x < 3329 /\ (v x * v v_MONTGOMERY_R) % 3329 == 1 /\ x = 169l} +-val v_MONTGOMERY_R: x:i32{v x = pow2 16 /\ x = 65536l} +let v_MONTGOMERY_R: i32 = 1l <= 0 /\ v x < 3329 /\ (v x * v v_MONTGOMERY_R) % 3329 == 1 /\ x = 169l} +- -let int_to_spec_fe (m:int) : Spec.Kyber.field_element = - let m_v = m % v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS in - assert (m_v > - v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS); @@ -878,11 +872,21 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret- fun result -> let result:u32 = result in - v result = v value % pow2 (v n)) -- ++ v result < v (Core.Num.impl__u32__pow 2ul (Core.Convert.f_into n <: u32) <: u32)) + -//let barrett_pre (value:i32) = -// v value <= v v_BARRETT_R /\ v value >= - v v_BARRETT_R -// Appears to work up to +/- 2^28, but not at +/- 2^29 -+ v result < v (Core.Num.impl__u32__pow 2ul (Core.Convert.f_into n <: u32) <: u32)) ++val barrett_reduce (value: i32) ++ : Prims.Pure i32 ++ (requires ++ v (Core.Convert.f_from value <: i64) > v (Core.Ops.Arith.Neg.neg v_BARRETT_R <: i64) && ++ v (Core.Convert.f_from value <: i64) < v v_BARRETT_R) ++ (ensures ++ fun result -> ++ let result:i32 = result in ++ v result > v (Core.Ops.Arith.Neg.neg Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) && ++ v result < v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS) -let barrett_post (value:i32) (result:i32) = - v result % v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS = @@ -901,18 +905,29 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret- -val montgomery_reduce #b (value: i32_b b) - : Prims.Pure (i32_b (nat_div_ceil b (v v_MONTGOMERY_R) + 1665)) - (requires True) -+val barrett_reduce (value: i32) ++val montgomery_reduce (value: i32) + : Prims.Pure i32 + (requires -+ v (Core.Convert.f_from value <: i64) > v (Core.Ops.Arith.Neg.neg v_BARRETT_R <: i64) && -+ v (Core.Convert.f_from value <: i64) < v v_BARRETT_R) ++ v value >= ++ v ((Core.Ops.Arith.Neg.neg Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) *! ++ v_MONTGOMERY_R ++ <: ++ i32) && ++ v value <= v (Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS *! 
v_MONTGOMERY_R <: i32)) (ensures fun result -> let result:i32 = result in - montgomery_post value result) -- -+ v result > v (Core.Ops.Arith.Neg.neg Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) && -+ v result < v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS) ++ v result >= ++ v ((Core.Ops.Arith.Neg.neg (3l *! Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) <: i32 ++ ) /! ++ 2l ++ <: ++ i32) && ++ v result <= v ((3l *! Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) /! 2l <: i32)) + ++val montgomery_multiply_sfe_by_fer (fe fer: i32) ++ : Prims.Pure i32 Prims.l_True (fun _ -> Prims.l_True) -val montgomery_multiply_sfe_by_fer #b1 #b2 (fe:i32_b b1) (fer: i32_b b2) - : Pure (i32_b (nat_div_ceil (b1 * b2) (v v_MONTGOMERY_R) + 1665)) @@ -920,7 +935,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret- - (ensures (fun result -> - montgomery_post (mul_i32_b fe fer) (result))) - -- ++val to_standard_domain (mfe: i32) : Prims.Pure i32 Prims.l_True (fun _ -> Prims.l_True) + -val to_standard_domain #b (mfe: i32_b b) - : Pure (i32_b (nat_div_ceil (b * 1353) (v v_MONTGOMERY_R) + 1665)) - (requires (b * 1353 < pow2_31)) @@ -931,31 +947,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret- -val to_unsigned_representative (fe: wfFieldElement) - : Prims.Pure (int_t_d u16_inttype 12) - (requires True) -+val montgomery_reduce (value: i32) -+ : Prims.Pure i32 -+ (requires -+ v value >= -+ v ((Core.Ops.Arith.Neg.neg Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) *! -+ v_MONTGOMERY_R -+ <: -+ i32) && -+ v value <= v (Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS *! v_MONTGOMERY_R <: i32)) -+ (ensures -+ fun result -> -+ let result:i32 = result in -+ v result >= -+ v ((Core.Ops.Arith.Neg.neg (3l *! Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) <: i32 -+ ) /! -+ 2l -+ <: -+ i32) && -+ v result <= v ((3l *! Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) /! 2l <: i32)) -+ -+val montgomery_multiply_sfe_by_fer (fe fer: i32) -+ : Prims.Pure i32 Prims.l_True (fun _ -> Prims.l_True) -+ -+val to_standard_domain (mfe: i32) : Prims.Pure i32 Prims.l_True (fun _ -> Prims.l_True) -+ +val to_unsigned_representative (fe: i32) + : Prims.Pure u16 + (requires @@ -967,11 +958,16 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret- - v result == to_spec_fe fe /\ - result >=. 0us && - result <. (cast (Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) <: u16)) -- ++ v result >= v 0us && ++ v result < v (cast (Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) <: u16)) + -type t_PolynomialRingElement = { f_coefficients:t_Array (t_FieldElement) (sz 256) } -- ++type t_PolynomialRingElement = { f_coefficients:t_Array i32 (sz 256) } + -type t_PolynomialRingElement_b b = { f_coefficients:t_Array (i32_b b) (sz 256) } -- ++let impl__PolynomialRingElement__ZERO: t_PolynomialRingElement = ++ { f_coefficients = Rust_primitives.Hax.repeat 0l (sz 256) } <: t_PolynomialRingElement + -type wfPolynomialRingElement = t_PolynomialRingElement_b (v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS - 1) - -val derefine_poly_b (#b1:nat) (x:t_PolynomialRingElement_b b1): @@ -1093,14 +1089,9 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret- - (ensures fun result -> - (forall i. 
v result.f_coefficients.[i] == v lhs.f_coefficients.[i] + v rhs.f_coefficients.[i])) - -+ v result >= v 0us && -+ v result < v (cast (Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32) <: u16)) - -+type t_PolynomialRingElement = { f_coefficients:t_Array i32 (sz 256) } - -+let impl__PolynomialRingElement__ZERO: t_PolynomialRingElement = -+ { f_coefficients = Rust_primitives.Hax.repeat 0l (sz 256) } <: t_PolynomialRingElement - +- +- +- +val add_to_ring_element (v_K: usize) (lhs rhs: t_PolynomialRingElement) + : Prims.Pure t_PolynomialRingElement + (requires @@ -1149,8 +1140,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Arithmetic.fsti extraction-secret- + <: + bool)) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Compress.fst extraction-secret-independent/Libcrux.Kem.Kyber.Compress.fst ---- extraction-edited/Libcrux.Kem.Kyber.Compress.fst 2024-03-12 10:45:44.803930015 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Compress.fst 2024-03-12 10:45:44.890927448 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Compress.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Compress.fst 2024-03-13 11:03:50 @@ -1,79 +1,39 @@ module Libcrux.Kem.Kyber.Compress -#set-options "--fuel 0 --ifuel 0 --z3rlimit 200" @@ -1159,12 +1150,27 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Compress.fst extraction-secret-ind open FStar.Mul -let compress_message_coefficient fe = -- let (shifted: i16):i16 = 1664s -! (cast (fe <: u16) <: i16) in ++let compress_ciphertext_coefficient (coefficient_bits: pub_u8) (fe: u16) = ++ let _:Prims.unit = () <: Prims.unit in ++ let _:Prims.unit = () <: Prims.unit in ++ let compressed:u64 = (cast (fe <: u16) <: u64) <>! 35l in ++ cast (Libcrux.Kem.Kyber.Arithmetic.get_n_least_significant_bits coefficient_bits ++ (cast (compressed <: u64) <: u32) ++ <: ++ u32) ++ <: ++ i32 ++ ++let compress_message_coefficient (fe: u16) = + let (shifted: i16):i16 = 1664s -! (cast (fe <: u16) <: i16) in - assert (v shifted == 1664 - v fe); -- let mask:i16 = shifted >>! 15l in + let mask:i16 = shifted >>! 15l in - assert (v mask = v shifted / pow2 15); - assert (if v shifted < 0 then mask = ones else mask = zero); -- let shifted_to_positive:i16 = mask ^. shifted in + let shifted_to_positive:i16 = mask ^. shifted in - logxor_lemma shifted mask; - assert (v shifted < 0 ==> v shifted_to_positive = v (lognot shifted)); - neg_equiv_lemma shifted; @@ -1174,7 +1180,7 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Compress.fst extraction-secret-ind - assert (v shifted >= 0 ==> mask ^. shifted = shifted); - assert (v shifted >= 0 ==> v shifted_to_positive = v shifted); - assert (shifted_to_positive >=. 0s); -- let shifted_positive_in_range:i16 = shifted_to_positive -! 832s in + let shifted_positive_in_range:i16 = shifted_to_positive -! 832s in - assert (1664 - v fe >= 0 ==> v shifted_positive_in_range == 832 - v fe); - assert (1664 - v fe < 0 ==> v shifted_positive_in_range == -2497 + v fe); - let r0 = shifted_positive_in_range >>! 15l in @@ -1189,9 +1195,10 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Compress.fst extraction-secret-ind - assert (v fe > 2496 ==> r1 = 0s); - assert (v res = v r1); - res -- ++ cast ((shifted_positive_in_range >>! 15l <: i16) &. 
1s <: i16) <: u8 + -let compress_ciphertext_coefficient coefficient_bits fe = -+let compress_ciphertext_coefficient (coefficient_bits: pub_u8) (fe: u16) = ++let decompress_ciphertext_coefficient (coefficient_bits: pub_u8) (fe: i32) = let _:Prims.unit = () <: Prims.unit in let _:Prims.unit = () <: Prims.unit in - let compressed:u32 = (cast (fe <: u16) <: u32) <>! 35l in -+ cast (Libcrux.Kem.Kyber.Arithmetic.get_n_least_significant_bits coefficient_bits -+ (cast (compressed <: u64) <: u32) -+ <: -+ u32) - <: - i32 +- <: +- i32 - in - res - +- -#push-options "--z3rlimit 300" -let decompress_ciphertext_coefficient coefficient_bits fe = -+let compress_message_coefficient (fe: u16) = -+ let (shifted: i16):i16 = 1664s -! (cast (fe <: u16) <: i16) in -+ let mask:i16 = shifted >>! 15l in -+ let shifted_to_positive:i16 = mask ^. shifted in -+ let shifted_positive_in_range:i16 = shifted_to_positive -! 832s in -+ cast ((shifted_positive_in_range >>! 15l <: i16) &. 1s <: i16) <: u8 -+ -+let decompress_ciphertext_coefficient (coefficient_bits: pub_u8) (fe: i32) = - let _:Prims.unit = () <: Prims.unit in - let _:Prims.unit = () <: Prims.unit in +- let _:Prims.unit = () <: Prims.unit in +- let _:Prims.unit = () <: Prims.unit in - assert (v (1ul < -- let result:i32 = result in ++ v fe >= v 0l && ++ v fe < v (Core.Num.impl__i32__pow 2l (cast (coefficient_bits <: u8) <: u32) <: i32)) + (ensures + fun result -> + let result:i32 = result in - result >=. 0l && - result <. (Core.Num.impl__i32__pow 2l (cast (coefficient_bits <: u8) <: u32) <: i32)) -- ++ v result < v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS) + -open Rust_primitives.Integers - -val decompress_ciphertext_coefficient @@ -1314,22 +1308,19 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Compress.fsti extraction-secret-in - (fe: int_t_d i32_inttype (v coefficient_bits)) - : Prims.Pure (Libcrux.Kem.Kyber.Arithmetic.i32_b (v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS - 1)) - (requires True) -+ v fe >= v 0l && -+ v fe < v (Core.Num.impl__i32__pow 2l (cast (coefficient_bits <: u8) <: u32) <: i32)) - (ensures - fun result -> - let result:i32 = result in +- (ensures +- fun result -> +- let result:i32 = result in - result <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS) -+ v result < v Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS) - +- val decompress_message_coefficient (fe: i32) - : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.wfFieldElement - (requires fe =. 0l || fe =. 1l) - (fun result -> v result >= 0 /\ v result < 3329) + : Prims.Pure i32 (requires fe =. 0l || fe =. 
1l) (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fst extraction-secret-independent/Libcrux.Kem.Kyber.Constant_time_ops.fst ---- extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fst 2024-03-12 10:45:44.813929720 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Constant_time_ops.fst 2024-03-12 10:45:44.881927713 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Constant_time_ops.fst 2024-03-13 11:03:50 @@ -4,163 +4,61 @@ open FStar.Mul @@ -1518,8 +1509,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fst extraction-s -#pop-options + out diff -ruN extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Constant_time_ops.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fsti 2024-03-12 10:45:44.823929425 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Constant_time_ops.fsti 2024-03-12 10:45:44.852928569 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Constant_time_ops.fsti 2024-03-13 11:03:50 @@ -20,26 +20,30 @@ val compare_ciphertexts_in_constant_time (v_CIPHERTEXT_SIZE: usize) (lhs rhs: t_Slice u8) @@ -1562,8 +1553,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Constant_time_ops.fsti extraction- + let _:Prims.unit = temp_0_ in + result = rhs <: bool)) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Conversions.fst extraction-secret-independent/Libcrux.Kem.Kyber.Conversions.fst ---- extraction-edited/Libcrux.Kem.Kyber.Conversions.fst 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Conversions.fst 2024-03-12 10:45:44.848928687 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Conversions.fst 1970-01-01 01:00:00 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Conversions.fst 2024-03-13 11:03:50 @@ -0,0 +1,87 @@ +module Libcrux.Kem.Kyber.Conversions +#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -1652,401 +1643,11 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Conversions.fst extraction-secret- + +let to_unsigned_representative (fe: i32) : u16 = + cast (fe +! ((fe >>! 15l <: i32) &. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS <: i32)) <: u16 -\ Pas de fin de ligne à la fin du fichier -diff -ruN extraction-edited/Libcrux.Kem.Kyber.fst extraction-secret-independent/Libcrux.Kem.Kyber.fst ---- extraction-edited/Libcrux.Kem.Kyber.fst 2024-03-12 10:45:44.787930487 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.fst 2024-03-12 10:45:44.838928982 +0100 -@@ -1,29 +1,12 @@ - module Libcrux.Kem.Kyber --#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" -+#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" - open Core - open FStar.Mul - --let update_at_range_lemma #n -- (s: t_Slice 't) -- (i: Core.Ops.Range.t_Range (int_t n) {(Core.Ops.Range.impl_index_range_slice 't n).f_index_pre s i}) -- (x: t_Slice 't) -- : Lemma -- (requires (Seq.length x == v i.f_end - v i.f_start)) -- (ensures ( -- let s' = Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x in -- let len = v i.f_start in -- forall (i: nat). i < len ==> Seq.index s i == Seq.index s' i -- )) -- [SMTPat (Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x)] -- = let s' = Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x in -- let len = v i.f_start in -- introduce forall (i:nat {i < len}). 
Seq.index s i == Seq.index s' i -- with (assert ( Seq.index (Seq.slice s 0 len) i == Seq.index s i -- /\ Seq.index (Seq.slice s' 0 len) i == Seq.index s' i )) -- --let serialize_kem_secret_key #p -+let serialize_kem_secret_key - (v_SERIALIZED_KEY_LEN: usize) -- (private_key public_key implicit_rejection_value: t_Slice u8) = -+ (private_key public_key implicit_rejection_value: t_Slice u8) -+ = - let out:t_Array u8 v_SERIALIZED_KEY_LEN = Rust_primitives.Hax.repeat 0uy v_SERIALIZED_KEY_LEN in - let pointer:usize = sz 0 in - let out:t_Array u8 v_SERIALIZED_KEY_LEN = -@@ -72,8 +55,6 @@ - t_Slice u8) - in - let pointer:usize = pointer +! (Core.Slice.impl__len public_key <: usize) in -- let h_public_key = (Rust_primitives.unsize (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) -- <: t_Slice u8) in - let out:t_Array u8 v_SERIALIZED_KEY_LEN = - Rust_primitives.Hax.Monomorphized_update_at.update_at_range out - ({ -@@ -89,7 +70,16 @@ - pointer +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE <: usize - } - <: -- Core.Ops.Range.t_Range usize ]) h_public_key) -+ Core.Ops.Range.t_Range usize ] -+ <: -+ t_Slice u8) -+ (Rust_primitives.unsize (Libcrux.Kem.Kyber.Hash_functions.v_H public_key -+ <: -+ t_Array u8 (sz 32)) -+ <: -+ t_Slice u8) -+ <: -+ t_Slice u8) - in - let pointer:usize = pointer +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE in - let out:t_Array u8 v_SERIALIZED_KEY_LEN = -@@ -116,32 +106,14 @@ - <: - t_Slice u8) - in -- assert (Seq.slice out 0 (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p)) `Seq.equal` private_key); -- assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p)) -- (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p)) `Seq.equal` public_key); -- assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! -- Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p)) -- (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! -- Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! -- Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)) -- `Seq.equal` Libcrux.Kem.Kyber.Hash_functions.v_H public_key); -- assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! -- Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! -- Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)) -- (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! -- Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! -- Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE +! 
-- Spec.Kyber.v_SHARED_SECRET_SIZE)) -- == implicit_rejection_value); -- lemma_slice_append_4 out private_key public_key (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) implicit_rejection_value; - out - --let decapsulate #p -+let decapsulate - (v_K v_SECRET_KEY_SIZE v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: - usize) -- (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_SECRET_KEY_SIZE) -- (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) = -- let orig_secret_key = secret_key.f_value in -+ (secret_key: Libcrux.Kem.Kyber.Types.t_KyberPrivateKey v_SECRET_KEY_SIZE) -+ (ciphertext: Libcrux.Kem.Kyber.Types.t_KyberCiphertext v_CIPHERTEXT_SIZE) -+ = - let ind_cpa_secret_key, secret_key:(t_Slice u8 & t_Slice u8) = - Libcrux.Kem.Kyber.Types.impl_12__split_at v_SECRET_KEY_SIZE secret_key v_CPA_SECRET_KEY_SIZE - in -@@ -151,12 +123,8 @@ - let ind_cpa_public_key_hash, implicit_rejection_value:(t_Slice u8 & t_Slice u8) = - Core.Slice.impl__split_at secret_key Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE - in -- assert (ind_cpa_secret_key == slice orig_secret_key (sz 0) v_CPA_SECRET_KEY_SIZE); -- assert (ind_cpa_public_key == slice orig_secret_key v_CPA_SECRET_KEY_SIZE (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE)); -- assert (ind_cpa_public_key_hash == slice orig_secret_key (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE) (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)); -- assert (implicit_rejection_value == slice orig_secret_key (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE) (length orig_secret_key)); - let decrypted:t_Array u8 (sz 32) = -- Libcrux.Kem.Kyber.Ind_cpa.decrypt #p v_K -+ Libcrux.Kem.Kyber.Ind_cpa.decrypt v_K - v_CIPHERTEXT_SIZE - v_C1_SIZE - v_VECTOR_U_COMPRESSION_FACTOR -@@ -184,9 +152,6 @@ - <: - t_Slice u8) - in -- lemma_slice_append to_hash decrypted ind_cpa_public_key_hash; -- assert (decrypted == Spec.Kyber.ind_cpa_decrypt p ind_cpa_secret_key ciphertext.f_value); -- assert (to_hash == concat decrypted ind_cpa_public_key_hash); - let hashed:t_Array u8 (sz 64) = - Libcrux.Kem.Kyber.Hash_functions.v_G (Rust_primitives.unsize to_hash <: t_Slice u8) - in -@@ -194,10 +159,6 @@ - Core.Slice.impl__split_at (Rust_primitives.unsize hashed <: t_Slice u8) - Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE - in -- assert ((shared_secret,pseudorandomness) == split hashed Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE); -- assert (length implicit_rejection_value = v_SECRET_KEY_SIZE -! v_CPA_SECRET_KEY_SIZE -! v_PUBLIC_KEY_SIZE -! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE); -- assert (length implicit_rejection_value = Spec.Kyber.v_SHARED_SECRET_SIZE); -- assert (Spec.Kyber.v_SHARED_SECRET_SIZE <=. 
Spec.Kyber.v_IMPLICIT_REJECTION_HASH_INPUT_SIZE p); - let (to_hash: t_Array u8 v_IMPLICIT_REJECTION_HASH_INPUT_SIZE):t_Array u8 - v_IMPLICIT_REJECTION_HASH_INPUT_SIZE = - Libcrux.Kem.Kyber.Ind_cpa.into_padded_array v_IMPLICIT_REJECTION_HASH_INPUT_SIZE -@@ -219,14 +180,11 @@ - <: - t_Slice u8) - in -- lemma_slice_append to_hash implicit_rejection_value ciphertext.f_value; - let (implicit_rejection_shared_secret: t_Array u8 (sz 32)):t_Array u8 (sz 32) = - Libcrux.Kem.Kyber.Hash_functions.v_PRF (sz 32) (Rust_primitives.unsize to_hash <: t_Slice u8) - in -- assert (implicit_rejection_shared_secret == Spec.Kyber.v_J to_hash); -- assert (Seq.length ind_cpa_public_key == v v_PUBLIC_KEY_SIZE); - let expected_ciphertext:t_Array u8 v_CIPHERTEXT_SIZE = -- Libcrux.Kem.Kyber.Ind_cpa.encrypt #p v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE -+ Libcrux.Kem.Kyber.Ind_cpa.encrypt v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE - v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 - v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE ind_cpa_public_key decrypted - pseudorandomness -@@ -236,18 +194,16 @@ - (Core.Convert.f_as_ref ciphertext <: t_Slice u8) - (Rust_primitives.unsize expected_ciphertext <: t_Slice u8) - in -- let res = - Libcrux.Kem.Kyber.Constant_time_ops.select_shared_secret_in_constant_time shared_secret - (Rust_primitives.unsize implicit_rejection_shared_secret <: t_Slice u8) - selector -- in -- res - --let encapsulate #p -+let encapsulate - (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: - usize) -- (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) -- (randomness: t_Array u8 (sz 32)) = -+ (public_key: Libcrux.Kem.Kyber.Types.t_KyberPublicKey v_PUBLIC_KEY_SIZE) -+ (randomness: t_Array u8 (sz 32)) -+ = - let (to_hash: t_Array u8 (sz 64)):t_Array u8 (sz 64) = - Libcrux.Kem.Kyber.Ind_cpa.into_padded_array (sz 64) - (Rust_primitives.unsize randomness <: t_Slice u8) -@@ -278,10 +234,6 @@ - <: - t_Slice u8) - in -- assert (Seq.slice to_hash 0 (v Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE) == randomness); -- lemma_slice_append to_hash randomness (Spec.Kyber.v_H public_key.f_value); -- assert (to_hash == concat randomness (Spec.Kyber.v_H public_key.f_value)); -- - let hashed:t_Array u8 (sz 64) = - Libcrux.Kem.Kyber.Hash_functions.v_G (Rust_primitives.unsize to_hash <: t_Slice u8) - in -@@ -290,7 +242,7 @@ - Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE - in - let ciphertext:t_Array u8 v_CIPHERTEXT_SIZE = -- Libcrux.Kem.Kyber.Ind_cpa.encrypt #p v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE -+ Libcrux.Kem.Kyber.Ind_cpa.encrypt v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE - v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN - v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE - (Rust_primitives.unsize (Libcrux.Kem.Kyber.Types.impl_18__as_slice v_PUBLIC_KEY_SIZE -@@ -300,42 +252,23 @@ - <: - t_Slice u8) randomness pseudorandomness - in -- Core.Convert.f_into ciphertext, -- Core.Result.impl__unwrap (Core.Convert.f_try_into shared_secret -- <: -- Core.Result.t_Result (t_Array u8 (sz 32)) Core.Array.t_TryFromSliceError) -- <: -- (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) -- --#push-options "--z3rlimit 100" --let validate_public_key #p -- 
(v_K v_RANKED_BYTES_PER_RING_ELEMENT v_PUBLIC_KEY_SIZE: usize) -- (public_key: t_Array u8 v_PUBLIC_KEY_SIZE) -- = -- let pk:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = -- Libcrux.Kem.Kyber.Ind_cpa.deserialize_public_key #p v_K -- (public_key.[ { Core.Ops.Range.f_end = v_RANKED_BYTES_PER_RING_ELEMENT } -+ let shared_secret:t_Array u8 (sz 32) = -+ match Core.Convert.f_try_into shared_secret with -+ | Core.Result.Result_Ok shared_secret -> shared_secret -+ | Core.Result.Result_Err _ -> -+ Rust_primitives.Hax.never_to_any (Core.Panicking.panic "explicit panic" - <: -- Core.Ops.Range.t_RangeTo usize ]) -+ Rust_primitives.Hax.t_Never) - in -- let public_key_serialized:t_Array u8 v_PUBLIC_KEY_SIZE = -- Libcrux.Kem.Kyber.Ind_cpa.serialize_public_key #p v_K -- v_RANKED_BYTES_PER_RING_ELEMENT -- v_PUBLIC_KEY_SIZE -- pk -- (public_key.[ { Core.Ops.Range.f_start = v_RANKED_BYTES_PER_RING_ELEMENT } -- <: -- Core.Ops.Range.t_RangeFrom usize ] -- <: -- t_Slice u8) -- in -- public_key =. public_key_serialized --#pop-options -+ Core.Convert.f_into ciphertext, shared_secret -+ <: -+ (Libcrux.Kem.Kyber.Types.t_KyberCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) - --let generate_keypair #p -+let generate_keypair - (v_K v_CPA_PRIVATE_KEY_SIZE v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE v_BYTES_PER_RING_ELEMENT v_ETA1 v_ETA1_RANDOMNESS_SIZE: - usize) -- (randomness: t_Array u8 (sz 64)) = -+ (randomness: t_Array u8 (sz 64)) -+ = - let ind_cpa_keypair_randomness:t_Slice u8 = - randomness.[ { - Core.Ops.Range.f_start = sz 0; -@@ -353,7 +286,7 @@ - in - let ind_cpa_private_key, public_key:(t_Array u8 v_CPA_PRIVATE_KEY_SIZE & - t_Array u8 v_PUBLIC_KEY_SIZE) = -- Libcrux.Kem.Kyber.Ind_cpa.generate_keypair #p v_K -+ Libcrux.Kem.Kyber.Ind_cpa.generate_keypair v_K - v_CPA_PRIVATE_KEY_SIZE - v_PUBLIC_KEY_SIZE - v_BYTES_PER_RING_ELEMENT -@@ -362,17 +295,16 @@ - ind_cpa_keypair_randomness - in - let secret_key_serialized:t_Array u8 v_PRIVATE_KEY_SIZE = -- serialize_kem_secret_key #p v_PRIVATE_KEY_SIZE -+ serialize_kem_secret_key v_PRIVATE_KEY_SIZE - (Rust_primitives.unsize ind_cpa_private_key <: t_Slice u8) - (Rust_primitives.unsize public_key <: t_Slice u8) - implicit_rejection_value - in -- let (private_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_PRIVATE_KEY_SIZE):Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey -+ let (private_key: Libcrux.Kem.Kyber.Types.t_KyberPrivateKey v_PRIVATE_KEY_SIZE):Libcrux.Kem.Kyber.Types.t_KyberPrivateKey - v_PRIVATE_KEY_SIZE = - Core.Convert.f_from secret_key_serialized - in - Libcrux.Kem.Kyber.Types.impl__from v_PRIVATE_KEY_SIZE - v_PUBLIC_KEY_SIZE - private_key -- (Core.Convert.f_into public_key <: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) -- -+ (Core.Convert.f_into public_key <: Libcrux.Kem.Kyber.Types.t_KyberPublicKey v_PUBLIC_KEY_SIZE) -diff -ruN extraction-edited/Libcrux.Kem.Kyber.fsti extraction-secret-independent/Libcrux.Kem.Kyber.fsti ---- extraction-edited/Libcrux.Kem.Kyber.fsti 2024-03-12 10:45:44.818929572 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.fsti 2024-03-12 10:45:44.842928864 +0100 -@@ -4,90 +4,37 @@ - open FStar.Mul - - unfold --let t_MlKemSharedSecret = t_Array u8 (sz 32) -+let t_KyberSharedSecret = t_Array u8 (sz 32) - - let v_KEY_GENERATION_SEED_SIZE: usize = - Libcrux.Kem.Kyber.Constants.v_CPA_PKE_KEY_GENERATION_SEED_SIZE +! 
- Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE - --val serialize_kem_secret_key (#p:Spec.Kyber.params) -+val serialize_kem_secret_key - (v_SERIALIZED_KEY_LEN: usize) - (private_key public_key implicit_rejection_value: t_Slice u8) -- : Pure (t_Array u8 v_SERIALIZED_KEY_LEN) -- (requires (length private_key == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ -- length public_key == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -- length implicit_rejection_value == Spec.Kyber.v_SHARED_SECRET_SIZE /\ -- v_SERIALIZED_KEY_LEN == Spec.Kyber.v_SECRET_KEY_SIZE p)) -- (ensures (fun res -> res == -- Seq.append private_key ( -- Seq.append public_key ( -- Seq.append (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) implicit_rejection_value)))) -+ : Prims.Pure (t_Array u8 v_SERIALIZED_KEY_LEN) Prims.l_True (fun _ -> Prims.l_True) - --val decapsulate (#p:Spec.Kyber.params) -+val decapsulate - (v_K v_SECRET_KEY_SIZE v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: - usize) -- (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_SECRET_KEY_SIZE) -- (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) -- : Pure (t_Array u8 (sz 32)) -- (requires ( p == (let open Spec.Kyber in {v_RANK = v_K; v_ETA1; v_ETA2; v_VECTOR_U_COMPRESSION_FACTOR; v_VECTOR_V_COMPRESSION_FACTOR}) /\ -- Spec.Kyber.valid_params p /\ -- v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ -- v_ETA2_RANDOMNESS_SIZE == Spec.Kyber.v_ETA2_RANDOMNESS_SIZE p /\ -- v_IMPLICIT_REJECTION_HASH_INPUT_SIZE == Spec.Kyber.v_IMPLICIT_REJECTION_HASH_INPUT_SIZE p /\ -- v_SECRET_KEY_SIZE == Spec.Kyber.v_SECRET_KEY_SIZE p /\ -- v_CPA_SECRET_KEY_SIZE == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ -- v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -- v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ -- v_C1_SIZE == Spec.Kyber.v_C1_SIZE p /\ -- v_C1_BLOCK_SIZE == Spec.Kyber.v_C1_BLOCK_SIZE p /\ -- v_C2_SIZE == Spec.Kyber.v_C2_SIZE p /\ -- v_T_AS_NTT_ENCODED_SIZE = Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p -- )) -- (ensures (fun res -> -- res == Spec.Kyber.ind_cca_decapsulate p secret_key.f_value ciphertext.f_value)) -+ (secret_key: Libcrux.Kem.Kyber.Types.t_KyberPrivateKey v_SECRET_KEY_SIZE) -+ (ciphertext: Libcrux.Kem.Kyber.Types.t_KyberCiphertext v_CIPHERTEXT_SIZE) -+ : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) - --val encapsulate (#p:Spec.Kyber.params) -+val encapsulate - (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: - usize) -- (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) -+ (public_key: Libcrux.Kem.Kyber.Types.t_KyberPublicKey v_PUBLIC_KEY_SIZE) - (randomness: t_Array u8 (sz 32)) -- : Pure (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) -- (requires (p == (let open Spec.Kyber in {v_RANK = v_K; v_ETA1; v_ETA2; v_VECTOR_U_COMPRESSION_FACTOR; v_VECTOR_V_COMPRESSION_FACTOR}) /\ -- Spec.Kyber.valid_params p /\ -- v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ -- v_ETA2_RANDOMNESS_SIZE == Spec.Kyber.v_ETA2_RANDOMNESS_SIZE p /\ -- v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -- 
v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ -- v_C1_SIZE == Spec.Kyber.v_C1_SIZE p /\ -- v_C2_SIZE == Spec.Kyber.v_C2_SIZE p /\ -- v_T_AS_NTT_ENCODED_SIZE = Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p /\ -- v_VECTOR_U_BLOCK_LEN == Spec.Kyber.v_C1_BLOCK_SIZE p -- )) -- -- (ensures (fun (ct,ss) -> -- (ct.f_value,ss) == Spec.Kyber.ind_cca_encapsulate p public_key.f_value randomness)) -- --val validate_public_key (#p:Spec.Kyber.params) -- (v_K v_RANKED_BYTES_PER_RING_ELEMENT v_PUBLIC_KEY_SIZE: usize) -- (public_key: t_Array u8 v_PUBLIC_KEY_SIZE) -- : Prims.Pure bool -- (requires (v_K == p.v_RANK /\ -- v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -- v_RANKED_BYTES_PER_RING_ELEMENT == Spec.Kyber.v_RANKED_BYTES_PER_RING_ELEMENT p -- )) -- (ensures (fun _ -> Prims.l_True)) -+ : Prims.Pure (Libcrux.Kem.Kyber.Types.t_KyberCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) -+ Prims.l_True -+ (fun _ -> Prims.l_True) - --val generate_keypair (#p:Spec.Kyber.params) -+val generate_keypair - (v_K v_CPA_PRIVATE_KEY_SIZE v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE v_BYTES_PER_RING_ELEMENT v_ETA1 v_ETA1_RANDOMNESS_SIZE: - usize) - (randomness: t_Array u8 (sz 64)) -- : Pure (Libcrux.Kem.Kyber.Types.t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -- (requires (v_K == p.v_RANK /\ v_ETA1 == p.v_ETA1 /\ -- v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ -- v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ -- v_CPA_PRIVATE_KEY_SIZE == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ -- v_PRIVATE_KEY_SIZE == Spec.Kyber.v_SECRET_KEY_SIZE p /\ -- v_BYTES_PER_RING_ELEMENT == Spec.Kyber.v_RANKED_BYTES_PER_RING_ELEMENT p -- )) -- (ensures (fun kp -> -- (kp.f_sk.f_value,kp.f_pk.f_value) == Spec.Kyber.ind_cca_generate_keypair p randomness)) -+ : Prims.Pure (Libcrux.Kem.Kyber.Types.t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -+ Prims.l_True -+ (fun _ -> Prims.l_True) -diff -ruN extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst extraction-secret-independent/Libcrux.Kem.Kyber.Hash_functions.fst ---- extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst 2024-03-12 10:45:44.802930044 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Hash_functions.fst 2024-03-12 10:45:44.864928215 +0100 -@@ -3,28 +3,18 @@ +\ No newline at end of file +diff -ruN extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst extraction-secret-independent/Libcrux.Kem.Kyber.Hash_functions.fst +--- extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Hash_functions.fst 2024-03-13 11:03:50 +@@ -3,28 +3,18 @@ open Core open FStar.Mul @@ -2113,8 +1714,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fst extraction-secr - out + out diff -ruN extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Hash_functions.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fsti 2024-03-12 10:45:44.827929307 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Hash_functions.fsti 2024-03-12 10:45:44.887927536 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Hash_functions.fsti 2024-03-13 11:03:50 @@ -3,17 +3,12 @@ open Core open FStar.Mul @@ -2140,8 +1741,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Hash_functions.fsti extraction-sec +val v_XOFx4 (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) + : Prims.Pure (t_Array (t_Array u8 (sz 840)) v_K) 
Prims.l_True (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Helper.fst extraction-secret-independent/Libcrux.Kem.Kyber.Helper.fst ---- extraction-edited/Libcrux.Kem.Kyber.Helper.fst 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Helper.fst 2024-03-12 10:45:44.874927920 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Helper.fst 1970-01-01 01:00:00 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Helper.fst 2024-03-13 11:03:50 @@ -0,0 +1,6 @@ +module Libcrux.Kem.Kyber.Helper +#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -2150,8 +1751,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Helper.fst extraction-secret-indep + + diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-independent/Libcrux.Kem.Kyber.Ind_cpa.fst ---- extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst 2024-03-12 10:45:44.817929602 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Ind_cpa.fst 2024-03-12 10:45:44.867928126 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Ind_cpa.fst 2024-03-13 11:03:50 @@ -1,5 +1,5 @@ module Libcrux.Kem.Kyber.Ind_cpa -#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" @@ -2178,7 +1779,12 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - (prf_input: t_Array u8 (sz 33)) domain_separator = - let error_1_:t_Array (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (pow2 (v v_ETA2) - 1)) v_K = - Rust_primitives.Hax.repeat (etaZero (sz (pow2 (v v_ETA2) - 1))) v_K -- in ++ (prf_input: t_Array u8 (sz 33)) ++ (domain_separator: u8) ++ = ++ let error_1_:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K + in - let orig_domain_separator = domain_separator in - [@ inline_let] - let inv : inv_t v_K v_ETA2 = fun (acc:acc_t v_K v_ETA2) (i:usize) -> @@ -2189,12 +1795,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - else true in - let (domain_separator, prf_input, error_1_):acc_t v_K (v_ETA2) = - Rust_primitives.Iterators.foldi_range #_ #(acc_t v_K (v_ETA2)) #inv { -+ (prf_input: t_Array u8 (sz 33)) -+ (domain_separator: u8) -+ = -+ let error_1_:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -+ in + let domain_separator, error_1_, prf_input:(u8 & + t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array u8 (sz 33)) = @@ -2255,7 +1855,12 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - (prf_input: t_Array u8 (sz 33)) domain_separator = - let re_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = - Rust_primitives.Hax.repeat (wfZero) v_K -- in ++ (prf_input: t_Array u8 (sz 33)) ++ (domain_separator: u8) ++ = ++ let re_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K + in - let orig_domain_separator = domain_separator in - [@ inline_let] - let inv: (u8 & t_Array u8 (sz 33) & t_Array (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) v_K) -> usize -> Type = fun acc i -> @@ -2266,12 +1871,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - else true in - let (domain_separator, prf_input, 
re_as_ntt):(u8 & t_Array u8 (sz 33) & t_Array (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) v_K)= - Rust_primitives.Iterators.foldi_range #_ #_ #inv { -+ (prf_input: t_Array u8 (sz 33)) -+ (domain_separator: u8) -+ = -+ let re_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -+ in + let domain_separator, prf_input, re_as_ntt:(u8 & t_Array u8 (sz 33) & + t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ @@ -2325,9 +1924,9 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde re_as_ntt, domain_separator <: - (t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K & u8) -- + (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & u8) +- -let compress_then_serialize_u #p v_K v_OUT_LEN v_COMPRESSION_FACTOR v_BLOCK_LEN input = +let compress_then_serialize_u + (v_K v_OUT_LEN v_COMPRESSION_FACTOR v_BLOCK_LEN: usize) @@ -2411,7 +2010,13 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - (ciphertext: t_Array u8 v_CIPHERTEXT_SIZE) = - let u_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = - Rust_primitives.Hax.repeat wfZero v_K -- in ++let deserialize_then_decompress_u ++ (v_K v_CIPHERTEXT_SIZE v_U_COMPRESSION_FACTOR: usize) ++ (ciphertext: t_Array u8 v_CIPHERTEXT_SIZE) ++ = ++ let u_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K + in - let acc_t1 = t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K in - [@ inline_let] - let inv = fun (acc:acc_t1) (i:usize) -> True in @@ -2419,13 +2024,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - { Core.Ops.Range.f_end = v_VECTOR_U_ENCODED_SIZE } <: Core.Ops.Range.t_RangeTo usize ] in - assert (length sl == v_VECTOR_U_ENCODED_SIZE); - let chunk_len = ((Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT *! 
-+let deserialize_then_decompress_u -+ (v_K v_CIPHERTEXT_SIZE v_U_COMPRESSION_FACTOR: usize) -+ (ciphertext: t_Array u8 v_CIPHERTEXT_SIZE) -+ = -+ let u_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -+ in + let u_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate + (Core.Slice.impl__chunks_exact (Rust_primitives.unsize ciphertext <: t_Slice u8) @@ -2486,7 +2084,10 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - (v_K: usize) (public_key: t_Slice u8) = - let tt_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = - Rust_primitives.Hax.repeat wfZero v_K -- in ++let deserialize_public_key (v_K: usize) (public_key: t_Slice u8) = ++ let tt_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K + in - let acc_t = t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K in - [@ inline_let] - let inv = fun (acc:acc_t) (i:usize) -> True in @@ -2495,10 +2096,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - Rust_primitives.Iterators.foldi_chunks_exact #u8 #acc_t #inv - public_key - chunk_len -+let deserialize_public_key (v_K: usize) (public_key: t_Slice u8) = -+ let tt_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -+ in + let tt_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate + (Core.Slice.impl__chunks_exact public_key @@ -2530,12 +2127,16 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - admit(); //P-F - tt_as_ntt -#pop-options -- ++ tt_as_ntt + -#push-options "--split_queries always" -let deserialize_secret_key (#p:Spec.Kyber.params) (v_K: usize) (secret_key: t_Slice u8) = - let secret_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = - Rust_primitives.Hax.repeat wfZero v_K -- in ++let deserialize_secret_key (v_K: usize) (secret_key: t_Slice u8) = ++ let secret_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K + in - let acc_t = t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K in - [@ inline_let] - let inv = fun (acc:acc_t) (i:usize) -> True in @@ -2548,12 +2149,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - Rust_primitives.Iterators.foldi_chunks_exact #u8 #acc_t #inv - sl - chunk_len -+ tt_as_ntt -+ -+let deserialize_secret_key (v_K: usize) (secret_key: t_Slice u8) = -+ let secret_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -+ in + let secret_as_ntt:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate + (Core.Slice.impl__chunks_exact secret_key @@ -2627,13 +2222,12 @@ diff -ruN 
extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - let res = Libcrux.Kem.Kyber.Serialize.compress_then_serialize_message message in - res -#pop-options -- ++ Libcrux.Kem.Kyber.Serialize.compress_then_serialize_message message + -#push-options "--z3rlimit 200" -let encrypt #p - v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_LEN v_C2_LEN v_U_COMPRESSION_FACTOR v_V_COMPRESSION_FACTOR v_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE - public_key -+ Libcrux.Kem.Kyber.Serialize.compress_then_serialize_message message -+ +let encrypt + (v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_LEN v_C2_LEN v_U_COMPRESSION_FACTOR v_V_COMPRESSION_FACTOR v_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: + usize) @@ -2866,8 +2460,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fst extraction-secret-inde - res - diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Ind_cpa.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti 2024-03-12 10:45:44.829929248 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Ind_cpa.fsti 2024-03-12 10:45:44.859928362 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Ind_cpa.fsti 2024-03-13 11:03:50 @@ -1,151 +1,80 @@ module Libcrux.Kem.Kyber.Ind_cpa -#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" @@ -2943,7 +2537,10 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-secret-ind - (ensures fun res -> - Libcrux.Kem.Kyber.Arithmetic.to_spec_vector_b #p res == - Spec.Kyber.(vector_ntt (decode_then_decompress_u p (Seq.slice ciphertext 0 (v (Spec.Kyber.v_C1_SIZE p)))))) -- ++ : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) ++ Prims.l_True ++ (fun _ -> Prims.l_True) + -val deserialize_public_key (#p:Spec.Kyber.params) - (v_K: usize) (public_key: t_Array u8 (Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p)) - : Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) @@ -2961,21 +2558,17 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-secret-ind - Libcrux.Kem.Kyber.Arithmetic.to_spec_vector_b #p res == - Spec.Kyber.vector_decode_12 #p secret_key) - -+ : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) -+ Prims.l_True -+ (fun _ -> Prims.l_True) -+ +val deserialize_public_key (v_K: usize) (public_key: t_Slice u8) + : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) + Prims.l_True + (fun _ -> Prims.l_True) -+ + +-val decrypt (#p:Spec.Kyber.params) +val deserialize_secret_key (v_K: usize) (secret_key: t_Slice u8) + : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) + Prims.l_True + (fun _ -> Prims.l_True) - --val decrypt (#p:Spec.Kyber.params) ++ +val decrypt (v_K v_CIPHERTEXT_SIZE v_VECTOR_U_ENCODED_SIZE v_U_COMPRESSION_FACTOR v_V_COMPRESSION_FACTOR: usize) @@ -2990,9 +2583,9 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-secret-ind - v_V_COMPRESSION_FACTOR == p.v_VECTOR_V_COMPRESSION_FACTOR)) - (ensures (fun res -> - res == Spec.Kyber.ind_cpa_decrypt p secret_key ciphertext)) -- + : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) +- -val encrypt (#p:Spec.Kyber.params) +val encrypt (v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_LEN v_C2_LEN v_U_COMPRESSION_FACTOR v_V_COMPRESSION_FACTOR v_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 
v_ETA2_RANDOMNESS_SIZE: @@ -3069,8 +2662,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ind_cpa.fsti extraction-secret-ind + Prims.l_True + (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fst extraction-secret-independent/Libcrux.Kem.Kyber.Kyber1024.fst ---- extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fst 2024-03-12 10:45:44.793930310 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber1024.fst 2024-03-12 10:45:44.873927949 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber1024.fst 2024-03-13 11:03:50 @@ -3,37 +3,22 @@ open Core open FStar.Mul @@ -3119,8 +2712,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fst extraction-secret-in (sz 3168) (sz 1568) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Kyber1024.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fsti 2024-03-12 10:45:44.790930398 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber1024.fsti 2024-03-12 10:45:44.854928510 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber1024.fsti 2024-03-13 11:03:50 @@ -63,32 +63,27 @@ Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_1024_ @@ -3166,8 +2759,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber1024.fsti extraction-secret-i Prims.l_True (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber512.fst extraction-secret-independent/Libcrux.Kem.Kyber.Kyber512.fst ---- extraction-edited/Libcrux.Kem.Kyber.Kyber512.fst 2024-03-12 10:45:44.783930605 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber512.fst 2024-03-12 10:45:44.866928156 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Kyber512.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber512.fst 2024-03-13 11:03:50 @@ -3,37 +3,22 @@ open Core open FStar.Mul @@ -3216,8 +2809,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber512.fst extraction-secret-ind (sz 1632) (sz 800) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber512.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Kyber512.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Kyber512.fsti 2024-03-12 10:45:44.798930162 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber512.fsti 2024-03-12 10:45:44.871928008 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Kyber512.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber512.fsti 2024-03-13 11:03:50 @@ -63,32 +63,27 @@ Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE +! 
v_CPA_PKE_CIPHERTEXT_SIZE_512_ @@ -3263,8 +2856,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber512.fsti extraction-secret-in Prims.l_True (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber768.fst extraction-secret-independent/Libcrux.Kem.Kyber.Kyber768.fst ---- extraction-edited/Libcrux.Kem.Kyber.Kyber768.fst 2024-03-12 10:45:44.780930693 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber768.fst 2024-03-12 10:45:44.876927861 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Kyber768.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber768.fst 2024-03-13 11:03:50 @@ -3,37 +3,22 @@ open Core open FStar.Mul @@ -3313,8 +2906,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber768.fst extraction-secret-ind (sz 2400) (sz 1184) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber768.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Kyber768.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Kyber768.fsti 2024-03-12 10:45:44.807929897 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber768.fsti 2024-03-12 10:45:44.880927743 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Kyber768.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Kyber768.fsti 2024-03-13 11:03:50 @@ -63,33 +63,27 @@ Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE +! v_CPA_PKE_CIPHERTEXT_SIZE_768_ @@ -3346,13 +2939,13 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber768.fsti extraction-secret-in + (public_key: Libcrux.Kem.Kyber.Types.t_KyberPublicKey (sz 1184)) (randomness: t_Array u8 (sz 32)) - : Prims.Pure (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext (sz 1088) & t_Array u8 (sz 32)) -- Prims.l_True ++ : Prims.Pure (Libcrux.Kem.Kyber.Types.t_KyberCiphertext (sz 1088) & t_Array u8 (sz 32)) + Prims.l_True - (ensures (fun (ct,ss)-> (ct.f_value,ss) == Spec.Kyber.kyber768_encapsulate public_key.f_value randomness)) - -val validate_public_key (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey (sz 1184)) - : Prims.Pure (Core.Option.t_Option (Libcrux.Kem.Kyber.Types.t_MlKemPublicKey (sz 1184))) -+ : Prims.Pure (Libcrux.Kem.Kyber.Types.t_KyberCiphertext (sz 1088) & t_Array u8 (sz 32)) - Prims.l_True +- Prims.l_True (fun _ -> Prims.l_True) -val generate_key_pair (randomness: t_Array u8 (sz 64)) @@ -3363,8 +2956,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Kyber768.fsti extraction-secret-in - (ensures (fun kp -> (kp.f_sk.f_value,kp.f_pk.f_value) == Spec.Kyber.kyber768_generate_keypair randomness)) + (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-independent/Libcrux.Kem.Kyber.Matrix.fst ---- extraction-edited/Libcrux.Kem.Kyber.Matrix.fst 2024-03-12 10:45:44.791930369 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Matrix.fst 2024-03-12 10:45:44.884927625 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Matrix.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Matrix.fst 2024-03-13 11:03:50 @@ -3,418 +3,432 @@ open Core open FStar.Mul @@ -3380,7 +2973,14 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep - let wfZero: wfPolynomialRingElement = (Libcrux.Kem.Kyber.Arithmetic.cast_poly_b #1 #3328 Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO) in - let result:t_Array wfPolynomialRingElement v_K = - Rust_primitives.Hax.repeat wfZero v_K -- in ++let compute_As_plus_e ++ (v_K: usize) ++ (matrix_A: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) ++ (s_as_ntt 
error_as_ntt: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) ++ = ++ let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K + in - [@ inline_let] - let inv0 = fun (acc:t_Array wfPolynomialRingElement v_K) (i:usize) -> - (v i <= v v_K) /\ @@ -3390,14 +2990,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep - let result:t_Array wfPolynomialRingElement v_K = - Rust_primitives.Iterators.foldi_slice #(t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) #(t_Array wfPolynomialRingElement v_K) #inv0 - matrix_A -+let compute_As_plus_e -+ (v_K: usize) -+ (matrix_A: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) -+ (s_as_ntt error_as_ntt: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) -+ = -+ let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -+ in + let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate + (Core.Slice.impl__iter (Rust_primitives.unsize matrix_A @@ -3583,24 +3175,17 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep - assert (forall (j:usize). (v j >= v i + 1 /\ v j < v v_K) ==> derefine_poly_b result.[j] == derefine_poly_b orig_result.[j]); - assume (inv0 result (i +! sz 1)); - result) -- in ++ Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement))) + in - admit(); //P-F - result -#pop-options -+ Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement))) -+ in + result -#push-options "--ifuel 0 --z3rlimit 100" -let compute_message #p v_K m_v secret_as_ntt u_as_ntt = - let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) = - Libcrux.Kem.Kyber.Arithmetic.cast_poly_b Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO -- in -- let acc_t = Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) in -- [@ inline_let] -- let inv = fun (acc:acc_t) (i:usize) -> -- (v i <= v v_K) /\ -- (poly_range #(v v_K * 3328) acc (v i * 3328)) +let compute_message + (v_K: usize) + (v: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) @@ -3609,6 +3194,12 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep + let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = + Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO in +- let acc_t = Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) in +- [@ inline_let] +- let inv = fun (acc:acc_t) (i:usize) -> +- (v i <= v v_K) /\ +- (poly_range #(v v_K * 3328) acc (v i * 3328)) +- in - let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) = - Rust_primitives.Iterators.foldi_range #_ #acc_t #inv { + let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = @@ -3726,13 +3317,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep -let compute_ring_element_v v_K tt_as_ntt r_as_ntt error_2_ message = - let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) = - Libcrux.Kem.Kyber.Arithmetic.cast_poly_b Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO -- in -- [@ inline_let] -- let inv = fun (acc:t_PolynomialRingElement_b (v v_K * 
3328)) (i:usize) -> -- (v i <= 256) /\ -- (poly_range acc (v i * 3328)) in -- let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) = -- Rust_primitives.Iterators.foldi_range #_ #_ #inv ({ +let compute_ring_element_v + (v_K: usize) + (tt_as_ntt r_as_ntt: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) @@ -3740,7 +3324,13 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep + = + let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = + Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO -+ in + in +- [@ inline_let] +- let inv = fun (acc:t_PolynomialRingElement_b (v v_K * 3328)) (i:usize) -> +- (v i <= 256) /\ +- (poly_range acc (v i * 3328)) in +- let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328) = +- Rust_primitives.Iterators.foldi_range #_ #_ #inv ({ + let result:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ Core.Ops.Range.f_start = sz 0; @@ -3857,7 +3447,12 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep - let wfZero: wfPolynomialRingElement = (Libcrux.Kem.Kyber.Arithmetic.cast_poly_b #1 #3328 Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO) in - let result:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = - Rust_primitives.Hax.repeat wfZero v_K -- in ++ (a_as_ntt: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) ++ (r_as_ntt error_1_: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) ++ = ++ let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = ++ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K + in - let acc_t = t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K in - [@ inline_let] - let inv0 = fun (acc:t_Array wfPolynomialRingElement v_K) (i:usize) -> @@ -3867,12 +3462,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep - let result:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = - Rust_primitives.Iterators.foldi_slice #(t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K) #acc_t #inv0 - a_as_ntt -+ (a_as_ntt: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) -+ (r_as_ntt error_1_: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) -+ = -+ let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = -+ Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K -+ in + let result:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Iter.Traits.Iterator.f_enumerate + (Core.Slice.impl__iter (Rust_primitives.unsize a_as_ntt @@ -3974,7 +3563,11 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep - (forall (j:usize). (v j > v i /\ v j < v v_K) ==> acc.[j] == orig_result_cast.[j]) /\ - (forall (j:usize). 
(v j < v inner) ==> (i32_range (acc.[i] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (64*v v_K * 3328)).f_coefficients.[j] 3328)) - // And all indexes above v inner are unchanged from result1 -- in ++ (Libcrux.Kem.Kyber.Ntt.invert_ntt_montgomery v_K ++ (result.[ i ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) ++ <: ++ Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) + in - assert (forall (j:usize). (v j < v i /\ v j < v v_K) ==> result.[j] == orig_result_cast.[j]); - assert (forall (j:usize). (v j > v i /\ v j < v v_K) ==> result.[j] == orig_result_cast.[j]); - assert (inv2 result (sz 0)); @@ -3983,11 +3576,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep - Core.Ops.Range.f_start = sz 0; - Core.Ops.Range.f_end = Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT - } -+ (Libcrux.Kem.Kyber.Ntt.invert_ntt_montgomery v_K -+ (result.[ i ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -+ <: -+ Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -+ in + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + Core.Ops.Range.f_start = sz 0; + Core.Ops.Range.f_end @@ -4173,8 +3761,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fst extraction-secret-indep - admit(); //P-F v_A_transpose diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Matrix.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti 2024-03-12 10:45:44.834929100 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Matrix.fsti 2024-03-12 10:45:44.892927389 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Matrix.fsti 2024-03-13 11:03:50 @@ -3,71 +3,39 @@ open Core open FStar.Mul @@ -4195,13 +3783,13 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti extraction-secret-inde - (to_spec_matrix_b #p matrix_A) - (to_spec_vector_b #p s_as_ntt) - (to_spec_vector_b #p error_as_ntt)) -- + (matrix_A: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) + (s_as_ntt error_as_ntt: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) + : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) + Prims.l_True + (fun _ -> Prims.l_True) +- -val compute_message (#p:Spec.Kyber.params) +val compute_message (v_K: usize) @@ -4256,7 +3844,12 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti extraction-secret-inde - let e_spec = Libcrux.Kem.Kyber.Arithmetic.to_spec_vector_b #p error_1_ in - let res_spec = Libcrux.Kem.Kyber.Arithmetic.to_spec_vector_b #p res in - res_spec == Spec.Kyber.(vector_add (vector_inv_ntt (matrix_vector_mul a_spec r_spec)) e_spec)) -- ++ (a_as_ntt: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) ++ (r_as_ntt error_1_: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) ++ : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) ++ Prims.l_True ++ (fun _ -> Prims.l_True) + - - -val sample_matrix_A (#p:Spec.Kyber.params) (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) @@ -4266,19 +3859,13 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Matrix.fsti extraction-secret-inde - let matrix_A = Spec.Kyber.sample_matrix_A #p (Seq.slice seed 0 32) in - if transpose then Libcrux.Kem.Kyber.Arithmetic.to_spec_matrix_b #p res == matrix_A - else Libcrux.Kem.Kyber.Arithmetic.to_spec_matrix_b #p res == Spec.Kyber.matrix_transpose 
matrix_A) -+ (a_as_ntt: t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) -+ (r_as_ntt error_1_: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) -+ : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) -+ Prims.l_True -+ (fun _ -> Prims.l_True) -+ +val sample_matrix_A (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) + : Prims.Pure (t_Array (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) v_K) + Prims.l_True + (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fst extraction-secret-independent/Libcrux.Kem.Kyber.Ntt.fst ---- extraction-edited/Libcrux.Kem.Kyber.Ntt.fst 2024-03-12 10:45:44.820929513 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Ntt.fst 2024-03-12 10:45:44.878927802 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Ntt.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Ntt.fst 2024-03-13 11:03:50 @@ -1,130 +1,56 @@ module Libcrux.Kem.Kyber.Ntt -#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" @@ -4286,7 +3873,15 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fst extraction-secret-independ open Core open FStar.Mul -- ++let ntt_multiply_binomials (a0, a1: (i32 & i32)) (b0, b1: (i32 & i32)) (zeta: i32) = ++ Libcrux.Kem.Kyber.Arithmetic.montgomery_reduce ((a0 *! b0 <: i32) +! ++ ((Libcrux.Kem.Kyber.Arithmetic.montgomery_reduce (a1 *! b1 <: i32) <: i32) *! zeta <: i32) ++ <: ++ i32), ++ Libcrux.Kem.Kyber.Arithmetic.montgomery_reduce ((a0 *! b1 <: i32) +! (a1 *! b0 <: i32) <: i32) ++ <: ++ (i32 & i32) + -let v_ZETAS_TIMES_MONTGOMERY_R = - let list : list (i32_b 1664) = - [ @@ -4356,15 +3951,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fst extraction-secret-independ - -#push-options "--ifuel 0 --z3rlimit 1200" -let invert_ntt_at_layer #v_K #b zeta_i re layer = -+let ntt_multiply_binomials (a0, a1: (i32 & i32)) (b0, b1: (i32 & i32)) (zeta: i32) = -+ Libcrux.Kem.Kyber.Arithmetic.montgomery_reduce ((a0 *! b0 <: i32) +! -+ ((Libcrux.Kem.Kyber.Arithmetic.montgomery_reduce (a1 *! b1 <: i32) <: i32) *! zeta <: i32) -+ <: -+ i32), -+ Libcrux.Kem.Kyber.Arithmetic.montgomery_reduce ((a0 *! b1 <: i32) +! (a1 *! 
b0 <: i32) <: i32) -+ <: -+ (i32 & i32) -+ +let invert_ntt_at_layer + (zeta_i: usize) + (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) @@ -4599,7 +4185,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fst extraction-secret-independ in - re -#pop-options -- ++ re + -#push-options "--z3rlimit 500" -val mul_zeta_red2 (#b:nat{b <= 31175}) - (zeta_i:usize{v zeta_i >= 0 /\ v zeta_i <= 63} ) @@ -4617,8 +4204,7 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fst extraction-secret-independ - (v_ZETAS_TIMES_MONTGOMERY_R.[ zeta_i ] <: i32) in - red -#pop-options -+ re - +- -#push-options "--ifuel 0 --z3rlimit 5000" -let ntt_at_layer #b zeta_i re layer initial_coefficient_bound = - let step = sz 1 < (i32 & i32) -> zeta: i32 -+ -> Prims.Pure (i32 & i32) Prims.l_True (fun _ -> Prims.l_True) -+ -+val invert_ntt_at_layer -+ (zeta_i: usize) -+ (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -+ (layer: usize) -+ : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -+ Prims.l_True -+ (fun _ -> Prims.l_True) -+ -+val invert_ntt_montgomery (v_K: usize) (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -+ : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement -+ Prims.l_True -+ (fun _ -> Prims.l_True) -val ntt_multiply_binomials (a:wfFieldElement&wfFieldElement) (b: wfFieldElement&wfFieldElement) (zeta: i32_b 1664) : - Pure (wfFieldElement & wfFieldElement) - (requires True) - (ensures (fun _ -> True)) -- ++val ntt_multiply_binomials: (i32 & i32) -> (i32 & i32) -> zeta: i32 ++ -> Prims.Pure (i32 & i32) Prims.l_True (fun _ -> Prims.l_True) + -val invert_ntt_at_layer (#v_K:usize{v v_K >= 1 /\ v v_K <= 4}) - (#b:nat{b <= v v_K * 3328 * 64}) - (zeta_i: usize{v zeta_i >= 1 /\ v zeta_i <= 128}) @@ -5267,10 +4839,10 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti extraction-secret-indepen - v zeta_i == pow2 (8 - v layer) /\ - b == v v_K * 3328 * pow2(v layer - 1)}) - : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (2*b)) -+val ntt_at_layer ++val invert_ntt_at_layer + (zeta_i: usize) + (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) -+ (layer initial_coefficient_bound: usize) ++ (layer: usize) + : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) Prims.l_True - (fun x -> let (zeta_fin,re) = x in v zeta_fin == pow2 (7 - v layer)) @@ -5279,7 +4851,11 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti extraction-secret-indepen -val invert_ntt_montgomery (v_K: usize{v v_K >= 1 /\ v v_K <= 4}) - (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (v v_K * 3328)) - : Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (64 * v v_K * 3328) -- ++val invert_ntt_montgomery (v_K: usize) (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) ++ : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement ++ Prims.l_True ++ (fun _ -> Prims.l_True) + -val ntt_at_layer - (#b:nat{b <= 31175}) - (zeta_i: usize{v zeta_i < 128}) @@ -5291,7 +4867,14 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti extraction-secret-indepen - : Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b (3328+b)) - (requires True) - (ensures fun (zeta_i, result) -> v zeta_i == pow2 (8 - v layer) - 1) -- ++val ntt_at_layer ++ (zeta_i: usize) ++ (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) ++ (layer initial_coefficient_bound: usize) ++ : Prims.Pure (usize & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) ++ Prims.l_True ++ (fun _ -> Prims.l_True) 
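Note on the Ntt hunks above: the edited column's bounds-refined signatures (`i32_b`, `t_PolynomialRingElement_b`, and the `pow2`-indexed `zeta_i` post-conditions) are replaced in the secret-independent column by trivial `Prims.l_True` pre- and post-conditions; the computation itself is unchanged. For orientation, the base case recorded by `ntt_multiply_binomials`, multiplying two degree-one polynomials modulo X^2 - zeta with Montgomery reductions folded in (zeta is supplied in Montgomery form, per `v_ZETAS_TIMES_MONTGOMERY_R`), corresponds to roughly the following Rust. This is a sketch only: the `montgomery_reduce` body below uses the standard Kyber constants q = 3329 and q^-1 mod 2^16 = 62209, which are assumptions about the helper's behavior, not a quote of the libcrux implementation.

```rust
const FIELD_MODULUS: i32 = 3329; // q (assumed standard Kyber modulus)

/// Signed Montgomery reduction: returns r with r * 2^16 == value (mod q)
/// and |r| < q, for |value| within roughly 2^15 * q. Sketch of the helper
/// the extracted code calls; constants are assumptions, not libcrux source.
fn montgomery_reduce(value: i32) -> i32 {
    // 62209 = q^{-1} mod 2^16; the i16 casts keep only the low 16 bits,
    // so t == value * q^{-1} (mod 2^16) in centered representation.
    let t = (value as i16).wrapping_mul(62209u16 as i16);
    // value - t*q is divisible by 2^16; the shift performs that division.
    (value - (t as i32) * FIELD_MODULUS) >> 16
}

/// (a0 + a1*X) * (b0 + b1*X) mod (X^2 - zeta): the shape of
/// `ntt_multiply_binomials` in the hunk above.
fn ntt_multiply_binomials((a0, a1): (i32, i32), (b0, b1): (i32, i32), zeta: i32) -> (i32, i32) {
    (
        montgomery_reduce(a0 * b0 + montgomery_reduce(a1 * b1) * zeta),
        montgomery_reduce(a0 * b1 + a1 * b0),
    )
}
```

The interesting part for this patch is what is *not* in the Rust: the value-range reasoning (the `3328` coefficient bounds threaded through the edited column) lives entirely in the discarded refinement types, which is why the right-hand column can drop them without touching the function bodies.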
+ -val ntt_at_layer_3_ (#b:nat) - (zeta_i: usize{v zeta_i < 128}) - (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b b) @@ -5325,7 +4908,11 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti extraction-secret-indepen Prims.l_True - (ensures fun (zeta_i,result) -> v zeta_i == pow2 (8 - v layer) - 1) + (fun _ -> Prims.l_True) -+ + +-val ntt_binomially_sampled_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 7) +- : Prims.Pure (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) +- (requires True) +- (ensures (fun _ -> True)) +val ntt_binomially_sampled_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) + : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement + (requires @@ -5379,7 +4966,13 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti extraction-secret-indepen + bool) + <: + bool)) -+ + +-val ntt_multiply (lhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) +- (rhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) +- : Prims.Pure (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) +- (requires True) +- (ensures (fun _ -> True)) +- +val ntt_multiply (lhs rhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) + : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement + (requires @@ -5431,18 +5024,7 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti extraction-secret-indepen + bool) + <: + bool)) - --val ntt_binomially_sampled_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 7) -- : Prims.Pure (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) -- (requires True) -- (ensures (fun _ -> True)) -- --val ntt_multiply (lhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) -- (rhs: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) -- : Prims.Pure (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) -- (requires True) -- (ensures (fun _ -> True)) -- ++ val ntt_vector_u (v_VECTOR_U_COMPRESSION_FACTOR: usize) - (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement_b 3328) @@ -5504,8 +5086,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Ntt.fsti extraction-secret-indepen + <: + bool)) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Sampling.fst extraction-secret-independent/Libcrux.Kem.Kyber.Sampling.fst ---- extraction-edited/Libcrux.Kem.Kyber.Sampling.fst 2024-03-12 10:45:44.831929189 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Sampling.fst 2024-03-12 10:45:44.862928274 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Sampling.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Sampling.fst 2024-03-13 11:03:50 @@ -3,34 +3,27 @@ open Core open FStar.Mul @@ -5942,8 +5524,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Sampling.fst extraction-secret-ind -#pop-options + out diff -ruN extraction-edited/Libcrux.Kem.Kyber.Sampling.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Sampling.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Sampling.fsti 2024-03-12 10:45:44.821929484 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Sampling.fsti 2024-03-12 10:45:44.855928480 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Sampling.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Sampling.fsti 2024-03-13 11:03:50 @@ -3,37 +3,77 @@ open Core open FStar.Mul @@ -6044,8 +5626,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Sampling.fsti extraction-secret-in + Prims.l_True + (fun _ -> Prims.l_True) diff -ruN 
extraction-edited/Libcrux.Kem.Kyber.Serialize.fst extraction-secret-independent/Libcrux.Kem.Kyber.Serialize.fst ---- extraction-edited/Libcrux.Kem.Kyber.Serialize.fst 2024-03-12 10:45:44.810929808 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Serialize.fst 2024-03-12 10:45:44.883927654 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Serialize.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Serialize.fst 2024-03-13 11:03:50 @@ -1,15 +1,8 @@ module Libcrux.Kem.Kyber.Serialize -#set-options "--fuel 0 --ifuel 0 --z3rlimit 50 --retry 3" @@ -6188,12 +5770,11 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fst extraction-secret-in - bit_vec_equal_intro_principle (); - coefficient1, coefficient2 -#pop-options -- ++ coefficient1, coefficient2 <: (i32 & i32) + -#push-options "--z3rlimit 400" -[@@"opaque_to_smt"] -let decompress_coefficients_5_ byte1 byte2 byte3 byte4 byte5 = -+ coefficient1, coefficient2 <: (i32 & i32) -+ +let decompress_coefficients_5_ (byte1 byte2 byte3 byte4 byte5: i32) = let coefficient1:i32 = byte1 &. 31l in let coefficient2:i32 = ((byte2 &. 3l <: i32) <>! 5l <: i32) in @@ -6219,7 +5800,9 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fst extraction-secret-in coefficient7, coefficient8 -#pop-options -- ++ <: ++ (i32 & i32 & i32 & i32 & i32 & i32 & i32 & i32) + -let cast_bound_lemma - #t #u - (n: int_t t) @@ -6246,9 +5829,7 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fst extraction-secret-in -#pop-options - -#restart-solver -+ <: -+ (i32 & i32 & i32 & i32 & i32 & i32 & i32 & i32) - +- -#push-options "--fuel 0 --ifuel 1 --query_stats --z3rlimit 100" -[@@"opaque_to_smt"] let compress_then_serialize_10_ @@ -7529,8 +7110,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fst extraction-secret-in -#pop-options - diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-independent/Libcrux.Kem.Kyber.Serialize.fsti ---- extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti 2024-03-12 10:45:44.815929661 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Serialize.fsti 2024-03-12 10:45:44.889927477 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Serialize.fsti 2024-03-13 11:03:50 @@ -2,188 +2,118 @@ #set-options "--fuel 0 --ifuel 1 --z3rlimit 15" open Core @@ -7559,7 +7140,9 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i - (create8 (coefficient1, coefficient2, coefficient3, coefficient4, coefficient5, coefficient6, coefficient7, coefficient8)) 11 - (create11 tuple) 8 - ) -- ++ Prims.l_True ++ (fun _ -> Prims.l_True) + -val compress_coefficients_3_ (coefficient1 coefficient2: int_t_d u16_inttype 12) - : Prims.Pure (u8 & u8 & u8) - (requires True) @@ -7568,9 +7151,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i - (create2 (coefficient1, coefficient2)) 12 - (create3 tuple) 8 - ) -+ Prims.l_True -+ (fun _ -> Prims.l_True) -+ +val compress_coefficients_3_ (coefficient1 coefficient2: u16) + : Prims.Pure (u8 & u8 & u8) Prims.l_True (fun _ -> Prims.l_True) @@ -7583,7 +7163,10 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i - (create8 (coefficient1, coefficient2, coefficient3, coefficient4, coefficient5, coefficient6, coefficient7, coefficient8)) 5 - (create5 tuple) 8 - ) -- ++ (coefficient2 coefficient1 coefficient4 coefficient3 coefficient5 coefficient7 coefficient6 coefficient8: ++ u8) 
++ : Prims.Pure (u8 & u8 & u8 & u8 & u8) Prims.l_True (fun _ -> Prims.l_True) + -private unfold type i32_d = int_t_d i32_inttype -val decompress_coefficients_10_ (byte2 byte1 byte3 byte4 byte5: int_t_d i32_inttype 8) - : Prims.Pure (i32_d 10 & i32_d 10 & i32_d 10 & i32_d 10) @@ -7593,10 +7176,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i - (create5 (byte1, byte2, byte3, byte4, byte5)) 8 - (create4 #i32 (r1, r2, r3, r4)) 10 - ) -+ (coefficient2 coefficient1 coefficient4 coefficient3 coefficient5 coefficient7 coefficient6 coefficient8: -+ u8) -+ : Prims.Pure (u8 & u8 & u8 & u8 & u8) Prims.l_True (fun _ -> Prims.l_True) -+ +val decompress_coefficients_10_ (byte2 byte1 byte3 byte4 byte5: i32) + : Prims.Pure (i32 & i32 & i32 & i32) Prims.l_True (fun _ -> Prims.l_True) @@ -7622,7 +7201,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i - (create1 byte) 8 - (create2 #i32 (r1, r2)) 4 - ) -- ++ : Prims.Pure (i32 & i32) Prims.l_True (fun _ -> Prims.l_True) + -val decompress_coefficients_5_ (byte1 byte2 byte3 byte4 byte5: int_t_d i32_inttype 8) - : Prims.Pure (i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5 & i32_d 5) - (requires True) @@ -7631,8 +7211,6 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i - (create5 #i32 (byte1, byte2, byte3, byte4, byte5)) 8 - (create8 #i32 (r1, r2, r3, r4, r5, r6, r7, r8)) 5 - ) -+ : Prims.Pure (i32 & i32) Prims.l_True (fun _ -> Prims.l_True) -+ +val decompress_coefficients_5_ (byte1 byte2 byte3 byte4 byte5: i32) + : Prims.Pure (i32 & i32 & i32 & i32 & i32 & i32 & i32 & i32) + Prims.l_True @@ -7694,14 +7272,14 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i + (v_COMPRESSION_FACTOR v_OUT_LEN: usize) + (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) + : Prims.Pure (t_Array u8 v_OUT_LEN) Prims.l_True (fun _ -> Prims.l_True) -+ + +-val deserialize_then_decompress_10_ (serialized: t_Slice u8 {Seq.length serialized == 320}) +- : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement +val compress_then_serialize_ring_element_v + (v_COMPRESSION_FACTOR v_OUT_LEN: usize) + (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) + : Prims.Pure (t_Array u8 v_OUT_LEN) Prims.l_True (fun _ -> Prims.l_True) - --val deserialize_then_decompress_10_ (serialized: t_Slice u8 {Seq.length serialized == 320}) -- : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement ++ +val deserialize_then_decompress_10_ (serialized: t_Slice u8) + : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement Prims.l_True @@ -7773,7 +7351,10 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i - : Pure (Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) - (requires (length serialized == Spec.Kyber.v_BYTES_PER_RING_ELEMENT)) - (ensures fun _ -> True) -- ++ : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement ++ Prims.l_True ++ (fun _ -> Prims.l_True) + -val serialize_uncompressed_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement) - : Pure (t_Array u8 (sz 384)) - (requires True) @@ -7781,15 +7362,11 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Serialize.fsti extraction-secret-i - let coefficients: t_Array _ (sz 256) = Spec.Kyber.map' Libcrux.Kem.Kyber.Arithmetic.to_unsigned_representative re.f_coefficients in - int_t_array_bitwise_eq res 8 coefficients 12 - )) -+ : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement -+ 
Prims.l_True -+ (fun _ -> Prims.l_True) -+ +val serialize_uncompressed_ring_element (re: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) + : Prims.Pure (t_Array u8 (sz 384)) Prims.l_True (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux.Kem.Kyber.Types.fst extraction-secret-independent/Libcrux.Kem.Kyber.Types.fst ---- extraction-edited/Libcrux.Kem.Kyber.Types.fst 2024-03-12 10:45:44.796930221 +0100 -+++ extraction-secret-independent/Libcrux.Kem.Kyber.Types.fst 2024-03-12 10:45:44.860928333 +0100 +--- extraction-edited/Libcrux.Kem.Kyber.Types.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.Types.fst 2024-03-13 11:03:50 @@ -3,275 +3,193 @@ open Core open FStar.Mul @@ -7976,7 +7553,8 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Types.fst extraction-secret-indepe + : (t_Slice u8 & t_Slice u8) = Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid -- ++type t_KyberPublicKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } + - - - @@ -7987,8 +7565,7 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Types.fst extraction-secret-indepe - - -type t_MlKemPublicKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } -+type t_KyberPublicKey (v_SIZE: usize) = { f_value:t_Array u8 v_SIZE } - +- [@@ FStar.Tactics.Typeclasses.tcinstance] -let impl_13 (v_SIZE: usize) : Core.Convert.t_AsRef (t_MlKemPublicKey v_SIZE) (t_Slice u8) = - { @@ -8077,72 +7654,454 @@ diff -ruN extraction-edited/Libcrux.Kem.Kyber.Types.fst extraction-secret-indepe + : (t_Slice u8 & t_Slice u8) = Core.Slice.impl__split_at (Rust_primitives.unsize self.f_value <: t_Slice u8) mid --type t_MlKemKeyPair (v_PRIVATE_KEY_SIZE: usize) (v_PUBLIC_KEY_SIZE: usize) = { -- f_sk:t_MlKemPrivateKey v_PRIVATE_KEY_SIZE; -- f_pk:t_MlKemPublicKey v_PUBLIC_KEY_SIZE -+type t_KyberKeyPair (v_PRIVATE_KEY_SIZE: usize) (v_PUBLIC_KEY_SIZE: usize) = { -+ f_sk:t_KyberPrivateKey v_PRIVATE_KEY_SIZE; -+ f_pk:t_KyberPublicKey v_PUBLIC_KEY_SIZE - } +-type t_MlKemKeyPair (v_PRIVATE_KEY_SIZE: usize) (v_PUBLIC_KEY_SIZE: usize) = { +- f_sk:t_MlKemPrivateKey v_PRIVATE_KEY_SIZE; +- f_pk:t_MlKemPublicKey v_PUBLIC_KEY_SIZE ++type t_KyberKeyPair (v_PRIVATE_KEY_SIZE: usize) (v_PUBLIC_KEY_SIZE: usize) = { ++ f_sk:t_KyberPrivateKey v_PRIVATE_KEY_SIZE; ++ f_pk:t_KyberPublicKey v_PUBLIC_KEY_SIZE + } + + let impl__from + (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) +- (sk: t_MlKemPrivateKey v_PRIVATE_KEY_SIZE) +- (pk: t_MlKemPublicKey v_PUBLIC_KEY_SIZE) +- : t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE = +- { f_sk = sk; f_pk = pk } <: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE ++ (sk: t_KyberPrivateKey v_PRIVATE_KEY_SIZE) ++ (pk: t_KyberPublicKey v_PUBLIC_KEY_SIZE) ++ : t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE = ++ { f_sk = sk; f_pk = pk } <: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE + + let impl__new + (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) + (sk: t_Array u8 v_PRIVATE_KEY_SIZE) + (pk: t_Array u8 v_PUBLIC_KEY_SIZE) +- : t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE = ++ : t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE = + { f_sk = Core.Convert.f_into sk; f_pk = Core.Convert.f_into pk } + <: +- t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE ++ t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE + + let impl__pk + (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) +- (self: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) ++ (self: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) + : t_Array u8 v_PUBLIC_KEY_SIZE = impl_18__as_slice v_PUBLIC_KEY_SIZE 
self.f_pk + + let impl__private_key + (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) +- (self: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) +- : t_MlKemPrivateKey v_PRIVATE_KEY_SIZE = self.f_sk ++ (self: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) ++ : t_KyberPrivateKey v_PRIVATE_KEY_SIZE = self.f_sk + + let impl__public_key + (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) +- (self: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) +- : t_MlKemPublicKey v_PUBLIC_KEY_SIZE = self.f_pk ++ (self: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) ++ : t_KyberPublicKey v_PUBLIC_KEY_SIZE = self.f_pk + + let impl__sk + (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) +- (self: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) ++ (self: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) + : t_Array u8 v_PRIVATE_KEY_SIZE = impl_12__as_slice v_PRIVATE_KEY_SIZE self.f_sk +diff -ruN extraction-edited/Libcrux.Kem.Kyber.fst extraction-secret-independent/Libcrux.Kem.Kyber.fst +--- extraction-edited/Libcrux.Kem.Kyber.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.fst 2024-03-13 11:03:50 +@@ -1,29 +1,12 @@ + module Libcrux.Kem.Kyber +-#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" ++#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" + open Core + open FStar.Mul + +-let update_at_range_lemma #n +- (s: t_Slice 't) +- (i: Core.Ops.Range.t_Range (int_t n) {(Core.Ops.Range.impl_index_range_slice 't n).f_index_pre s i}) +- (x: t_Slice 't) +- : Lemma +- (requires (Seq.length x == v i.f_end - v i.f_start)) +- (ensures ( +- let s' = Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x in +- let len = v i.f_start in +- forall (i: nat). i < len ==> Seq.index s i == Seq.index s' i +- )) +- [SMTPat (Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x)] +- = let s' = Rust_primitives.Hax.Monomorphized_update_at.update_at_range s i x in +- let len = v i.f_start in +- introduce forall (i:nat {i < len}). Seq.index s i == Seq.index s' i +- with (assert ( Seq.index (Seq.slice s 0 len) i == Seq.index s i +- /\ Seq.index (Seq.slice s' 0 len) i == Seq.index s' i )) +- +-let serialize_kem_secret_key #p ++let serialize_kem_secret_key + (v_SERIALIZED_KEY_LEN: usize) +- (private_key public_key implicit_rejection_value: t_Slice u8) = ++ (private_key public_key implicit_rejection_value: t_Slice u8) ++ = + let out:t_Array u8 v_SERIALIZED_KEY_LEN = Rust_primitives.Hax.repeat 0uy v_SERIALIZED_KEY_LEN in + let pointer:usize = sz 0 in + let out:t_Array u8 v_SERIALIZED_KEY_LEN = +@@ -72,8 +55,6 @@ + t_Slice u8) + in + let pointer:usize = pointer +! (Core.Slice.impl__len public_key <: usize) in +- let h_public_key = (Rust_primitives.unsize (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) +- <: t_Slice u8) in + let out:t_Array u8 v_SERIALIZED_KEY_LEN = + Rust_primitives.Hax.Monomorphized_update_at.update_at_range out + ({ +@@ -89,7 +70,16 @@ + pointer +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE <: usize + } + <: +- Core.Ops.Range.t_Range usize ]) h_public_key) ++ Core.Ops.Range.t_Range usize ] ++ <: ++ t_Slice u8) ++ (Rust_primitives.unsize (Libcrux.Kem.Kyber.Hash_functions.v_H public_key ++ <: ++ t_Array u8 (sz 32)) ++ <: ++ t_Slice u8) ++ <: ++ t_Slice u8) + in + let pointer:usize = pointer +! 
Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE in + let out:t_Array u8 v_SERIALIZED_KEY_LEN = +@@ -116,32 +106,14 @@ + <: + t_Slice u8) + in +- assert (Seq.slice out 0 (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p)) `Seq.equal` private_key); +- assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p)) +- (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p)) `Seq.equal` public_key); +- assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! +- Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p)) +- (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! +- Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! +- Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)) +- `Seq.equal` Libcrux.Kem.Kyber.Hash_functions.v_H public_key); +- assert (Seq.slice out (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! +- Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! +- Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)) +- (v #usize_inttype (Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p +! +- Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p +! +- Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE +! +- Spec.Kyber.v_SHARED_SECRET_SIZE)) +- == implicit_rejection_value); +- lemma_slice_append_4 out private_key public_key (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) implicit_rejection_value; + out + +-let decapsulate #p ++let decapsulate + (v_K v_SECRET_KEY_SIZE v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: + usize) +- (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_SECRET_KEY_SIZE) +- (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) = +- let orig_secret_key = secret_key.f_value in ++ (secret_key: Libcrux.Kem.Kyber.Types.t_KyberPrivateKey v_SECRET_KEY_SIZE) ++ (ciphertext: Libcrux.Kem.Kyber.Types.t_KyberCiphertext v_CIPHERTEXT_SIZE) ++ = + let ind_cpa_secret_key, secret_key:(t_Slice u8 & t_Slice u8) = + Libcrux.Kem.Kyber.Types.impl_12__split_at v_SECRET_KEY_SIZE secret_key v_CPA_SECRET_KEY_SIZE + in +@@ -151,12 +123,8 @@ + let ind_cpa_public_key_hash, implicit_rejection_value:(t_Slice u8 & t_Slice u8) = + Core.Slice.impl__split_at secret_key Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE + in +- assert (ind_cpa_secret_key == slice orig_secret_key (sz 0) v_CPA_SECRET_KEY_SIZE); +- assert (ind_cpa_public_key == slice orig_secret_key v_CPA_SECRET_KEY_SIZE (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE)); +- assert (ind_cpa_public_key_hash == slice orig_secret_key (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE) (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE +! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE)); +- assert (implicit_rejection_value == slice orig_secret_key (v_CPA_SECRET_KEY_SIZE +! v_PUBLIC_KEY_SIZE +! 
Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE) (length orig_secret_key)); + let decrypted:t_Array u8 (sz 32) = +- Libcrux.Kem.Kyber.Ind_cpa.decrypt #p v_K ++ Libcrux.Kem.Kyber.Ind_cpa.decrypt v_K + v_CIPHERTEXT_SIZE + v_C1_SIZE + v_VECTOR_U_COMPRESSION_FACTOR +@@ -184,9 +152,6 @@ + <: + t_Slice u8) + in +- lemma_slice_append to_hash decrypted ind_cpa_public_key_hash; +- assert (decrypted == Spec.Kyber.ind_cpa_decrypt p ind_cpa_secret_key ciphertext.f_value); +- assert (to_hash == concat decrypted ind_cpa_public_key_hash); + let hashed:t_Array u8 (sz 64) = + Libcrux.Kem.Kyber.Hash_functions.v_G (Rust_primitives.unsize to_hash <: t_Slice u8) + in +@@ -194,10 +159,6 @@ + Core.Slice.impl__split_at (Rust_primitives.unsize hashed <: t_Slice u8) + Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE + in +- assert ((shared_secret,pseudorandomness) == split hashed Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE); +- assert (length implicit_rejection_value = v_SECRET_KEY_SIZE -! v_CPA_SECRET_KEY_SIZE -! v_PUBLIC_KEY_SIZE -! Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE); +- assert (length implicit_rejection_value = Spec.Kyber.v_SHARED_SECRET_SIZE); +- assert (Spec.Kyber.v_SHARED_SECRET_SIZE <=. Spec.Kyber.v_IMPLICIT_REJECTION_HASH_INPUT_SIZE p); + let (to_hash: t_Array u8 v_IMPLICIT_REJECTION_HASH_INPUT_SIZE):t_Array u8 + v_IMPLICIT_REJECTION_HASH_INPUT_SIZE = + Libcrux.Kem.Kyber.Ind_cpa.into_padded_array v_IMPLICIT_REJECTION_HASH_INPUT_SIZE +@@ -219,14 +180,11 @@ + <: + t_Slice u8) + in +- lemma_slice_append to_hash implicit_rejection_value ciphertext.f_value; + let (implicit_rejection_shared_secret: t_Array u8 (sz 32)):t_Array u8 (sz 32) = + Libcrux.Kem.Kyber.Hash_functions.v_PRF (sz 32) (Rust_primitives.unsize to_hash <: t_Slice u8) + in +- assert (implicit_rejection_shared_secret == Spec.Kyber.v_J to_hash); +- assert (Seq.length ind_cpa_public_key == v v_PUBLIC_KEY_SIZE); + let expected_ciphertext:t_Array u8 v_CIPHERTEXT_SIZE = +- Libcrux.Kem.Kyber.Ind_cpa.encrypt #p v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE ++ Libcrux.Kem.Kyber.Ind_cpa.encrypt v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE + v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 + v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE ind_cpa_public_key decrypted + pseudorandomness +@@ -236,18 +194,16 @@ + (Core.Convert.f_as_ref ciphertext <: t_Slice u8) + (Rust_primitives.unsize expected_ciphertext <: t_Slice u8) + in +- let res = + Libcrux.Kem.Kyber.Constant_time_ops.select_shared_secret_in_constant_time shared_secret + (Rust_primitives.unsize implicit_rejection_shared_secret <: t_Slice u8) + selector +- in +- res + +-let encapsulate #p ++let encapsulate + (v_K v_CIPHERTEXT_SIZE v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: + usize) +- (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) +- (randomness: t_Array u8 (sz 32)) = ++ (public_key: Libcrux.Kem.Kyber.Types.t_KyberPublicKey v_PUBLIC_KEY_SIZE) ++ (randomness: t_Array u8 (sz 32)) ++ = + let (to_hash: t_Array u8 (sz 64)):t_Array u8 (sz 64) = + Libcrux.Kem.Kyber.Ind_cpa.into_padded_array (sz 64) + (Rust_primitives.unsize randomness <: t_Slice u8) +@@ -278,10 +234,6 @@ + <: + t_Slice u8) + in +- assert (Seq.slice to_hash 0 (v Libcrux.Kem.Kyber.Constants.v_H_DIGEST_SIZE) == randomness); +- lemma_slice_append to_hash randomness (Spec.Kyber.v_H 
public_key.f_value); +- assert (to_hash == concat randomness (Spec.Kyber.v_H public_key.f_value)); +- + let hashed:t_Array u8 (sz 64) = + Libcrux.Kem.Kyber.Hash_functions.v_G (Rust_primitives.unsize to_hash <: t_Slice u8) + in +@@ -290,7 +242,7 @@ + Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE + in + let ciphertext:t_Array u8 v_CIPHERTEXT_SIZE = +- Libcrux.Kem.Kyber.Ind_cpa.encrypt #p v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE ++ Libcrux.Kem.Kyber.Ind_cpa.encrypt v_K v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE + v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN + v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE + (Rust_primitives.unsize (Libcrux.Kem.Kyber.Types.impl_18__as_slice v_PUBLIC_KEY_SIZE +@@ -300,42 +252,23 @@ + <: + t_Slice u8) randomness pseudorandomness + in +- Core.Convert.f_into ciphertext, +- Core.Result.impl__unwrap (Core.Convert.f_try_into shared_secret +- <: +- Core.Result.t_Result (t_Array u8 (sz 32)) Core.Array.t_TryFromSliceError) +- <: +- (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) +- +-#push-options "--z3rlimit 100" +-let validate_public_key #p +- (v_K v_RANKED_BYTES_PER_RING_ELEMENT v_PUBLIC_KEY_SIZE: usize) +- (public_key: t_Array u8 v_PUBLIC_KEY_SIZE) +- = +- let pk:t_Array Libcrux.Kem.Kyber.Arithmetic.wfPolynomialRingElement v_K = +- Libcrux.Kem.Kyber.Ind_cpa.deserialize_public_key #p v_K +- (public_key.[ { Core.Ops.Range.f_end = v_RANKED_BYTES_PER_RING_ELEMENT } ++ let shared_secret:t_Array u8 (sz 32) = ++ match Core.Convert.f_try_into shared_secret with ++ | Core.Result.Result_Ok shared_secret -> shared_secret ++ | Core.Result.Result_Err _ -> ++ Rust_primitives.Hax.never_to_any (Core.Panicking.panic "explicit panic" + <: +- Core.Ops.Range.t_RangeTo usize ]) ++ Rust_primitives.Hax.t_Never) + in +- let public_key_serialized:t_Array u8 v_PUBLIC_KEY_SIZE = +- Libcrux.Kem.Kyber.Ind_cpa.serialize_public_key #p v_K +- v_RANKED_BYTES_PER_RING_ELEMENT +- v_PUBLIC_KEY_SIZE +- pk +- (public_key.[ { Core.Ops.Range.f_start = v_RANKED_BYTES_PER_RING_ELEMENT } +- <: +- Core.Ops.Range.t_RangeFrom usize ] +- <: +- t_Slice u8) +- in +- public_key =. 
public_key_serialized +-#pop-options ++ Core.Convert.f_into ciphertext, shared_secret ++ <: ++ (Libcrux.Kem.Kyber.Types.t_KyberCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) + +-let generate_keypair #p ++let generate_keypair + (v_K v_CPA_PRIVATE_KEY_SIZE v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE v_BYTES_PER_RING_ELEMENT v_ETA1 v_ETA1_RANDOMNESS_SIZE: + usize) +- (randomness: t_Array u8 (sz 64)) = ++ (randomness: t_Array u8 (sz 64)) ++ = + let ind_cpa_keypair_randomness:t_Slice u8 = + randomness.[ { + Core.Ops.Range.f_start = sz 0; +@@ -353,7 +286,7 @@ + in + let ind_cpa_private_key, public_key:(t_Array u8 v_CPA_PRIVATE_KEY_SIZE & + t_Array u8 v_PUBLIC_KEY_SIZE) = +- Libcrux.Kem.Kyber.Ind_cpa.generate_keypair #p v_K ++ Libcrux.Kem.Kyber.Ind_cpa.generate_keypair v_K + v_CPA_PRIVATE_KEY_SIZE + v_PUBLIC_KEY_SIZE + v_BYTES_PER_RING_ELEMENT +@@ -362,17 +295,16 @@ + ind_cpa_keypair_randomness + in + let secret_key_serialized:t_Array u8 v_PRIVATE_KEY_SIZE = +- serialize_kem_secret_key #p v_PRIVATE_KEY_SIZE ++ serialize_kem_secret_key v_PRIVATE_KEY_SIZE + (Rust_primitives.unsize ind_cpa_private_key <: t_Slice u8) + (Rust_primitives.unsize public_key <: t_Slice u8) + implicit_rejection_value + in +- let (private_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_PRIVATE_KEY_SIZE):Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey ++ let (private_key: Libcrux.Kem.Kyber.Types.t_KyberPrivateKey v_PRIVATE_KEY_SIZE):Libcrux.Kem.Kyber.Types.t_KyberPrivateKey + v_PRIVATE_KEY_SIZE = + Core.Convert.f_from secret_key_serialized + in + Libcrux.Kem.Kyber.Types.impl__from v_PRIVATE_KEY_SIZE + v_PUBLIC_KEY_SIZE + private_key +- (Core.Convert.f_into public_key <: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) +- ++ (Core.Convert.f_into public_key <: Libcrux.Kem.Kyber.Types.t_KyberPublicKey v_PUBLIC_KEY_SIZE) +diff -ruN extraction-edited/Libcrux.Kem.Kyber.fsti extraction-secret-independent/Libcrux.Kem.Kyber.fsti +--- extraction-edited/Libcrux.Kem.Kyber.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux.Kem.Kyber.fsti 2024-03-13 11:03:50 +@@ -4,90 +4,37 @@ + open FStar.Mul - let impl__from - (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) -- (sk: t_MlKemPrivateKey v_PRIVATE_KEY_SIZE) -- (pk: t_MlKemPublicKey v_PUBLIC_KEY_SIZE) -- : t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE = -- { f_sk = sk; f_pk = pk } <: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE -+ (sk: t_KyberPrivateKey v_PRIVATE_KEY_SIZE) -+ (pk: t_KyberPublicKey v_PUBLIC_KEY_SIZE) -+ : t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE = -+ { f_sk = sk; f_pk = pk } <: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE + unfold +-let t_MlKemSharedSecret = t_Array u8 (sz 32) ++let t_KyberSharedSecret = t_Array u8 (sz 32) - let impl__new - (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) - (sk: t_Array u8 v_PRIVATE_KEY_SIZE) - (pk: t_Array u8 v_PUBLIC_KEY_SIZE) -- : t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE = -+ : t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE = - { f_sk = Core.Convert.f_into sk; f_pk = Core.Convert.f_into pk } - <: -- t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE -+ t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE + let v_KEY_GENERATION_SEED_SIZE: usize = + Libcrux.Kem.Kyber.Constants.v_CPA_PKE_KEY_GENERATION_SEED_SIZE +! 
+ Libcrux.Kem.Kyber.Constants.v_SHARED_SECRET_SIZE - let impl__pk - (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) -- (self: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -+ (self: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) - : t_Array u8 v_PUBLIC_KEY_SIZE = impl_18__as_slice v_PUBLIC_KEY_SIZE self.f_pk +-val serialize_kem_secret_key (#p:Spec.Kyber.params) ++val serialize_kem_secret_key + (v_SERIALIZED_KEY_LEN: usize) + (private_key public_key implicit_rejection_value: t_Slice u8) +- : Pure (t_Array u8 v_SERIALIZED_KEY_LEN) +- (requires (length private_key == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ +- length public_key == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ +- length implicit_rejection_value == Spec.Kyber.v_SHARED_SECRET_SIZE /\ +- v_SERIALIZED_KEY_LEN == Spec.Kyber.v_SECRET_KEY_SIZE p)) +- (ensures (fun res -> res == +- Seq.append private_key ( +- Seq.append public_key ( +- Seq.append (Libcrux.Kem.Kyber.Hash_functions.v_H public_key) implicit_rejection_value)))) ++ : Prims.Pure (t_Array u8 v_SERIALIZED_KEY_LEN) Prims.l_True (fun _ -> Prims.l_True) - let impl__private_key - (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) -- (self: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -- : t_MlKemPrivateKey v_PRIVATE_KEY_SIZE = self.f_sk -+ (self: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -+ : t_KyberPrivateKey v_PRIVATE_KEY_SIZE = self.f_sk +-val decapsulate (#p:Spec.Kyber.params) ++val decapsulate + (v_K v_SECRET_KEY_SIZE v_CPA_SECRET_KEY_SIZE v_PUBLIC_KEY_SIZE v_CIPHERTEXT_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_C1_BLOCK_SIZE v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE v_IMPLICIT_REJECTION_HASH_INPUT_SIZE: + usize) +- (secret_key: Libcrux.Kem.Kyber.Types.t_MlKemPrivateKey v_SECRET_KEY_SIZE) +- (ciphertext: Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE) +- : Pure (t_Array u8 (sz 32)) +- (requires ( p == (let open Spec.Kyber in {v_RANK = v_K; v_ETA1; v_ETA2; v_VECTOR_U_COMPRESSION_FACTOR; v_VECTOR_V_COMPRESSION_FACTOR}) /\ +- Spec.Kyber.valid_params p /\ +- v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ +- v_ETA2_RANDOMNESS_SIZE == Spec.Kyber.v_ETA2_RANDOMNESS_SIZE p /\ +- v_IMPLICIT_REJECTION_HASH_INPUT_SIZE == Spec.Kyber.v_IMPLICIT_REJECTION_HASH_INPUT_SIZE p /\ +- v_SECRET_KEY_SIZE == Spec.Kyber.v_SECRET_KEY_SIZE p /\ +- v_CPA_SECRET_KEY_SIZE == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ +- v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ +- v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ +- v_C1_SIZE == Spec.Kyber.v_C1_SIZE p /\ +- v_C1_BLOCK_SIZE == Spec.Kyber.v_C1_BLOCK_SIZE p /\ +- v_C2_SIZE == Spec.Kyber.v_C2_SIZE p /\ +- v_T_AS_NTT_ENCODED_SIZE = Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p +- )) +- (ensures (fun res -> +- res == Spec.Kyber.ind_cca_decapsulate p secret_key.f_value ciphertext.f_value)) ++ (secret_key: Libcrux.Kem.Kyber.Types.t_KyberPrivateKey v_SECRET_KEY_SIZE) ++ (ciphertext: Libcrux.Kem.Kyber.Types.t_KyberCiphertext v_CIPHERTEXT_SIZE) ++ : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) - let impl__public_key - (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) -- (self: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -- : t_MlKemPublicKey v_PUBLIC_KEY_SIZE = self.f_pk -+ (self: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -+ : t_KyberPublicKey v_PUBLIC_KEY_SIZE = self.f_pk +-val encapsulate (#p:Spec.Kyber.params) ++val encapsulate + (v_K v_CIPHERTEXT_SIZE 
v_PUBLIC_KEY_SIZE v_T_AS_NTT_ENCODED_SIZE v_C1_SIZE v_C2_SIZE v_VECTOR_U_COMPRESSION_FACTOR v_VECTOR_V_COMPRESSION_FACTOR v_VECTOR_U_BLOCK_LEN v_ETA1 v_ETA1_RANDOMNESS_SIZE v_ETA2 v_ETA2_RANDOMNESS_SIZE: + usize) +- (public_key: Libcrux.Kem.Kyber.Types.t_MlKemPublicKey v_PUBLIC_KEY_SIZE) ++ (public_key: Libcrux.Kem.Kyber.Types.t_KyberPublicKey v_PUBLIC_KEY_SIZE) + (randomness: t_Array u8 (sz 32)) +- : Pure (Libcrux.Kem.Kyber.Types.t_MlKemCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) +- (requires (p == (let open Spec.Kyber in {v_RANK = v_K; v_ETA1; v_ETA2; v_VECTOR_U_COMPRESSION_FACTOR; v_VECTOR_V_COMPRESSION_FACTOR}) /\ +- Spec.Kyber.valid_params p /\ +- v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ +- v_ETA2_RANDOMNESS_SIZE == Spec.Kyber.v_ETA2_RANDOMNESS_SIZE p /\ +- v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ +- v_CIPHERTEXT_SIZE == Spec.Kyber.v_CPA_PKE_CIPHERTEXT_SIZE p /\ +- v_C1_SIZE == Spec.Kyber.v_C1_SIZE p /\ +- v_C2_SIZE == Spec.Kyber.v_C2_SIZE p /\ +- v_T_AS_NTT_ENCODED_SIZE = Spec.Kyber.v_T_AS_NTT_ENCODED_SIZE p /\ +- v_VECTOR_U_BLOCK_LEN == Spec.Kyber.v_C1_BLOCK_SIZE p +- )) ++ : Prims.Pure (Libcrux.Kem.Kyber.Types.t_KyberCiphertext v_CIPHERTEXT_SIZE & t_Array u8 (sz 32)) ++ Prims.l_True ++ (fun _ -> Prims.l_True) - let impl__sk - (v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE: usize) -- (self: t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) -+ (self: t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) - : t_Array u8 v_PRIVATE_KEY_SIZE = impl_12__as_slice v_PRIVATE_KEY_SIZE self.f_sk -diff -ruN extraction-edited/Libcrux_platform.fsti extraction-secret-independent/Libcrux_platform.fsti ---- extraction-edited/Libcrux_platform.fsti 1970-01-01 01:00:00.000000000 +0100 -+++ extraction-secret-independent/Libcrux_platform.fsti 2024-03-12 10:45:44.840928923 +0100 -@@ -0,0 +1,4 @@ -+module Libcrux_platform -+#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" -+ -+val simd256_support : unit -> bool +- (ensures (fun (ct,ss) -> +- (ct.f_value,ss) == Spec.Kyber.ind_cca_encapsulate p public_key.f_value randomness)) +- +-val validate_public_key (#p:Spec.Kyber.params) +- (v_K v_RANKED_BYTES_PER_RING_ELEMENT v_PUBLIC_KEY_SIZE: usize) +- (public_key: t_Array u8 v_PUBLIC_KEY_SIZE) +- : Prims.Pure bool +- (requires (v_K == p.v_RANK /\ +- v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ +- v_RANKED_BYTES_PER_RING_ELEMENT == Spec.Kyber.v_RANKED_BYTES_PER_RING_ELEMENT p +- )) +- (ensures (fun _ -> Prims.l_True)) +- +-val generate_keypair (#p:Spec.Kyber.params) ++val generate_keypair + (v_K v_CPA_PRIVATE_KEY_SIZE v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE v_BYTES_PER_RING_ELEMENT v_ETA1 v_ETA1_RANDOMNESS_SIZE: + usize) + (randomness: t_Array u8 (sz 64)) +- : Pure (Libcrux.Kem.Kyber.Types.t_MlKemKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) +- (requires (v_K == p.v_RANK /\ v_ETA1 == p.v_ETA1 /\ +- v_ETA1_RANDOMNESS_SIZE == Spec.Kyber.v_ETA1_RANDOMNESS_SIZE p /\ +- v_PUBLIC_KEY_SIZE == Spec.Kyber.v_CPA_PKE_PUBLIC_KEY_SIZE p /\ +- v_CPA_PRIVATE_KEY_SIZE == Spec.Kyber.v_CPA_PKE_SECRET_KEY_SIZE p /\ +- v_PRIVATE_KEY_SIZE == Spec.Kyber.v_SECRET_KEY_SIZE p /\ +- v_BYTES_PER_RING_ELEMENT == Spec.Kyber.v_RANKED_BYTES_PER_RING_ELEMENT p +- )) +- (ensures (fun kp -> +- (kp.f_sk.f_value,kp.f_pk.f_value) == Spec.Kyber.ind_cca_generate_keypair p randomness)) ++ : Prims.Pure (Libcrux.Kem.Kyber.Types.t_KyberKeyPair v_PRIVATE_KEY_SIZE v_PUBLIC_KEY_SIZE) ++ Prims.l_True ++ (fun _ -> Prims.l_True) diff -ruN extraction-edited/Libcrux_platform.Platform.fsti 
extraction-secret-independent/Libcrux_platform.Platform.fsti ---- extraction-edited/Libcrux_platform.Platform.fsti 2024-03-12 10:45:44.782930634 +0100 -+++ extraction-secret-independent/Libcrux_platform.Platform.fsti 1970-01-01 01:00:00.000000000 +0100 +--- extraction-edited/Libcrux_platform.Platform.fsti 2024-03-13 11:03:50 ++++ extraction-secret-independent/Libcrux_platform.Platform.fsti 1970-01-01 01:00:00 @@ -1,20 +0,0 @@ -module Libcrux_platform.Platform -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" @@ -8164,9 +8123,17 @@ diff -ruN extraction-edited/Libcrux_platform.Platform.fsti extraction-secret-ind -val sha256_support: Prims.unit -> Prims.Pure bool Prims.l_True (fun _ -> Prims.l_True) - -val simd128_support: Prims.unit -> Prims.Pure bool Prims.l_True (fun _ -> Prims.l_True) +diff -ruN extraction-edited/Libcrux_platform.fsti extraction-secret-independent/Libcrux_platform.fsti +--- extraction-edited/Libcrux_platform.fsti 1970-01-01 01:00:00 ++++ extraction-secret-independent/Libcrux_platform.fsti 2024-03-13 11:03:50 +@@ -0,0 +1,4 @@ ++module Libcrux_platform ++#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" ++ ++val simd256_support : unit -> bool diff -ruN extraction-edited/MkSeq.fst extraction-secret-independent/MkSeq.fst ---- extraction-edited/MkSeq.fst 2024-03-12 10:45:44.778930752 +0100 -+++ extraction-secret-independent/MkSeq.fst 1970-01-01 01:00:00.000000000 +0100 +--- extraction-edited/MkSeq.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/MkSeq.fst 1970-01-01 01:00:00 @@ -1,91 +0,0 @@ -module MkSeq -open Core @@ -8260,8 +8227,8 @@ diff -ruN extraction-edited/MkSeq.fst extraction-secret-independent/MkSeq.fst - -%splice[] (init 13 (fun i -> create_gen_tac (i + 1))) diff -ruN extraction-edited/Spec.Kyber.fst extraction-secret-independent/Spec.Kyber.fst ---- extraction-edited/Spec.Kyber.fst 2024-03-12 10:45:44.805929956 +0100 -+++ extraction-secret-independent/Spec.Kyber.fst 1970-01-01 01:00:00.000000000 +0100 +--- extraction-edited/Spec.Kyber.fst 2024-03-13 11:03:50 ++++ extraction-secret-independent/Spec.Kyber.fst 1970-01-01 01:00:00 @@ -1,435 +0,0 @@ -module Spec.Kyber -#set-options "--fuel 0 --ifuel 1 --z3rlimit 100" diff --git a/proofs/fstar/extraction/Libcrux.Digest.Incremental_x4.fsti b/proofs/fstar/extraction/Libcrux.Digest.Incremental_x4.fsti new file mode 100644 index 000000000..ba0a48ea5 --- /dev/null +++ b/proofs/fstar/extraction/Libcrux.Digest.Incremental_x4.fsti @@ -0,0 +1,23 @@ +module Libcrux.Digest.Incremental_x4 +#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" +open Core +open FStar.Mul + +val t_Shake128StateX4:Type + +val impl__Shake128StateX4__absorb_final + (v_N: usize) + (self: t_Shake128StateX4) + (input: t_Array (t_Slice u8) v_N) + : Prims.Pure t_Shake128StateX4 Prims.l_True (fun _ -> Prims.l_True) + +val impl__Shake128StateX4__free_memory (self: t_Shake128StateX4) + : Prims.Pure Prims.unit Prims.l_True (fun _ -> Prims.l_True) + +val impl__Shake128StateX4__new: Prims.unit + -> Prims.Pure t_Shake128StateX4 Prims.l_True (fun _ -> Prims.l_True) + +val impl__Shake128StateX4__squeeze_blocks (v_N v_M: usize) (self: t_Shake128StateX4) + : Prims.Pure (t_Shake128StateX4 & t_Array (t_Array u8 v_N) v_M) + Prims.l_True + (fun _ -> Prims.l_True) diff --git a/proofs/fstar/extraction/Libcrux.Digest.fst b/proofs/fstar/extraction/Libcrux.Digest.fst deleted file mode 100644 index 94f37dd21..000000000 --- a/proofs/fstar/extraction/Libcrux.Digest.fst +++ /dev/null @@ -1,48 +0,0 @@ -module Libcrux.Digest -#set-options "--fuel 0 --ifuel 1 --z3rlimit 15" -open Core 
-open FStar.Mul - -let sha3_256_ (payload: t_Slice u8) = Libcrux.Hacl.Sha3.sha256 payload - -let sha3_512_ (payload: t_Slice u8) = Libcrux.Hacl.Sha3.sha512 payload - -let shake128 (v_LEN: usize) (data: t_Slice u8) = Libcrux.Hacl.Sha3.shake128 v_LEN data - -let shake256 (v_LEN: usize) (data: t_Slice u8) = Libcrux.Hacl.Sha3.shake256 v_LEN data - -let shake128x4_portable (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) = - let input_len:usize = Core.Slice.impl__len data0 in - let _:Prims.unit = - if true - then - let _:Prims.unit = - if - ~.((input_len =. (Core.Slice.impl__len data1 <: usize) <: bool) && - (input_len =. (Core.Slice.impl__len data2 <: usize) <: bool) && - (input_len =. (Core.Slice.impl__len data3 <: usize) <: bool) && - (input_len <=. (cast (Core.Num.impl__u32__MAX <: u32) <: usize) <: bool) && - (v_LEN <=. (cast (Core.Num.impl__u32__MAX <: u32) <: usize) <: bool)) - then - Rust_primitives.Hax.never_to_any (Core.Panicking.panic "assertion failed: input_len == data1.len() && input_len == data2.len() &&\\n input_len == data3.len() && input_len <= u32::MAX as usize &&\\n LEN <= u32::MAX as usize" - - <: - Rust_primitives.Hax.t_Never) - in - () - in - let digest0:t_Array u8 v_LEN = Libcrux.Hacl.Sha3.shake128 v_LEN data0 in - let digest1:t_Array u8 v_LEN = Libcrux.Hacl.Sha3.shake128 v_LEN data1 in - let digest2:t_Array u8 v_LEN = Libcrux.Hacl.Sha3.shake128 v_LEN data2 in - let digest3:t_Array u8 v_LEN = Libcrux.Hacl.Sha3.shake128 v_LEN data3 in - digest0, digest1, digest2, digest3 - <: - (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) - -let shake128x4_256_ (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) = - shake128x4_portable v_LEN data0 data1 data2 data3 - -let shake128x4 (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) = - if Libcrux_platform.Platform.simd256_support () - then shake128x4_256_ v_LEN data0 data1 data2 data3 - else shake128x4_portable v_LEN data0 data1 data2 data3 diff --git a/proofs/fstar/extraction/Libcrux.Digest.fsti b/proofs/fstar/extraction/Libcrux.Digest.fsti index 59887e419..de5b4d494 100644 --- a/proofs/fstar/extraction/Libcrux.Digest.fsti +++ b/proofs/fstar/extraction/Libcrux.Digest.fsti @@ -9,23 +9,5 @@ val sha3_256_ (payload: t_Slice u8) val sha3_512_ (payload: t_Slice u8) : Prims.Pure (t_Array u8 (sz 64)) Prims.l_True (fun _ -> Prims.l_True) -val shake128 (v_LEN: usize) (data: t_Slice u8) - : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) - val shake256 (v_LEN: usize) (data: t_Slice u8) : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) - -val shake128x4_portable (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) - : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) - Prims.l_True - (fun _ -> Prims.l_True) - -val shake128x4_256_ (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) - : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) - Prims.l_True - (fun _ -> Prims.l_True) - -val shake128x4 (v_LEN: usize) (data0 data1 data2 data3: t_Slice u8) - : Prims.Pure (t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN & t_Array u8 v_LEN) - Prims.l_True - (fun _ -> Prims.l_True) diff --git a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Constants.fsti b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Constants.fsti index 938e50fe2..7bb3171bd 100644 --- a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Constants.fsti +++ b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Constants.fsti @@ -17,6 +17,4 @@ let v_FIELD_MODULUS: i32 = 
3329l let v_H_DIGEST_SIZE: usize = sz 32 -let v_REJECTION_SAMPLING_SEED_SIZE: usize = sz 168 *! sz 5 - let v_SHARED_SECRET_SIZE: usize = sz 32 diff --git a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Hash_functions.fst b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Hash_functions.fst index 48e40e681..9a1368857 100644 --- a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Hash_functions.fst +++ b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Hash_functions.fst @@ -9,95 +9,123 @@ let v_H (input: t_Slice u8) = Libcrux.Digest.sha3_256_ input let v_PRF (v_LEN: usize) (input: t_Slice u8) = Libcrux.Digest.shake256 v_LEN input -let v_XOFx4 (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) = - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.repeat (Rust_primitives.Hax.repeat 0uy (sz 840) <: t_Array u8 (sz 840)) v_K - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - if ~.(Libcrux_platform.Platform.simd256_support () <: bool) +let absorb (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) = + let _:Prims.unit = + if true then - Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ - Core.Ops.Range.f_start = sz 0; - Core.Ops.Range.f_end = v_K - } + let _:Prims.unit = + if ~.((v_K =. sz 2 <: bool) || (v_K =. sz 3 <: bool) || (v_K =. sz 4 <: bool)) + then + Rust_primitives.Hax.never_to_any (Core.Panicking.panic "assertion failed: K == 2 || K == 3 || K == 4" + <: - Core.Ops.Range.t_Range usize) + Rust_primitives.Hax.t_Never) + in + () + in + let state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = + Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__new () + in + let (data: t_Array (t_Slice u8) v_K):t_Array (t_Slice u8) v_K = + Rust_primitives.Hax.repeat (Rust_primitives.unsize (let list = [0uy] in + FStar.Pervasives.assert_norm (Prims.eq2 (List.Tot.length list) 1); + Rust_primitives.Hax.array_of_list 1 list) + <: + t_Slice u8) + v_K + in + let data:t_Array (t_Slice u8) v_K = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + Core.Ops.Range.f_start = sz 0; + Core.Ops.Range.f_end = v_K + } + <: + Core.Ops.Range.t_Range usize) + <: + Core.Ops.Range.t_Range usize) + data + (fun data i -> + let data:t_Array (t_Slice u8) v_K = data in + let i:usize = i in + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize data + i + (Rust_primitives.unsize (input.[ i ] <: t_Array u8 (sz 34)) <: t_Slice u8) + <: + t_Array (t_Slice u8) v_K) + in + let state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = + Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__absorb_final v_K state data + in + state + +let free_state (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = + let _:Prims.unit = Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__free_memory xof_state in + () + +let squeeze_block (v_K: usize) (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = + let tmp0, out1:(Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & + t_Array (t_Array u8 (sz 168)) v_K) = + Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__squeeze_blocks (sz 168) v_K xof_state + in + let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = tmp0 in + let (output: t_Array (t_Array u8 (sz 168)) v_K):t_Array (t_Array u8 (sz 168)) v_K = out1 in + let out:t_Array (t_Array u8 (sz 168)) v_K = + Rust_primitives.Hax.repeat (Rust_primitives.Hax.repeat 0uy (sz 168) <: t_Array u8 (sz 168)) v_K + in + let out:t_Array (t_Array u8 (sz 168)) v_K = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + Core.Ops.Range.f_start = sz 0; + 
Core.Ops.Range.f_end = v_K + } + <: + Core.Ops.Range.t_Range usize) + <: + Core.Ops.Range.t_Range usize) + out + (fun out i -> + let out:t_Array (t_Array u8 (sz 168)) v_K = out in + let i:usize = i in + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out + i + (output.[ i ] <: t_Array u8 (sz 168)) <: - Core.Ops.Range.t_Range usize) - out - (fun out i -> - let out:t_Array (t_Array u8 (sz 840)) v_K = out in - let i:usize = i in - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out - i - (Libcrux.Digest.shake128 (sz 840) - (Rust_primitives.unsize (input.[ i ] <: t_Array u8 (sz 34)) <: t_Slice u8) - <: - t_Array u8 (sz 840)) + t_Array (t_Array u8 (sz 168)) v_K) + in + let hax_temp_output:t_Array (t_Array u8 (sz 168)) v_K = out in + xof_state, hax_temp_output + <: + (Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & t_Array (t_Array u8 (sz 168)) v_K) + +let squeeze_three_blocks (v_K: usize) (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = + let tmp0, out1:(Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & + t_Array (t_Array u8 (sz 504)) v_K) = + Libcrux.Digest.Incremental_x4.impl__Shake128StateX4__squeeze_blocks (sz 504) v_K xof_state + in + let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = tmp0 in + let (output: t_Array (t_Array u8 (sz 504)) v_K):t_Array (t_Array u8 (sz 504)) v_K = out1 in + let out:t_Array (t_Array u8 (sz 504)) v_K = + Rust_primitives.Hax.repeat (Rust_primitives.Hax.repeat 0uy (sz 504) <: t_Array u8 (sz 504)) v_K + in + let out:t_Array (t_Array u8 (sz 504)) v_K = + Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ + Core.Ops.Range.f_start = sz 0; + Core.Ops.Range.f_end = v_K + } <: - t_Array (t_Array u8 (sz 840)) v_K) - else - let out:t_Array (t_Array u8 (sz 840)) v_K = - match cast (v_K <: usize) <: u8 with - | 2uy -> - let d0, d1, _, _:(t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & - t_Array u8 (sz 840)) = - Libcrux.Digest.shake128x4 (sz 840) - (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 1 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 1 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 0) d0 - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 1) d1 - in - out - | 3uy -> - let d0, d1, d2, _:(t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & - t_Array u8 (sz 840)) = - Libcrux.Digest.shake128x4 (sz 840) - (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 1 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 2 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 0) d0 - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 1) d1 - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 2) d2 - in - out - | 4uy -> - let d0, d1, d2, d3:(t_Array u8 (sz 840) & t_Array u8 (sz 840) & t_Array u8 (sz 840) & - t_Array 
u8 (sz 840)) = - Libcrux.Digest.shake128x4 (sz 840) - (Rust_primitives.unsize (input.[ sz 0 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 1 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 2 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - (Rust_primitives.unsize (input.[ sz 3 ] <: t_Array u8 (sz 34)) <: t_Slice u8) - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 0) d0 - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 1) d1 - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 2) d2 - in - let out:t_Array (t_Array u8 (sz 840)) v_K = - Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out (sz 3) d3 - in - out - | _ -> out - in + Core.Ops.Range.t_Range usize) + <: + Core.Ops.Range.t_Range usize) out + (fun out i -> + let out:t_Array (t_Array u8 (sz 504)) v_K = out in + let i:usize = i in + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out + i + (output.[ i ] <: t_Array u8 (sz 504)) + <: + t_Array (t_Array u8 (sz 504)) v_K) in - out + let hax_temp_output:t_Array (t_Array u8 (sz 504)) v_K = out in + xof_state, hax_temp_output + <: + (Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & t_Array (t_Array u8 (sz 504)) v_K) diff --git a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Hash_functions.fsti b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Hash_functions.fsti index 2b580727c..18555f6cf 100644 --- a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Hash_functions.fsti +++ b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Hash_functions.fsti @@ -3,6 +3,8 @@ module Libcrux.Kem.Kyber.Hash_functions open Core open FStar.Mul +let v_BLOCK_SIZE: usize = sz 168 + val v_G (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 64)) Prims.l_True (fun _ -> Prims.l_True) val v_H (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun _ -> Prims.l_True) @@ -10,5 +12,24 @@ val v_H (input: t_Slice u8) : Prims.Pure (t_Array u8 (sz 32)) Prims.l_True (fun val v_PRF (v_LEN: usize) (input: t_Slice u8) : Prims.Pure (t_Array u8 v_LEN) Prims.l_True (fun _ -> Prims.l_True) -val v_XOFx4 (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) - : Prims.Pure (t_Array (t_Array u8 (sz 840)) v_K) Prims.l_True (fun _ -> Prims.l_True) +let v_THREE_BLOCKS: usize = v_BLOCK_SIZE *! 
sz 3 + +val absorb (v_K: usize) (input: t_Array (t_Array u8 (sz 34)) v_K) + : Prims.Pure Libcrux.Digest.Incremental_x4.t_Shake128StateX4 + Prims.l_True + (fun _ -> Prims.l_True) + +val free_state (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) + : Prims.Pure Prims.unit Prims.l_True (fun _ -> Prims.l_True) + +val squeeze_block (v_K: usize) (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) + : Prims.Pure + (Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & t_Array (t_Array u8 (sz 168)) v_K) + Prims.l_True + (fun _ -> Prims.l_True) + +val squeeze_three_blocks (v_K: usize) (xof_state: Libcrux.Digest.Incremental_x4.t_Shake128StateX4) + : Prims.Pure + (Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & t_Array (t_Array u8 (sz 504)) v_K) + Prims.l_True + (fun _ -> Prims.l_True) diff --git a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Matrix.fst b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Matrix.fst index 6cfbc72fe..ced9b7441 100644 --- a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Matrix.fst +++ b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Matrix.fst @@ -482,8 +482,8 @@ let sample_matrix_A (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) = in seeds) in - let xof_bytes:t_Array (t_Array u8 (sz 840)) v_K = - Libcrux.Kem.Kyber.Hash_functions.v_XOFx4 v_K seeds + let sampled:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = + Libcrux.Kem.Kyber.Sampling.sample_from_xof v_K seeds in Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({ Core.Ops.Range.f_start = sz 0; @@ -500,11 +500,6 @@ let sample_matrix_A (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) = v_A_transpose in let j:usize = j in - let sampled:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = - Libcrux.Kem.Kyber.Sampling.sample_from_uniform_distribution (xof_bytes.[ j ] - <: - t_Array u8 (sz 840)) - in if transpose then let v_A_transpose:t_Array @@ -516,7 +511,7 @@ let sample_matrix_A (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) = <: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) i - sampled + (sampled.[ j ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) <: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) in @@ -531,7 +526,7 @@ let sample_matrix_A (v_K: usize) (seed: t_Array u8 (sz 34)) (transpose: bool) = <: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) j - sampled + (sampled.[ j ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) <: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K) in diff --git a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Sampling.fst b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Sampling.fst index 94988a9fe..40b62654e 100644 --- a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Sampling.fst +++ b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Sampling.fst @@ -3,11 +3,6 @@ module Libcrux.Kem.Kyber.Sampling open Core open FStar.Mul -let rejection_sampling_panic_with_diagnostic (_: Prims.unit) = - Rust_primitives.Hax.never_to_any (Core.Panicking.panic "explicit panic" - <: - Rust_primitives.Hax.t_Never) - let sample_from_binomial_distribution_2_ (randomness: t_Slice u8) = let (sampled: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement):Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement = @@ -158,114 +153,227 @@ let sample_from_binomial_distribution (v_ETA: usize) (randomness: t_Slice u8) = <: Rust_primitives.Hax.t_Never) -let sample_from_uniform_distribution (randomness: t_Array u8 (sz 840)) = - let (sampled_coefficients: usize):usize = sz 0 in - let (out: 
Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement):Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement
-  =
-  Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO
-  in
-  let done:bool = false in
-  let done, out, sampled_coefficients:(bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement &
-    usize) =
-    Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Slice.impl__chunks (
-          Rust_primitives.unsize randomness <: t_Slice u8)
-        (sz 3)
+let sample_from_uniform_distribution_next
+  (v_K v_N: usize)
+  (randomness: t_Array (t_Array u8 v_N) v_K)
+  (sampled_coefficients: t_Array usize v_K)
+  (out: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K)
+  =
+  let done:bool = true in
+  let done, out, sampled_coefficients:(bool &
+    t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K &
+    t_Array usize v_K) =
+    Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter ({
+              Core.Ops.Range.f_start = sz 0;
+              Core.Ops.Range.f_end = v_K
+            }
             <:
-        Core.Slice.Iter.t_Chunks u8)
+            Core.Ops.Range.t_Range usize)
         <:
-      Core.Slice.Iter.t_Chunks u8)
+        Core.Ops.Range.t_Range usize)
       (done, out, sampled_coefficients
         <:
-        (bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize))
-      (fun temp_0_ bytes ->
+        (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & t_Array usize v_K
+        ))
+      (fun temp_0_ i ->
           let done, out, sampled_coefficients:(bool &
-            Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement &
-            usize) =
+            t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K &
+            t_Array usize v_K) =
             temp_0_
           in
-          let bytes:t_Slice u8 = bytes in
-          if ~.done <: bool
-          then
-            let b1:i32 = cast (bytes.[ sz 0 ] <: u8) <: i32 in
-            let b2:i32 = cast (bytes.[ sz 1 ] <: u8) <: i32 in
-            let b3:i32 = cast (bytes.[ sz 2 ] <: u8) <: i32 in
-            let d1:i32 = ((b2 &. 15l <: i32) <<! 8l <: i32) |. b1 in
-            let d2:i32 = (b3 <<! 4l <: i32) |. (b2 >>! 4l <: i32) in
-            let out, sampled_coefficients:(Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement &
-              usize) =
-              if
-                d1 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS &&
-                sampled_coefficients <. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT
-              then
-                let out:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement =
-                  {
-                    out with
-                    Libcrux.Kem.Kyber.Arithmetic.f_coefficients
-                    =
-                    Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out
-                        .Libcrux.Kem.Kyber.Arithmetic.f_coefficients
-                      sampled_coefficients
-                      d1
-                  }
-                  <:
-                  Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement
-                in
-                out, sampled_coefficients +! sz 1
-                <:
-                (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize)
-              else
-                out, sampled_coefficients
+          let i:usize = i in
+          let out, sampled_coefficients:(t_Array
+              Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K &
+              t_Array usize v_K) =
+            Core.Iter.Traits.Iterator.f_fold (Core.Iter.Traits.Collect.f_into_iter (Core.Slice.impl__chunks
+                      (Rust_primitives.unsize (randomness.[ i ] <: t_Array u8 v_N) <: t_Slice u8)
+                      (sz 3)
+                    <:
+                    Core.Slice.Iter.t_Chunks u8)
                 <:
-                (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize)
-            in
-            let out, sampled_coefficients:(Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement &
-              usize) =
-              if
-                d2 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS &&
-                sampled_coefficients <. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT
-              then
-                let out:Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement =
-                  {
-                    out with
-                    Libcrux.Kem.Kyber.Arithmetic.f_coefficients
-                    =
-                    Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out
-                        .Libcrux.Kem.Kyber.Arithmetic.f_coefficients
-                      sampled_coefficients
-                      d2
-                  }
-                  <:
-                  Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement
-                in
-                let sampled_coefficients:usize = sampled_coefficients +! sz 1 in
-                out, sampled_coefficients
+              Core.Slice.Iter.t_Chunks u8)
+              (out, sampled_coefficients
                 <:
-                (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize)
-              else
-                out, sampled_coefficients
-                <:
-                (Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize)
-            in
-            if sampled_coefficients =. Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT
-            then
-              let done:bool = true in
-              done, out, sampled_coefficients
-              <:
-              (bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize)
-            else
-              done, out, sampled_coefficients
-              <:
-              (bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize))
+              (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K &
+                t_Array usize v_K))
+              (fun temp_0_ bytes ->
+                  let out, sampled_coefficients:(t_Array
+                      Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K &
+                      t_Array usize v_K) =
+                    temp_0_
+                  in
+                  let bytes:t_Slice u8 = bytes in
+                  let b1:i32 = cast (bytes.[ sz 0 ] <: u8) <: i32 in
+                  let b2:i32 = cast (bytes.[ sz 1 ] <: u8) <: i32 in
+                  let b3:i32 = cast (bytes.[ sz 2 ] <: u8) <: i32 in
+                  let d1:i32 = ((b2 &. 15l <: i32) <<! 8l <: i32) |. b1 in
+                  let d2:i32 = (b3 <<! 4l <: i32) |. (b2 >>! 4l <: i32) in
+                  let out, sampled_coefficients:(t_Array
+                      Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K &
+                      t_Array usize v_K) =
+                    if
+                      d1 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS &&
+                      (sampled_coefficients.[ i ] <: usize) <.
+                      Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT
+                    then
+                      let out:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K =
+                        Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out
+                          i
+                          ({
+                              (out.[ i ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) with
+                              Libcrux.Kem.Kyber.Arithmetic.f_coefficients
+                              =
+                              Rust_primitives.Hax.Monomorphized_update_at.update_at_usize (out.[ i ]
+                                  <:
+                                  Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement)
+                                  .Libcrux.Kem.Kyber.Arithmetic.f_coefficients
+                                (sampled_coefficients.[ i ] <: usize)
+                                d1
+                              <:
+                              t_Array i32 (sz 256)
+                            }
+                            <:
+                            Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement)
+                      in
+                      out,
+                      Rust_primitives.Hax.Monomorphized_update_at.update_at_usize sampled_coefficients
+                        i
+                        ((sampled_coefficients.[ i ] <: usize) +! sz 1 <: usize)
+                      <:
+                      (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K &
+                        t_Array usize v_K)
+                    else
+                      out, sampled_coefficients
+                      <:
+                      (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K &
+                        t_Array usize v_K)
+                  in
+                  if
+                    d2 <. Libcrux.Kem.Kyber.Constants.v_FIELD_MODULUS &&
+                    (sampled_coefficients.[ i ] <: usize) <.
+ Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT + then + let out:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize out + i + ({ + (out.[ i ] <: Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) with + Libcrux.Kem.Kyber.Arithmetic.f_coefficients + = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize (out.[ i ] + <: + Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) + .Libcrux.Kem.Kyber.Arithmetic.f_coefficients + (sampled_coefficients.[ i ] <: usize) + d2 + <: + t_Array i32 (sz 256) + } + <: + Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement) + in + let sampled_coefficients:t_Array usize v_K = + Rust_primitives.Hax.Monomorphized_update_at.update_at_usize sampled_coefficients + i + ((sampled_coefficients.[ i ] <: usize) +! sz 1 <: usize) + in + out, sampled_coefficients + <: + (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array usize v_K) + else + out, sampled_coefficients + <: + (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array usize v_K)) + in + if + (sampled_coefficients.[ i ] <: usize) <. + Libcrux.Kem.Kyber.Constants.v_COEFFICIENTS_IN_RING_ELEMENT + then + false, out, sampled_coefficients + <: + (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array usize v_K) else done, out, sampled_coefficients <: - (bool & Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement & usize)) + (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array usize v_K)) + in + let hax_temp_output:bool = done in + sampled_coefficients, out, hax_temp_output + <: + (t_Array usize v_K & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & bool) + +let sample_from_xof (v_K: usize) (seeds: t_Array (t_Array u8 (sz 34)) v_K) = + let (sampled_coefficients: t_Array usize v_K):t_Array usize v_K = + Rust_primitives.Hax.repeat (sz 0) v_K + in + let (out: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K):t_Array + Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = + Rust_primitives.Hax.repeat Libcrux.Kem.Kyber.Arithmetic.impl__PolynomialRingElement__ZERO v_K in - let _:Prims.unit = - if ~.done - then - let _:Prims.unit = rejection_sampling_panic_with_diagnostic () in - () + let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = + Libcrux.Kem.Kyber.Hash_functions.absorb v_K seeds + in + let tmp0, out1:(Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & + t_Array (t_Array u8 (sz 504)) v_K) = + Libcrux.Kem.Kyber.Hash_functions.squeeze_three_blocks v_K xof_state + in + let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = tmp0 in + let randomness:t_Array (t_Array u8 (sz 504)) v_K = out1 in + let tmp0, tmp1, out1:(t_Array usize v_K & + t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + bool) = + sample_from_uniform_distribution_next v_K (sz 504) randomness sampled_coefficients out + in + let sampled_coefficients:t_Array usize v_K = tmp0 in + let out:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = tmp1 in + let done:bool = out1 in + let done, out, sampled_coefficients, xof_state:(bool & + t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array usize v_K & + Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = + Rust_primitives.f_while_loop (fun temp_0_ -> + let done, out, sampled_coefficients, xof_state:(bool & + t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array usize v_K & + 
Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = + temp_0_ + in + ~.done <: bool) + (done, out, sampled_coefficients, xof_state + <: + (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & t_Array usize v_K & + Libcrux.Digest.Incremental_x4.t_Shake128StateX4)) + (fun temp_0_ -> + let done, out, sampled_coefficients, xof_state:(bool & + t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array usize v_K & + Libcrux.Digest.Incremental_x4.t_Shake128StateX4) = + temp_0_ + in + let tmp0, out1:(Libcrux.Digest.Incremental_x4.t_Shake128StateX4 & + t_Array (t_Array u8 (sz 168)) v_K) = + Libcrux.Kem.Kyber.Hash_functions.squeeze_block v_K xof_state + in + let xof_state:Libcrux.Digest.Incremental_x4.t_Shake128StateX4 = tmp0 in + let randomness:t_Array (t_Array u8 (sz 168)) v_K = out1 in + let tmp0, tmp1, out1:(t_Array usize v_K & + t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + bool) = + sample_from_uniform_distribution_next v_K (sz 168) randomness sampled_coefficients out + in + let sampled_coefficients:t_Array usize v_K = tmp0 in + let out:t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K = tmp1 in + let done:bool = out1 in + done, out, sampled_coefficients, xof_state + <: + (bool & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & + t_Array usize v_K & + Libcrux.Digest.Incremental_x4.t_Shake128StateX4)) in + let _:Prims.unit = Libcrux.Kem.Kyber.Hash_functions.free_state xof_state in let _:Prims.unit = () <: Prims.unit in out diff --git a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Sampling.fsti b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Sampling.fsti index 4000929f6..6c5f8500e 100644 --- a/proofs/fstar/extraction/Libcrux.Kem.Kyber.Sampling.fsti +++ b/proofs/fstar/extraction/Libcrux.Kem.Kyber.Sampling.fsti @@ -3,9 +3,6 @@ module Libcrux.Kem.Kyber.Sampling open Core open FStar.Mul -val rejection_sampling_panic_with_diagnostic: Prims.unit - -> Prims.Pure Prims.unit Prims.l_True (fun _ -> Prims.l_True) - val sample_from_binomial_distribution_2_ (randomness: t_Slice u8) : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement (requires (Core.Slice.impl__len randomness <: usize) =. (sz 2 *! 
sz 64 <: usize))
@@ -73,7 +70,17 @@ val sample_from_binomial_distribution (v_ETA: usize) (randomness: t_Slice u8)
       Prims.l_True
       (fun _ -> Prims.l_True)
 
-val sample_from_uniform_distribution (randomness: t_Array u8 (sz 840))
-    : Prims.Pure Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement
+val sample_from_uniform_distribution_next
+      (v_K v_N: usize)
+      (randomness: t_Array (t_Array u8 v_N) v_K)
+      (sampled_coefficients: t_Array usize v_K)
+      (out: t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K)
+    : Prims.Pure
+      (t_Array usize v_K & t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K & bool)
+      Prims.l_True
+      (fun _ -> Prims.l_True)
+
+val sample_from_xof (v_K: usize) (seeds: t_Array (t_Array u8 (sz 34)) v_K)
+    : Prims.Pure (t_Array Libcrux.Kem.Kyber.Arithmetic.t_PolynomialRingElement v_K)
       Prims.l_True
       (fun _ -> Prims.l_True)
diff --git a/proofs/fstar/extraction/Makefile b/proofs/fstar/extraction/Makefile
index b22bb482b..763274af1 100644
--- a/proofs/fstar/extraction/Makefile
+++ b/proofs/fstar/extraction/Makefile
@@ -63,6 +63,8 @@ UNVERIFIED = \
   Libcrux.Kem.Kyber.Arithmetic.fsti \
   Libcrux.Kem.Kyber.Compress.fst \
   Libcrux.Kem.Kyber.Constant_time_ops.fst \
+  Libcrux.Digest.fsti \
+  Libcrux.Digest.Incremental_x4.fsti \
   Libcrux.Kem.Kyber.Hash_functions.fst \
   Libcrux.Kem.Kyber.Matrix.fst \
   Libcrux.Kem.Kyber.Ntt.fst \
diff --git a/src/digest.rs b/src/digest.rs
index 7ebcfeed0..a468b8bec 100644
--- a/src/digest.rs
+++ b/src/digest.rs
@@ -370,3 +370,66 @@ pub fn shake128<const LEN: usize>(data: &[u8]) -> [u8; LEN] {
 pub fn shake256<const LEN: usize>(data: &[u8]) -> [u8; LEN] {
     sha3::shake256(data)
 }
+
+/// An incremental eXtendable Output Function API for SHA3 (shake).
+///
+/// This x4 variant of the incremental API always processes 4 inputs at a time.
+/// This uses AVX2 when available to run the 4 operations in parallel.
+///
+/// More generic APIs will be added later.
+pub mod incremental_x4 {
+
+    /// Incremental state
+    #[cfg_attr(hax, hax_lib_macros::opaque_type)]
+    pub struct Shake128StateX4 {
+        state: crate::hacl::sha3::incremental_x4::Shake128StateX4,
+    }
+
+    impl Shake128StateX4 {
+        /// Create a new Shake128 x4 state.
+        #[inline(always)]
+        pub fn new() -> Self {
+            Self {
+                state: crate::hacl::sha3::incremental_x4::Shake128StateX4::new(),
+            }
+        }
+
+        /// This is only used internally to work around Eurydice bugs.
+        #[inline(always)]
+        pub fn free_memory(self) {
+            self.state.free();
+        }
+
+        /// Absorb 4 blocks.
+        ///
+        /// All blocks MUST be the same length.
+        /// Each slice MUST be a multiple of the block length 168.
+        #[inline(always)]
+        pub fn absorb_4blocks(&mut self, input: [&[u8]; 4]) {
+            self.state.absorb_blocks(input)
+        }
+
+        /// Absorb up to 4 blocks.
+        ///
+        /// The `input` must be of length 1 to 4.
+        /// All blocks MUST be the same length.
+        /// Each slice MUST be shorter than the block length of 168 bytes.
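+        ///
+        /// A hypothetical usage sketch (illustration only, not part of this
+        /// API's contract), assuming three 34-byte seeds as in the Kyber
+        /// sampling code:
+        ///
+        /// ```ignore
+        /// let mut st = Shake128StateX4::new();
+        /// st.absorb_final([&seeds[0][..], &seeds[1][..], &seeds[2][..]]);
+        /// // Squeeze three 168-byte blocks per input (504 = 3 * 168).
+        /// let blocks: [[u8; 504]; 3] = st.squeeze_blocks();
+        /// st.free_memory();
+        /// ```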
+        #[inline(always)]
+        pub fn absorb_final<const N: usize>(&mut self, input: [&[u8]; N]) {
+            // Pad the input to the length of 4
+            let data = [
+                input[0],
+                if N > 1 { input[1] } else { &[] },
+                if N > 2 { input[2] } else { &[] },
+                if N > 3 { input[3] } else { &[] },
+            ];
+            self.state.absorb_final(data);
+        }
+
+        /// Squeeze `M` blocks of length `N`
+        #[inline(always)]
+        pub fn squeeze_blocks<const N: usize, const M: usize>(&mut self) -> [[u8; N]; M] {
+            self.state.squeeze_blocks()
+        }
+    }
+}
diff --git a/src/hacl/sha3.rs b/src/hacl/sha3.rs
index 818d3c1e8..014447aab 100644
--- a/src/hacl/sha3.rs
+++ b/src/hacl/sha3.rs
@@ -227,3 +227,437 @@ pub mod x4 {
         (digest0, digest1, digest2, digest3)
     }
 }
+
+/// This module groups together functions that can be used to absorb or squeeze
+/// bytes in increments.
+/// TODO: This module should not be public, see: https://github.com/cryspen/libcrux/issues/157
+pub mod incremental {
+    use std::ptr::null_mut;
+
+    use libcrux_hacl::{
+        Hacl_Hash_SHA3_Scalar_shake128_absorb_final, Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks,
+        Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks, Hacl_Hash_SHA3_Scalar_state_free,
+        Hacl_Hash_SHA3_Scalar_state_malloc,
+    };
+
+    /// SHAKE 128
+    ///
+    /// Handle to internal SHAKE 128 state
+    pub struct Shake128State {
+        state: *mut u64,
+    }
+
+    impl Shake128State {
+        /// Create a new state.
+        ///
+        /// This allocates the necessary memory.
+        pub fn new() -> Self {
+            let state = Self {
+                state: unsafe { Hacl_Hash_SHA3_Scalar_state_malloc() },
+            };
+
+            state
+        }
+
+        /// Free the state.
+        ///
+        /// **NOTE:** The state is not usable after this call!
+        pub fn free(&mut self) {
+            unsafe {
+                Hacl_Hash_SHA3_Scalar_state_free(self.state);
+                // null the pointer (hacl isn't doing that unfortunately)
+                // This way we can check whether the memory was freed already or not.
+                self.state = null_mut();
+            }
+        }
+
+        pub fn absorb_blocks(&mut self, input: &[u8]) {
+            unsafe {
+                Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
+                    self.state,
+                    input.as_ptr() as _,
+                    input.len() as u32,
+                )
+            };
+        }
+
+        pub fn absorb_final(&mut self, input: &[u8]) {
+            unsafe {
+                Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
+                    self.state,
+                    input.as_ptr() as _,
+                    input.len() as u32,
+                )
+            };
+        }
+
+        pub fn squeeze_blocks<const OUTPUT_BYTES: usize>(&mut self) -> [u8; OUTPUT_BYTES] {
+            debug_assert!(OUTPUT_BYTES % 168 == 0);
+            let mut output = [0u8; OUTPUT_BYTES];
+            unsafe {
+                Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(
+                    self.state,
+                    output.as_mut_ptr(),
+                    OUTPUT_BYTES as u32,
+                )
+            };
+
+            output
+        }
+    }
+
+    /// **NOTE:** When generating C code with Eurydice, the state needs to be freed
+    /// manually for now due to a bug in Eurydice.
+    impl Drop for Shake128State {
+        fn drop(&mut self) {
+            unsafe {
+                // A manual free may have occurred already.
+                // Avoid double free.
+ if !self.state.is_null() { + Hacl_Hash_SHA3_Scalar_state_free(self.state) + } + } + } + } +} + +pub mod incremental_x4 { + use std::ptr::null_mut; + + use libcrux_hacl::{ + Hacl_Hash_SHA3_Scalar_shake128_absorb_final, Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks, + Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks, Hacl_Hash_SHA3_Scalar_state_free, + Hacl_Hash_SHA3_Scalar_state_malloc, + }; + #[cfg(simd256)] + use libcrux_hacl::{ + Hacl_Hash_SHA3_Simd256_shake128_absorb_final, + Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks, + Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks, Hacl_Hash_SHA3_Simd256_state_free, + Hacl_Hash_SHA3_Simd256_state_malloc, Lib_IntVector_Intrinsics_vec256, + }; + #[cfg(simd256)] + use libcrux_platform::simd256_support; + + /// SHAKE 128 + /// + /// Handle to internal SHAKE 128 state + #[cfg(simd256)] + pub struct Shake128StateX4 { + statex4: *mut Lib_IntVector_Intrinsics_vec256, + state: [*mut u64; 4], + } + + #[cfg(not(simd256))] + pub struct Shake128StateX4 { + state: [*mut u64; 4], + } + + impl Shake128StateX4 { + #[cfg(simd256)] + pub fn new() -> Self { + if cfg!(simd256) && simd256_support() { + Self { + statex4: unsafe { Hacl_Hash_SHA3_Simd256_state_malloc() }, + state: [null_mut(), null_mut(), null_mut(), null_mut()], + } + } else { + Self { + statex4: null_mut(), + state: unsafe { + [ + Hacl_Hash_SHA3_Scalar_state_malloc(), + Hacl_Hash_SHA3_Scalar_state_malloc(), + Hacl_Hash_SHA3_Scalar_state_malloc(), + Hacl_Hash_SHA3_Scalar_state_malloc(), + ] + }, + } + } + } + + #[cfg(not(simd256))] + pub fn new() -> Self { + Self { + state: unsafe { + [ + Hacl_Hash_SHA3_Scalar_state_malloc(), + Hacl_Hash_SHA3_Scalar_state_malloc(), + Hacl_Hash_SHA3_Scalar_state_malloc(), + Hacl_Hash_SHA3_Scalar_state_malloc(), + ] + }, + } + } + + /// Free and consume the state. + /// + /// **NOTE:** This consumes the value. It is not usable after this call! + #[cfg(simd256)] + pub fn free(mut self) { + if cfg!(simd256) && simd256_support() { + unsafe { + Hacl_Hash_SHA3_Simd256_state_free(self.statex4); + // null the pointer (hacl isn't doing that unfortunately) + // This way we can check whether the memory was freed already or not. + self.statex4 = null_mut(); + } + } else { + for i in 0..4 { + unsafe { + Hacl_Hash_SHA3_Scalar_state_free(self.state[i]); + // null the pointer (hacl isn't doing that unfortunately) + // This way we can check whether the memory was freed already or not. + self.state[i] = null_mut(); + } + } + } + } + + /// Free and consume the state. + /// + /// **NOTE:** This consumes the value. It is not usable after this call! + #[cfg(not(simd256))] + pub fn free(mut self) { + for i in 0..4 { + unsafe { + Hacl_Hash_SHA3_Scalar_state_free(self.state[i]); + // null the pointer (hacl isn't doing that unfortunately) + // This way we can check whether the memory was freed already or not. + self.state[i] = null_mut(); + } + } + } + + /// Absorb up to 4 blocks at a time. + /// + /// The input length must be a multiple of the SHA3 block length of 168. + /// + /// The input is truncated at `u32::MAX`. 
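+        ///
+        /// A hypothetical sketch (illustration only): absorbing one full
+        /// 168-byte block per lane, assuming four equal-length inputs
+        /// `b0`..`b3`:
+        ///
+        /// ```ignore
+        /// let mut state = Shake128StateX4::new();
+        /// state.absorb_blocks([&b0[..], &b1[..], &b2[..], &b3[..]]);
+        /// ```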
+        #[cfg(simd256)]
+        pub fn absorb_blocks(&mut self, input: [&[u8]; 4]) {
+            debug_assert!(
+                (input[0].len() == input[1].len() || input[1].len() == 0)
+                    && (input[0].len() == input[2].len() || input[2].len() == 0)
+                    && (input[0].len() == input[3].len() || input[3].len() == 0)
+            );
+            debug_assert!(input[0].len() % 168 == 0);
+
+            if simd256_support() {
+                unsafe {
+                    Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
+                        self.statex4,
+                        input[0].as_ptr() as _,
+                        input[1].as_ptr() as _,
+                        input[2].as_ptr() as _,
+                        input[3].as_ptr() as _,
+                        input[0].len() as u32,
+                    )
+                };
+            } else {
+                for i in 0..4 {
+                    if !input[i].is_empty() {
+                        unsafe {
+                            Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
+                                self.state[i],
+                                input[i].as_ptr() as _,
+                                input[i].len() as u32,
+                            );
+                        };
+                    }
+                }
+            }
+        }
+
+        /// Absorb up to 4 blocks at a time.
+        ///
+        /// The input length must be a multiple of the SHA3 block length of 168.
+        ///
+        /// The input is truncated at `u32::MAX`.
+        #[cfg(not(simd256))]
+        pub fn absorb_blocks(&mut self, input: [&[u8]; 4]) {
+            debug_assert!(
+                (input[0].len() == input[1].len() || input[1].len() == 0)
+                    && (input[0].len() == input[2].len() || input[2].len() == 0)
+                    && (input[0].len() == input[3].len() || input[3].len() == 0)
+            );
+            debug_assert!(input[0].len() % 168 == 0);
+
+            for i in 0..4 {
+                if !input[i].is_empty() {
+                    unsafe {
+                        Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
+                            self.state[i],
+                            input[i].as_ptr() as _,
+                            input[i].len() as u32,
+                        );
+                    };
+                }
+            }
+        }
+
+        /// Absorb up to 4 final blocks at a time.
+        ///
+        /// The input length must be less than the SHA3 block length of 168.
+        ///
+        /// The input is truncated at `u32::MAX`.
+        #[cfg(simd256)]
+        pub fn absorb_final(&mut self, input: [&[u8]; 4]) {
+            debug_assert!(
+                (input[0].len() == input[1].len() || input[1].len() == 0)
+                    && (input[0].len() == input[2].len() || input[2].len() == 0)
+                    && (input[0].len() == input[3].len() || input[3].len() == 0)
+            );
+            debug_assert!(input[0].len() < 168);
+
+            if cfg!(simd256) && simd256_support() {
+                unsafe {
+                    Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
+                        self.statex4,
+                        input[0].as_ptr() as _,
+                        input[1].as_ptr() as _,
+                        input[2].as_ptr() as _,
+                        input[3].as_ptr() as _,
+                        input[0].len() as u32,
+                    )
+                };
+            } else {
+                for i in 0..4 {
+                    if !input[i].is_empty() {
+                        unsafe {
+                            Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
+                                self.state[i],
+                                input[i].as_ptr() as _,
+                                input[i].len() as u32,
+                            );
+                        };
+                    }
+                }
+            }
+        }
+
+        /// Absorb up to 4 final blocks at a time.
+        ///
+        /// The input length must be less than the SHA3 block length of 168.
+        ///
+        /// The input is truncated at `u32::MAX`.
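+        ///
+        /// A hypothetical sketch (illustration only), with four equal-length
+        /// partial blocks of less than 168 bytes each, e.g. 34-byte Kyber
+        /// seeds `s0`..`s3`:
+        ///
+        /// ```ignore
+        /// state.absorb_final([&s0[..], &s1[..], &s2[..], &s3[..]]);
+        /// ```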
+        #[cfg(not(simd256))]
+        pub fn absorb_final(&mut self, input: [&[u8]; 4]) {
+            debug_assert!(
+                (input[0].len() == input[1].len() || input[1].len() == 0)
+                    && (input[0].len() == input[2].len() || input[2].len() == 0)
+                    && (input[0].len() == input[3].len() || input[3].len() == 0)
+            );
+            debug_assert!(input[0].len() < 168);
+
+            for i in 0..4 {
+                if !input[i].is_empty() {
+                    unsafe {
+                        Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
+                            self.state[i],
+                            input[i].as_ptr() as _,
+                            input[i].len() as u32,
+                        );
+                    };
+                }
+            }
+        }
+
+        #[cfg(simd256)]
+        pub fn squeeze_blocks<const OUTPUT_BYTES: usize, const M: usize>(
+            &mut self,
+        ) -> [[u8; OUTPUT_BYTES]; M] {
+            debug_assert!(OUTPUT_BYTES % 168 == 0);
+            debug_assert!(M <= self.state.len() && (M == 2 || M == 3 || M == 4));
+
+            if cfg!(simd256) && simd256_support() {
+                let mut output = [[0u8; OUTPUT_BYTES]; 4];
+                unsafe {
+                    Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(
+                        self.statex4,
+                        output[0].as_mut_ptr(),
+                        output[1].as_mut_ptr(),
+                        output[2].as_mut_ptr(),
+                        output[3].as_mut_ptr(),
+                        OUTPUT_BYTES as u32,
+                    );
+                };
+                core::array::from_fn(|i| output[i])
+            } else {
+                let mut output = [[0u8; OUTPUT_BYTES]; M];
+                for i in 0..M {
+                    unsafe {
+                        Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(
+                            self.state[i],
+                            output[i].as_mut_ptr(),
+                            OUTPUT_BYTES as u32,
+                        );
+                    };
+                }
+                output
+            }
+        }
+
+        #[cfg(not(simd256))]
+        pub fn squeeze_blocks<const OUTPUT_BYTES: usize, const M: usize>(
+            &mut self,
+        ) -> [[u8; OUTPUT_BYTES]; M] {
+            debug_assert!(OUTPUT_BYTES % 168 == 0);
+            debug_assert!(M <= self.state.len());
+
+            let mut output = [[0u8; OUTPUT_BYTES]; M];
+
+            for i in 0..M {
+                unsafe {
+                    Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(
+                        self.state[i],
+                        output[i].as_mut_ptr(),
+                        OUTPUT_BYTES as u32,
+                    );
+                };
+            }
+
+            output
+        }
+    }
+
+    /// **NOTE:** When generating C code with Eurydice, the state needs to be freed
+    /// manually for now due to a bug in Eurydice.
+    impl Drop for Shake128StateX4 {
+        #[cfg(simd256)]
+        fn drop(&mut self) {
+            if cfg!(simd256) && simd256_support() {
+                // A manual free may have occurred already.
+                // Avoid double free.
+                unsafe {
+                    if !self.statex4.is_null() {
+                        Hacl_Hash_SHA3_Simd256_state_free(self.statex4);
+                    }
+                }
+            } else {
+                // A manual free may have occurred already.
+                // Avoid double free.
+                for i in 0..4 {
+                    unsafe {
+                        if !self.state[i].is_null() {
+                            Hacl_Hash_SHA3_Scalar_state_free(self.state[i])
+                        }
+                    }
+                }
+            }
+        }
+
+        #[cfg(not(simd256))]
+        fn drop(&mut self) {
+            // A manual free may have occurred already.
+            // Avoid double free.
+            for i in 0..4 {
+                unsafe {
+                    if !self.state[i].is_null() {
+                        Hacl_Hash_SHA3_Scalar_state_free(self.state[i])
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/src/kem/kyber/arithmetic.rs b/src/kem/kyber/arithmetic.rs
index e6e5a0b3d..4543b6fbe 100644
--- a/src/kem/kyber/arithmetic.rs
+++ b/src/kem/kyber/arithmetic.rs
@@ -67,6 +67,8 @@ pub(crate) fn barrett_reduce(value: FieldElement) -> FieldElement {
     result
 }
 
+const INVERSE_OF_MODULUS_MOD_MONTGOMERY_R: u32 = 62209; // FIELD_MODULUS^{-1} mod MONTGOMERY_R
+
 /// Signed Montgomery Reduction
 ///
 /// Given an input `value`, `montgomery_reduce` outputs a representative `o`
@@ -78,9 +80,6 @@ pub(crate) fn barrett_reduce(value: FieldElement) -> FieldElement {
 /// `|result| ≤ (|value| / MONTGOMERY_R) + (FIELD_MODULUS / 2)`
 ///
 /// In particular, if `|value| ≤ FIELD_MODULUS * MONTGOMERY_R`, then `|o| < (3 · FIELD_MODULUS) / 2`.
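+/// As a concrete sanity check of this contract (illustrative only): with
+/// `FIELD_MODULUS = 3329` and `MONTGOMERY_R = 2^16`, the input
+/// `value = FIELD_MODULUS * MONTGOMERY_R` satisfies the precondition below,
+/// and the postcondition bounds the result to `[-4993, 4993]`, i.e. at most
+/// `(3 * FIELD_MODULUS) / 2` in magnitude.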
-
-const INVERSE_OF_MODULUS_MOD_MONTGOMERY_R: u32 = 62209; // FIELD_MODULUS^{-1} mod MONTGOMERY_R
-
 #[cfg_attr(hax, hax_lib_macros::requires(value >= -FIELD_MODULUS * MONTGOMERY_R && value <= FIELD_MODULUS * MONTGOMERY_R))]
 #[cfg_attr(hax, hax_lib_macros::ensures(|result| result >= -(3 * FIELD_MODULUS) / 2 && result <= (3 * FIELD_MODULUS) / 2))]
 pub(crate) fn montgomery_reduce(value: FieldElement) -> MontgomeryFieldElement {
diff --git a/src/kem/kyber/constants.rs b/src/kem/kyber/constants.rs
index f64a05b2f..a48705a2f 100644
--- a/src/kem/kyber/constants.rs
+++ b/src/kem/kyber/constants.rs
@@ -13,12 +13,6 @@ pub(crate) const BITS_PER_RING_ELEMENT: usize = COEFFICIENTS_IN_RING_ELEMENT * 12
 /// Bytes required per (uncompressed) ring element
 pub(crate) const BYTES_PER_RING_ELEMENT: usize = BITS_PER_RING_ELEMENT / 8;
 
-/// Seed size for rejection sampling.
-///
-/// See for some background regarding
-/// this choice.
-pub(crate) const REJECTION_SAMPLING_SEED_SIZE: usize = 168 * 5;
-
 /// PKE message size
 pub(crate) const SHARED_SECRET_SIZE: usize = 32;
 
diff --git a/src/kem/kyber/hash_functions.rs b/src/kem/kyber/hash_functions.rs
index 3eeb58bd8..12a198453 100644
--- a/src/kem/kyber/hash_functions.rs
+++ b/src/kem/kyber/hash_functions.rs
@@ -1,10 +1,8 @@
 #![allow(non_snake_case)]
 
-use libcrux_platform::simd256_support;
-
-use crate::digest::{self, digest_size, Algorithm};
-
-use super::constants::{H_DIGEST_SIZE, REJECTION_SAMPLING_SEED_SIZE};
+use super::constants::H_DIGEST_SIZE;
+use crate::digest::{self, digest_size, incremental_x4::Shake128StateX4, Algorithm};
+// use crate::sha3::incremental_x4::Shake128StateX4;
 
 pub(crate) fn G(input: &[u8]) -> [u8; digest_size(Algorithm::Sha3_512)] {
     digest::sha3_512(input)
@@ -19,49 +17,50 @@ pub(crate) fn PRF<const LEN: usize>(input: &[u8]) -> [u8; LEN] {
 }
 
 #[inline(always)]
-pub(crate) fn XOFx4<const K: usize>(
-    input: [[u8; 34]; K],
-) -> [[u8; REJECTION_SAMPLING_SEED_SIZE]; K] {
-    let mut out = [[0u8; REJECTION_SAMPLING_SEED_SIZE]; K];
+pub(crate) fn absorb<const K: usize>(input: [[u8; 34]; K]) -> Shake128StateX4 {
+    debug_assert!(K == 2 || K == 3 || K == 4);
 
-    if !simd256_support() {
-        // Without SIMD256 support we fake it and call shake128 4 times.
-        // While shake128x4 does this too, this is faster because we only do the
-        // required number of invocations (K).
-        for i in 0..K {
-            out[i] = digest::shake128::<REJECTION_SAMPLING_SEED_SIZE>(&input[i]);
-        }
-    } else {
-        // Always do 4 SHA3 at a time even if we need less.
-        // XXX: Cast for hax extraction
-        match K as u8 {
-            2 => {
-                let (d0, d1, _, _) = digest::shake128x4::<REJECTION_SAMPLING_SEED_SIZE>(
-                    &input[0], &input[1], &input[0], &input[1],
-                );
-                out[0] = d0;
-                out[1] = d1;
-            }
-            3 => {
-                let (d0, d1, d2, _) = digest::shake128x4::<REJECTION_SAMPLING_SEED_SIZE>(
-                    &input[0], &input[1], &input[2], &input[0],
-                );
-                out[0] = d0;
-                out[1] = d1;
-                out[2] = d2;
-            }
-            4 => {
-                let (d0, d1, d2, d3) = digest::shake128x4::<REJECTION_SAMPLING_SEED_SIZE>(
-                    &input[0], &input[1], &input[2], &input[3],
-                );
-                out[0] = d0;
-                out[1] = d1;
-                out[2] = d2;
-                out[3] = d3;
-            }
-            _ => unreachable!(),
-        };
+    let mut state = Shake128StateX4::new();
+    // XXX: We need to do this dance to get it through hax and eurydice for now.
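+    // (Concretely — an assumption about the toolchain, not verified here —
+    // iterator- and `map`-based ways of building the `[&[u8]; K]` array do
+    // not extract through hax/Eurydice yet, so a plain indexed loop with an
+    // explicit `as &[u8]` cast is used instead.)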
+    let mut data: [&[u8]; K] = [&[0u8]; K];
+    for i in 0..K {
+        data[i] = &input[i] as &[u8];
+    }
+    state.absorb_final(data);
+    state
+}
+
+const BLOCK_SIZE: usize = 168;
+const THREE_BLOCKS: usize = BLOCK_SIZE * 3;
+
+#[inline(always)]
+pub(crate) fn squeeze_three_blocks<const K: usize>(
+    xof_state: &mut Shake128StateX4,
+) -> [[u8; THREE_BLOCKS]; K] {
+    let output: [[u8; THREE_BLOCKS]; K] = xof_state.squeeze_blocks();
+    let mut out = [[0u8; THREE_BLOCKS]; K];
+    for i in 0..K {
+        out[i] = output[i];
     }
+    out
+}
+#[inline(always)]
+pub(crate) fn squeeze_block<const K: usize>(
+    xof_state: &mut Shake128StateX4,
+) -> [[u8; BLOCK_SIZE]; K] {
+    let output: [[u8; BLOCK_SIZE]; K] = xof_state.squeeze_blocks();
+    let mut out = [[0u8; BLOCK_SIZE]; K];
+    for i in 0..K {
+        out[i] = output[i];
+    }
     out
 }
+
+/// Free the memory of the state.
+///
+/// **NOTE:** This needs to be done manually for now.
+#[inline(always)]
+pub(crate) fn free_state(xof_state: Shake128StateX4) {
+    xof_state.free_memory();
+}
diff --git a/src/kem/kyber/matrix.rs b/src/kem/kyber/matrix.rs
index 5d51e7c7d..a4908eaaa 100644
--- a/src/kem/kyber/matrix.rs
+++ b/src/kem/kyber/matrix.rs
@@ -4,9 +4,8 @@ use super::{
         PolynomialRingElement,
     },
     constants::COEFFICIENTS_IN_RING_ELEMENT,
-    hash_functions::XOFx4,
     ntt::{invert_ntt_montgomery, ntt_multiply},
-    sampling::sample_from_uniform_distribution,
+    sampling::sample_from_xof,
 };
 use crate::cloop;
 
@@ -24,16 +23,13 @@ pub(in crate::kem::kyber) fn sample_matrix_A(
             seeds[j][32] = i as u8;
             seeds[j][33] = j as u8;
         }
-        let xof_bytes = XOFx4::<K>(seeds);
-
+        let sampled = sample_from_xof(seeds);
         for j in 0..K {
-            let sampled = sample_from_uniform_distribution(xof_bytes[j]);
-
             // A[i][j] = A_transpose[j][i]
             if transpose {
-                A_transpose[j][i] = sampled;
+                A_transpose[j][i] = sampled[j];
            } else {
-                A_transpose[i][j] = sampled;
+                A_transpose[i][j] = sampled[j];
             }
         }
     }
diff --git a/src/kem/kyber/sampling.rs b/src/kem/kyber/sampling.rs
index ba0147af0..feba5d1b4 100644
--- a/src/kem/kyber/sampling.rs
+++ b/src/kem/kyber/sampling.rs
@@ -1,16 +1,11 @@
 use super::{
     arithmetic::{FieldElement, PolynomialRingElement},
-    constants::{COEFFICIENTS_IN_RING_ELEMENT, FIELD_MODULUS, REJECTION_SAMPLING_SEED_SIZE},
+    constants::{COEFFICIENTS_IN_RING_ELEMENT, FIELD_MODULUS},
+    hash_functions::*,
 };
 use crate::cloop;
 use crate::hax_utils::hax_debug_assert;
 
-fn rejection_sampling_panic_with_diagnostic() {
-    panic!()
-    // We would instead prefer to do the following:
-    // panic!("5 blocks of SHAKE128 output were extracted from the seed for rejection sampling, but not all of them could be sampled.\nWe would appreciate it if you could report this error by opening an issue at https://github.com/cryspen/libcrux/issues");
-}
-
 /// If `bytes` contains a set of uniformly random bytes, this function
 /// uniformly samples a ring element `â` that is treated as being the NTT representation
 /// of the corresponding polynomial `a`.
@@ -49,19 +44,14 @@ fn rejection_sampling_panic_with_diagnostic()
 ///
 /// The NIST FIPS 203 standard can be found at
 /// <https://csrc.nist.gov/pubs/fips/203/ipd>.
-pub fn sample_from_uniform_distribution(
-    randomness: [u8; REJECTION_SAMPLING_SEED_SIZE],
-) -> PolynomialRingElement {
-    let mut sampled_coefficients: usize = 0;
-    let mut out: PolynomialRingElement = PolynomialRingElement::ZERO;
-
-    // This loop is written the way it is since reasoning about early returns,
-    // breaks, and continues is not well supported in Fstar at the moment. Rewriting
-    // this code to use an early return is being tracked in:
-    // https://github.com/cryspen/libcrux/issues/134
-    let mut done = false;
-    for bytes in randomness.chunks(3) {
-        if !done {
+fn sample_from_uniform_distribution_next<const K: usize, const N: usize>(
+    randomness: [[u8; N]; K],
+    sampled_coefficients: &mut [usize; K],
+    out: &mut [PolynomialRingElement; K],
+) -> bool {
+    let mut done = true;
+    for i in 0..K {
+        for bytes in randomness[i].chunks(3) {
             let b1 = bytes[0] as i32;
             let b2 = bytes[1] as i32;
             let b3 = bytes[2] as i32;
@@ -69,28 +59,46 @@ pub fn sample_from_uniform_distribution(
             let d1 = ((b2 & 0xF) << 8) | b1;
             let d2 = (b3 << 4) | (b2 >> 4);
 
-            if d1 < FIELD_MODULUS && sampled_coefficients < COEFFICIENTS_IN_RING_ELEMENT {
-                out.coefficients[sampled_coefficients] = d1;
-                sampled_coefficients += 1
+            if d1 < FIELD_MODULUS && sampled_coefficients[i] < COEFFICIENTS_IN_RING_ELEMENT {
+                out[i].coefficients[sampled_coefficients[i]] = d1;
+                sampled_coefficients[i] += 1
             }
-            if d2 < FIELD_MODULUS && sampled_coefficients < COEFFICIENTS_IN_RING_ELEMENT {
-                out.coefficients[sampled_coefficients] = d2;
-                sampled_coefficients += 1;
-            }
-            if sampled_coefficients == COEFFICIENTS_IN_RING_ELEMENT {
-                done = true;
+            if d2 < FIELD_MODULUS && sampled_coefficients[i] < COEFFICIENTS_IN_RING_ELEMENT {
+                out[i].coefficients[sampled_coefficients[i]] = d2;
+                sampled_coefficients[i] += 1;
             }
         }
+        if sampled_coefficients[i] < COEFFICIENTS_IN_RING_ELEMENT {
+            done = false
+        }
     }
+    done
+}
 
-    if !done {
-        // Requiring more than 5 blocks to sample a ring element should be very
-        // unlikely according to:
-        // https://eprint.iacr.org/2023/708.pdf
-        rejection_sampling_panic_with_diagnostic();
+pub(super) fn sample_from_xof<const K: usize>(seeds: [[u8; 34]; K]) -> [PolynomialRingElement; K] {
+    let mut sampled_coefficients: [usize; K] = [0; K];
+    let mut out: [PolynomialRingElement; K] = [PolynomialRingElement::ZERO; K];
+
+    let mut xof_state = absorb(seeds);
+    let randomness = squeeze_three_blocks(&mut xof_state);
+
+    let mut done =
+        sample_from_uniform_distribution_next(randomness, &mut sampled_coefficients, &mut out);
+
+    // Requiring more than 5 blocks to sample a ring element should be very
+    // unlikely according to:
+    // https://eprint.iacr.org/2023/708.pdf
+    // To avoid failing here, we squeeze more blocks out of the state until
+    // we have enough.
+    while !done {
+        let randomness = squeeze_block(&mut xof_state);
+        done =
+            sample_from_uniform_distribution_next(randomness, &mut sampled_coefficients, &mut out);
     }
+    // XXX: We have to manually free the state here due to a Eurydice issue.
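+    // (For context: in pure Rust the `Drop` impl on the underlying
+    // `hacl::sha3::incremental_x4::Shake128StateX4` would free this state as
+    // well — it checks for null to avoid a double free — but the extracted C
+    // code currently needs this explicit `free_state` call.)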
+    free_state(xof_state);
 
-    hax_debug_assert!(out
+    hax_debug_assert!(out[0]
         .coefficients
         .into_iter()
         .all(|coefficient| coefficient >= 0 && coefficient < FIELD_MODULUS));
diff --git a/sys/hacl/c/include/Hacl_Hash_SHA3_Scalar.h b/sys/hacl/c/include/Hacl_Hash_SHA3_Scalar.h
index e49f1967b..a40c2d04d 100644
--- a/sys/hacl/c/include/Hacl_Hash_SHA3_Scalar.h
+++ b/sys/hacl/c/include/Hacl_Hash_SHA3_Scalar.h
@@ -37,27 +37,95 @@ extern "C" {
 
 void
 Hacl_Hash_SHA3_Scalar_shake128(
-  uint32_t inputByteLen,
-  uint8_t *input,
+  uint8_t *output,
   uint32_t outputByteLen,
-  uint8_t *output
+  uint8_t *input,
+  uint32_t inputByteLen
 );
 
 void
 Hacl_Hash_SHA3_Scalar_shake256(
-  uint32_t inputByteLen,
-  uint8_t *input,
+  uint8_t *output,
   uint32_t outputByteLen,
-  uint8_t *output
+  uint8_t *input,
+  uint32_t inputByteLen
 );
 
-void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
+
+void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
+
+void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
 
-void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
 
-void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+/**
+Allocate state buffer of 200 bytes
+*/
+uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void);
 
-void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+/**
+Free state buffer
+*/
+void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s);
+
+/**
+Absorb a number of input blocks and write the output state
+
+  This function is intended to receive a hash state and input buffer.
+  It processes an input that is a multiple of 168 bytes (SHAKE128 block size);
+  any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+*/
+void
+Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
+  uint64_t *state,
+  uint8_t *input,
+  uint32_t inputByteLen
+);
+
+/**
+Absorb a final partial block of input and write the output state
+
+  This function is intended to receive a hash state and input buffer.
+  It processes a sequence of bytes at the end of the input buffer that is less
+  than 168 bytes (SHAKE128 block size);
+  any bytes of full blocks at the start of the input buffer are ignored.
+
+  The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffer must be passed to `inputByteLen`,
+  including the number of full-block bytes at the start of the input buffer
+  that are ignored
+*/
+void
+Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
+  uint64_t *state,
+  uint8_t *input,
+  uint32_t inputByteLen
+);
+
+/**
+Squeeze a hash state to output buffer
+
+  This function is intended to receive a hash state and output buffer.
+  It produces an output that is a multiple of 168 bytes (SHAKE128 block size);
+  any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN) points to hash state, i.e., uint64_t[25]
+  The argument `output` (OUT) points to `outputByteLen` bytes of valid memory,
+  i.e., uint8_t[outputByteLen]
+*/
+void
+Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(
+  uint64_t *state,
+  uint8_t *output,
+  uint32_t outputByteLen
+);
 
 #if defined(__cplusplus)
 }
diff --git a/sys/hacl/c/include/Hacl_Hash_SHA3_Simd256.h b/sys/hacl/c/include/Hacl_Hash_SHA3_Simd256.h
index 3dd3772dd..f38bf7cbb 100644
--- a/sys/hacl/c/include/Hacl_Hash_SHA3_Simd256.h
+++ b/sys/hacl/c/include/Hacl_Hash_SHA3_Simd256.h
@@ -35,6 +35,8 @@ extern "C" {
 #include "krml/lowstar_endianness.h"
 #include "krml/internal/target.h"
 
+#include "libintvector.h"
+
 typedef struct K____uint8_t___uint8_t__s
 {
   uint8_t *fst;
@@ -58,82 +60,162 @@ K____uint8_t___uint8_t____K____uint8_t___uint8_t_;
 
 void
 Hacl_Hash_SHA3_Simd256_shake128(
-  uint32_t inputByteLen,
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint32_t outputByteLen,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
-  uint32_t outputByteLen,
-  uint8_t *output0,
-  uint8_t *output1,
-  uint8_t *output2,
-  uint8_t *output3
+  uint32_t inputByteLen
 );
 
 void
 Hacl_Hash_SHA3_Simd256_shake256(
-  uint32_t inputByteLen,
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint32_t outputByteLen,
  uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
-  uint32_t outputByteLen,
-  uint8_t *output0,
-  uint8_t *output1,
-  uint8_t *output2,
-  uint8_t *output3
+  uint32_t inputByteLen
 );
 
 void
 Hacl_Hash_SHA3_Simd256_sha3_224(
-  uint32_t inputByteLen,
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_256(
   uint8_t *output0,
   uint8_t *output1,
   uint8_t *output2,
-  uint8_t *output3
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
 );
 
 void
-Hacl_Hash_SHA3_Simd256_sha3_256(
-  uint32_t inputByteLen,
+Hacl_Hash_SHA3_Simd256_sha3_384(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_512(
   uint8_t *output0,
   uint8_t *output1,
   uint8_t *output2,
-  uint8_t *output3
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
 );
 
+/**
+Allocate quadruple state buffer (200 bytes for each)
+*/
+Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_SHA3_Simd256_state_malloc(void);
+
+/**
+Free quadruple state buffer
+*/
+void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s);
+
+/**
+Absorb a number of blocks of 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes inputs that are a multiple of 168 bytes (SHAKE128 block size);
+  any additional bytes of a final partial block for each buffer are ignored.
+
+  The argument `state` (IN/OUT) points to quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+*/
 void
-Hacl_Hash_SHA3_Simd256_sha3_384(
-  uint32_t inputByteLen,
+Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
+  Lib_IntVector_Intrinsics_vec256 *state,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
-  uint8_t *output0,
-  uint8_t *output1,
-  uint8_t *output2,
-  uint8_t *output3
+  uint32_t inputByteLen
 );
 
+/**
+Absorb the final partial blocks of 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes a sequence of bytes at the end of each input buffer that is less
+  than 168 bytes (SHAKE128 block size);
+  any bytes of full blocks at the start of the input buffers are ignored.
+
+  The argument `state` (IN/OUT) points to quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffers must be passed to `inputByteLen`,
+  including the number of full-block bytes at the start of each input buffer
+  that are ignored
+*/
 void
-Hacl_Hash_SHA3_Simd256_sha3_512(
-  uint32_t inputByteLen,
+Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
+  Lib_IntVector_Intrinsics_vec256 *state,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+/**
+Squeeze a quadruple hash state to 4 output buffers
+
+  This function is intended to receive a quadruple hash state and 4 output buffers.
+  It produces 4 outputs, each a multiple of 168 bytes (SHAKE128 block size);
+  any additional bytes of a final partial block for each buffer are ignored.
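+
+  (In libcrux, the Rust wrapper `hacl::sha3::incremental_x4::Shake128StateX4`
+  introduced in this change dispatches between this quadruple state and four
+  per-lane scalar states, depending on runtime SIMD256 support.)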
+
+  The argument `state` (IN) points to quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[outputByteLen]
+*/
+void
+Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(
+  Lib_IntVector_Intrinsics_vec256 *state,
   uint8_t *output0,
   uint8_t *output1,
   uint8_t *output2,
-  uint8_t *output3
+  uint8_t *output3,
+  uint32_t outputByteLen
 );
 
 #if defined(__cplusplus)
diff --git a/sys/hacl/c/include/msvc/Hacl_Hash_SHA3_Scalar.h b/sys/hacl/c/include/msvc/Hacl_Hash_SHA3_Scalar.h
index e49f1967b..a40c2d04d 100644
--- a/sys/hacl/c/include/msvc/Hacl_Hash_SHA3_Scalar.h
+++ b/sys/hacl/c/include/msvc/Hacl_Hash_SHA3_Scalar.h
@@ -37,27 +37,95 @@ extern "C" {
 
 void
 Hacl_Hash_SHA3_Scalar_shake128(
-  uint32_t inputByteLen,
-  uint8_t *input,
+  uint8_t *output,
   uint32_t outputByteLen,
-  uint8_t *output
+  uint8_t *input,
+  uint32_t inputByteLen
 );
 
 void
 Hacl_Hash_SHA3_Scalar_shake256(
-  uint32_t inputByteLen,
-  uint8_t *input,
+  uint8_t *output,
   uint32_t outputByteLen,
-  uint8_t *output
+  uint8_t *input,
+  uint32_t inputByteLen
 );
 
-void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
+
+void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
+
+void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
 
-void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen);
 
-void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+/**
+Allocate state buffer of 200 bytes
+*/
+uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void);
 
-void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output);
+/**
+Free state buffer
+*/
+void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s);
+
+/**
+Absorb a number of input blocks and write the output state
+
+  This function is intended to receive a hash state and input buffer.
+  It processes an input that is a multiple of 168 bytes (SHAKE128 block size);
+  any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+*/
+void
+Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
+  uint64_t *state,
+  uint8_t *input,
+  uint32_t inputByteLen
+);
+
+/**
+Absorb a final partial block of input and write the output state
+
+  This function is intended to receive a hash state and input buffer.
+  It processes a sequence of bytes at the end of the input buffer that is less
+  than 168 bytes (SHAKE128 block size);
+  any bytes of full blocks at the start of the input buffer are ignored.
+
+  The argument `state` (IN/OUT) points to hash state, i.e., uint64_t[25]
+  The argument `input` (IN) points to `inputByteLen` bytes of valid memory,
+  i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffer must be passed to `inputByteLen`,
+  including the number of full-block bytes at the start of the input buffer
+  that are ignored
+*/
+void
+Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
+  uint64_t *state,
+  uint8_t *input,
+  uint32_t inputByteLen
+);
+
+/**
+Squeeze a hash state to output buffer
+
+  This function is intended to receive a hash state and output buffer.
+  It produces an output that is a multiple of 168 bytes (SHAKE128 block size);
+  any additional bytes of a final partial block are ignored.
+
+  The argument `state` (IN) points to hash state, i.e., uint64_t[25]
+  The argument `output` (OUT) points to `outputByteLen` bytes of valid memory,
+  i.e., uint8_t[outputByteLen]
+*/
+void
+Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(
+  uint64_t *state,
+  uint8_t *output,
+  uint32_t outputByteLen
+);
 
 #if defined(__cplusplus)
 }
diff --git a/sys/hacl/c/include/msvc/Hacl_Hash_SHA3_Simd256.h b/sys/hacl/c/include/msvc/Hacl_Hash_SHA3_Simd256.h
index 3dd3772dd..302094a43 100644
--- a/sys/hacl/c/include/msvc/Hacl_Hash_SHA3_Simd256.h
+++ b/sys/hacl/c/include/msvc/Hacl_Hash_SHA3_Simd256.h
@@ -35,6 +35,8 @@ extern "C" {
 #include "krml/lowstar_endianness.h"
 #include "krml/internal/target.h"
 
+#include "libintvector.h"
+
 typedef struct K____uint8_t___uint8_t__s
 {
   uint8_t *fst;
@@ -58,82 +60,162 @@ K____uint8_t___uint8_t____K____uint8_t___uint8_t_;
 
 void
 Hacl_Hash_SHA3_Simd256_shake128(
-  uint32_t inputByteLen,
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint32_t outputByteLen,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
-  uint32_t outputByteLen,
-  uint8_t *output0,
-  uint8_t *output1,
-  uint8_t *output2,
-  uint8_t *output3
+  uint32_t inputByteLen
 );
 
 void
 Hacl_Hash_SHA3_Simd256_shake256(
-  uint32_t inputByteLen,
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
+  uint32_t outputByteLen,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
-  uint32_t outputByteLen,
-  uint8_t *output0,
-  uint8_t *output1,
-  uint8_t *output2,
-  uint8_t *output3
+  uint32_t inputByteLen
 );
 
 void
 Hacl_Hash_SHA3_Simd256_sha3_224(
-  uint32_t inputByteLen,
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_256(
   uint8_t *output0,
   uint8_t *output1,
   uint8_t *output2,
-  uint8_t *output3
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
 );
 
 void
-Hacl_Hash_SHA3_Simd256_sha3_256(
-  uint32_t inputByteLen,
+Hacl_Hash_SHA3_Simd256_sha3_384(
+  uint8_t *output0,
+  uint8_t *output1,
+  uint8_t *output2,
+  uint8_t *output3,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+void
+Hacl_Hash_SHA3_Simd256_sha3_512(
   uint8_t *output0,
   uint8_t *output1,
   uint8_t *output2,
-  uint8_t *output3
+  uint8_t *output3,
+  uint8_t *input0,
+  uint8_t *input1,
+  uint8_t *input2,
+  uint8_t *input3,
+  uint32_t inputByteLen
 );
 
+/**
+Allocate quadruple state buffer (200 bytes for each)
+*/
+uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void);
+
+/**
+Free quadruple state buffer
+*/
+void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s);
+
+/**
+Absorb a number of blocks of 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes inputs that are a multiple of 168 bytes (SHAKE128 block size);
+  any additional bytes of a final partial block for each buffer are ignored.
+
+  The argument `state` (IN/OUT) points to quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+*/
 void
-Hacl_Hash_SHA3_Simd256_sha3_384(
-  uint32_t inputByteLen,
+Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
+  Lib_IntVector_Intrinsics_vec256 *state,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
-  uint8_t *output0,
-  uint8_t *output1,
-  uint8_t *output2,
-  uint8_t *output3
+  uint32_t inputByteLen
 );
 
+/**
+Absorb the final partial blocks of 4 input buffers and write the output states
+
+  This function is intended to receive a quadruple hash state and 4 input buffers.
+  It processes a sequence of bytes at the end of each input buffer that is less
+  than 168 bytes (SHAKE128 block size);
+  any bytes of full blocks at the start of the input buffers are ignored.
+
+  The argument `state` (IN/OUT) points to quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `input0/input1/input2/input3` (IN) point to `inputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[inputByteLen]
+
+  Note: The full size of the input buffers must be passed to `inputByteLen`,
+  including the number of full-block bytes at the start of each input buffer
+  that are ignored
+*/
 void
-Hacl_Hash_SHA3_Simd256_sha3_512(
-  uint32_t inputByteLen,
+Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
+  Lib_IntVector_Intrinsics_vec256 *state,
   uint8_t *input0,
   uint8_t *input1,
   uint8_t *input2,
   uint8_t *input3,
+  uint32_t inputByteLen
+);
+
+/**
+Squeeze a quadruple hash state to 4 output buffers
+
+  This function is intended to receive a quadruple hash state and 4 output buffers.
+  It produces 4 outputs, each a multiple of 168 bytes (SHAKE128 block size);
+  any additional bytes of a final partial block for each buffer are ignored.
+
+  The argument `state` (IN) points to quadruple hash state,
+  i.e., Lib_IntVector_Intrinsics_vec256[25]
+  The arguments `output0/output1/output2/output3` (OUT) point to `outputByteLen` bytes
+  of valid memory for each buffer, i.e., uint8_t[outputByteLen]
+*/
+void
+Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(
+  Lib_IntVector_Intrinsics_vec256 *state,
   uint8_t *output0,
   uint8_t *output1,
   uint8_t *output2,
-  uint8_t *output3
+  uint8_t *output3,
+  uint32_t outputByteLen
 );
 
 #if defined(__cplusplus)
diff --git a/sys/hacl/c/src/Hacl_Hash_SHA3_Scalar.c b/sys/hacl/c/src/Hacl_Hash_SHA3_Scalar.c
index 43d574827..6d6806a37 100644
--- a/sys/hacl/c/src/Hacl_Hash_SHA3_Scalar.c
+++ b/sys/hacl/c/src/Hacl_Hash_SHA3_Scalar.c
@@ -55,10 +55,10 @@ Hacl_Impl_SHA3_Vec_keccak_rndc[24U] =
 
 void
 Hacl_Hash_SHA3_Scalar_shake128(
-  uint32_t inputByteLen,
-  uint8_t *input,
+  uint8_t *output,
   uint32_t outputByteLen,
-  uint8_t *output
+  uint8_t *input,
+  uint32_t inputByteLen
 )
 {
   uint32_t rateInBytes = 168U;
@@ -447,10 +447,10 @@ Hacl_Hash_SHA3_Scalar_shake128(
 
 void
 Hacl_Hash_SHA3_Scalar_shake256(
-  uint32_t inputByteLen,
-  uint8_t *input,
+  uint8_t *output,
   uint32_t outputByteLen,
-  uint8_t *output
+  uint8_t *input,
+  uint32_t inputByteLen
 )
 {
   uint32_t rateInBytes = 136U;
@@ -837,7 +837,7 @@ Hacl_Hash_SHA3_Scalar_shake256(
   memcpy(output + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t));
 }
 
-void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output)
+void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen)
 {
   uint32_t rateInBytes = 144U;
   uint64_t s[25U] = { 0U };
@@ -1223,7 +1223,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8
   memcpy(output + 28U - remOut, hbuf, remOut * sizeof (uint8_t));
 }
 
-void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output)
+void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen)
 {
   uint32_t rateInBytes = 136U;
   uint64_t s[25U] = { 0U };
@@ -1609,7 +1609,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8
   memcpy(output + 32U - remOut, hbuf, remOut * sizeof (uint8_t));
 }
 
-void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output)
+void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen)
 {
   uint32_t rateInBytes = 104U;
   uint64_t s[25U] = { 0U };
@@ -1995,7 +1995,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8
   memcpy(output + 48U - remOut, hbuf, remOut * sizeof (uint8_t));
 }
 
-void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output)
+void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen)
 {
   uint32_t rateInBytes = 72U;
   uint64_t s[25U] = { 0U };
@@ -2381,3 +2381,418 @@ void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8
   memcpy(output + 64U - remOut, hbuf, remOut * sizeof (uint8_t));
 }
 
+uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void)
+{
+  uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t));
+  return buf;
+}
+
+void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s)
+{
+  KRML_HOST_FREE(s);
+}
+
+void
+Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
+  uint64_t *state,
+  uint8_t *input,
+  uint32_t inputByteLen
+)
+{
+  for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++)
+  {
+    uint8_t b1[256U] = { 0U };
+    uint8_t *b_ = b1;
+    uint8_t *b0 = input;
+    uint8_t *bl0 = b_;
+
memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] = u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b2[256U] = { 0U }; + uint8_t *b_ = b2; + uint32_t 
rem1 = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[rem] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u0 = load64_le(b); + ws[0U] = u0; + uint64_t u1 = load64_le(b + 8U); + ws[1U] = u1; + uint64_t u2 = load64_le(b + 16U); + ws[2U] = u2; + uint64_t u3 = load64_le(b + 24U); + ws[3U] = u3; + uint64_t u4 = load64_le(b + 32U); + ws[4U] = u4; + uint64_t u5 = load64_le(b + 40U); + ws[5U] = u5; + uint64_t u6 = load64_le(b + 48U); + ws[6U] = u6; + uint64_t u7 = load64_le(b + 56U); + ws[7U] = u7; + uint64_t u8 = load64_le(b + 64U); + ws[8U] = u8; + uint64_t u9 = load64_le(b + 72U); + ws[9U] = u9; + uint64_t u10 = load64_le(b + 80U); + ws[10U] = u10; + uint64_t u11 = load64_le(b + 88U); + ws[11U] = u11; + uint64_t u12 = load64_le(b + 96U); + ws[12U] = u12; + uint64_t u13 = load64_le(b + 104U); + ws[13U] = u13; + uint64_t u14 = load64_le(b + 112U); + ws[14U] = u14; + uint64_t u15 = load64_le(b + 120U); + ws[15U] = u15; + uint64_t u16 = load64_le(b + 128U); + ws[16U] = u16; + uint64_t u17 = load64_le(b + 136U); + ws[17U] = u17; + uint64_t u18 = load64_le(b + 144U); + ws[18U] = u18; + uint64_t u19 = load64_le(b + 152U); + ws[19U] = u19; + uint64_t u20 = load64_le(b + 160U); + ws[20U] = u20; + uint64_t u21 = load64_le(b + 168U); + ws[21U] = u21; + uint64_t u22 = load64_le(b + 176U); + ws[22U] = u22; + uint64_t u23 = load64_le(b + 184U); + ws[23U] = u23; + uint64_t u24 = load64_le(b + 192U); + ws[24U] = u24; + uint64_t u25 = load64_le(b + 200U); + ws[25U] = u25; + uint64_t u26 = load64_le(b + 208U); + ws[26U] = u26; + uint64_t u27 = load64_le(b + 216U); + ws[27U] = u27; + uint64_t u28 = load64_le(b + 224U); + ws[28U] = u28; + uint64_t u29 = load64_le(b + 232U); + ws[29U] = u29; + uint64_t u30 = load64_le(b + 240U); + ws[30U] = u30; + uint64_t u31 = load64_le(b + 248U); + ws[31U] = u31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[167U] = 0x80U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b1 = b4; + uint64_t u = load64_le(b1); + ws0[0U] = u; + uint64_t u32 = load64_le(b1 + 8U); + ws0[1U] = u32; + uint64_t u33 = load64_le(b1 + 16U); + ws0[2U] = u33; + uint64_t u34 = load64_le(b1 + 24U); + ws0[3U] = u34; + uint64_t u35 = load64_le(b1 + 32U); + ws0[4U] = u35; + uint64_t u36 = load64_le(b1 + 40U); + ws0[5U] = u36; + uint64_t u37 = load64_le(b1 + 48U); + ws0[6U] = u37; + uint64_t u38 = load64_le(b1 + 56U); + ws0[7U] = u38; + uint64_t u39 = load64_le(b1 + 64U); + ws0[8U] = u39; + uint64_t u40 = load64_le(b1 + 72U); + ws0[9U] = u40; + uint64_t u41 = load64_le(b1 + 80U); + ws0[10U] = u41; + uint64_t u42 = load64_le(b1 + 88U); + ws0[11U] = u42; + uint64_t u43 = load64_le(b1 + 96U); + ws0[12U] = u43; + uint64_t u44 = load64_le(b1 + 104U); + ws0[13U] = u44; + uint64_t u45 = load64_le(b1 + 112U); + ws0[14U] = u45; + uint64_t u46 = load64_le(b1 + 120U); + ws0[15U] = u46; + uint64_t u47 = load64_le(b1 + 128U); + ws0[16U] = u47; + uint64_t u48 = load64_le(b1 + 136U); + ws0[17U] = u48; + uint64_t u49 = load64_le(b1 + 144U); + ws0[18U] = u49; + uint64_t u50 = load64_le(b1 + 152U); + ws0[19U] = u50; + uint64_t u51 = load64_le(b1 + 160U); + ws0[20U] = u51; + uint64_t u52 = load64_le(b1 + 168U); + ws0[21U] = u52; + uint64_t u53 = load64_le(b1 + 176U); + ws0[22U] = u53; + uint64_t u54 = load64_le(b1 + 184U); + ws0[23U] = u54; + uint64_t u55 = load64_le(b1 + 192U); + ws0[24U] = u55; + 
uint64_t u56 = load64_le(b1 + 200U); + ws0[25U] = u56; + uint64_t u57 = load64_le(b1 + 208U); + ws0[26U] = u57; + uint64_t u58 = load64_le(b1 + 216U); + ws0[27U] = u58; + uint64_t u59 = load64_le(b1 + 224U); + ws0[28U] = u59; + uint64_t u60 = load64_le(b1 + 232U); + ws0[29U] = u60; + uint64_t u61 = load64_le(b1 + 240U); + ws0[30U] = u61; + uint64_t u62 = load64_le(b1 + 248U); + ws0[31U] = u62; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws0[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + state[0U] = state[0U] ^ c; + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, state, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & 
state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + diff --git a/sys/hacl/c/src/Hacl_Hash_SHA3_Simd256.c b/sys/hacl/c/src/Hacl_Hash_SHA3_Simd256.c index b9bfcee59..76938112e 100644 --- a/sys/hacl/c/src/Hacl_Hash_SHA3_Simd256.c +++ b/sys/hacl/c/src/Hacl_Hash_SHA3_Simd256.c @@ -26,20 +26,19 @@ #include "Hacl_Hash_SHA3_Simd256.h" #include "internal/Hacl_Hash_SHA3_Scalar.h" -#include "libintvector.h" void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -438,63 +437,63 @@ Hacl_Hash_SHA3_Simd256_shake128( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - 
ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -723,57 +722,57 @@ Hacl_Hash_SHA3_Simd256_shake128( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -1295,62 +1294,49 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; 
+ ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -1645,76 +1631,63 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i 
* 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -2113,63 +2086,63 @@ Hacl_Hash_SHA3_Simd256_shake256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - 
ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -2398,57 +2371,57 @@ Hacl_Hash_SHA3_Simd256_shake256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] 
= Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -2970,62 +2943,49 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - 
ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -3320,75 +3280,62 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + 
ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -3787,63 +3734,63 @@ Hacl_Hash_SHA3_Simd256_sha3_224( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = 
b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -4072,57 +4019,57 @@ Hacl_Hash_SHA3_Simd256_sha3_224( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - 
ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -4644,62 +4591,49 
@@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -4994,75 +4928,62 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - 
ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 28U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 28U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 28U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -5461,63 +5382,63 @@ Hacl_Hash_SHA3_Simd256_sha3_256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + 
uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -5746,57 +5667,57 @@ Hacl_Hash_SHA3_Simd256_sha3_256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - 
ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -6318,62 +6239,49 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -6668,75 +6576,62 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] 
= ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 32U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 32U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 32U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -7135,63 +7030,63 @@ Hacl_Hash_SHA3_Simd256_sha3_384( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * 
sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -7420,57 +7315,57 @@ Hacl_Hash_SHA3_Simd256_sha3_384( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + 
ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -7992,62 +7887,49 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf 
+ 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -8342,75 +8224,62 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 48U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 48U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 48U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, - uint8_t *input0, - uint8_t *input1, - uint8_t *input2, - uint8_t *input3, uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -8809,63 +8678,63 @@ Hacl_Hash_SHA3_Simd256_sha3_512( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = 
ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -9094,57 +8963,57 @@ Hacl_Hash_SHA3_Simd256_sha3_512( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - 
b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + 
ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -9666,62 +9535,49 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * 
sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -10016,61 +9872,1485 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 64U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +Lib_IntVector_Intrinsics_vec256 *Hacl_Hash_SHA3_Simd256_state_malloc(void) +{ + Lib_IntVector_Intrinsics_vec256 *buf = (Lib_IntVector_Intrinsics_vec256 *)KRML_ALIGNED_MALLOC(32, 25U * sizeof 
(Lib_IntVector_Intrinsics_vec256)); + for (int i = 0; i < 25; i++){ + buf[i] = Lib_IntVector_Intrinsics_vec256_zero; + } + return buf; +} + +void Hacl_Hash_SHA3_Simd256_state_free(Lib_IntVector_Intrinsics_vec256 *s) +{ + KRML_ALIGNED_FREE(s); +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl1, b11 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl2, b21 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl3, b31 + i0 * 168U, 168U * sizeof (uint8_t)); + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b_.snd.snd.snd; + uint8_t *b2 = b_.snd.snd.fst; + uint8_t *b1 = b_.snd.fst; + uint8_t *b0 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + 
Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = 
v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, 
v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + 
Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + 
Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 
= ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws32 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + 
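+ /* Each group of four vector loads is transposed with the
+    interleave_low/high64 and interleave_low/high128 pairs, so that ws word k
+    ends up holding Keccak state word k of all four inputs, one per 64-bit
+    lane. The v2__/v1__ order in these assignments reflects the 128-bit
+    interleave ordering. */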
Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, 
v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws[0U] = ws00; + ws[1U] = ws110; + ws[2U] = ws210; + ws[3U] = ws32; + ws[4U] = ws40; + ws[5U] = ws50; + ws[6U] = ws60; + ws[7U] = ws70; + ws[8U] = ws80; + ws[9U] = ws90; + ws[10U] = ws100; + ws[11U] = ws111; + ws[12U] = ws120; + ws[13U] = ws130; + ws[14U] = ws140; + ws[15U] = ws150; + ws[16U] = ws160; + ws[17U] = ws170; + ws[18U] = ws180; + ws[19U] = ws190; + ws[20U] = ws200; + ws[21U] = ws211; + ws[22U] = ws220; + ws[23U] = ws230; + ws[24U] = ws240; + ws[25U] = ws250; + ws[26U] = ws260; + ws[27U] = ws270; + ws[28U] = ws280; + ws[29U] = ws290; + ws[30U] = ws300; + ws[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b 
= { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[167U] = 0x80U; + b15[167U] = 0x80U; + b25[167U] = 0x80U; + b35[167U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws33[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + ws33[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws33[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws33[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws33[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws33[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws33[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws33[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws33[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws33[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws33[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws33[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws33[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws33[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws33[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws33[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws33[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws33[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws33[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws33[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws33[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws33[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws33[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws33[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws33[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws33[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws33[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws33[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws33[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws33[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws33[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws33[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws33[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v08 = ws33[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws33[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws33[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws33[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + 
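+ /* Second padding buffer: the 0x1F domain-separation byte was placed at the
+    message-remainder offset above, and 0x80 here marks the final bit of the
+    pad10*1 rule in byte 167, the last byte of the 168-byte SHAKE128 rate
+    block. Absorbing this block completes the input. */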
Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws33[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws33[5U]; + Lib_IntVector_Intrinsics_vec256 v29 = ws33[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws33[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws33[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws33[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws33[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws33[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws33[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws33[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws33[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws33[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + 
v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws33[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws33[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws33[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws33[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws33[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws33[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws33[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws33[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws33[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws33[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws33[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws33[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws33[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws33[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws33[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws33[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws33[0U] = ws0; + ws33[1U] = ws1; + ws33[2U] = ws2; + ws33[3U] = ws3; + ws33[4U] = ws4; + ws33[5U] = ws5; + ws33[6U] = ws6; + ws33[7U] = ws7; + ws33[8U] = ws8; + ws33[9U] = ws9; + ws33[10U] = ws10; + ws33[11U] = ws11; + ws33[12U] = ws12; + ws33[13U] = ws13; + ws33[14U] = ws14; + ws33[15U] = ws15; + ws33[16U] = ws16; + ws33[17U] = ws17; + ws33[18U] = ws18; + ws33[19U] = ws19; + ws33[20U] = ws20; + ws33[21U] = ws21; + ws33[22U] = ws22; + ws33[23U] = ws23; + ws33[24U] = ws24; + ws33[25U] = ws25; + ws33[26U] = ws26; + ws33[27U] = ws27; + ws33[28U] = ws28; + ws33[29U] = ws29; + ws33[30U] = ws30; + ws33[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws33[i]); + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i1 + 1U) % 5U]; + 
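+ /* theta, continued: _D for column i1 is _C[i1 - 1] XOR rotl64(_C[i1 + 1], 1);
+    the OR of a left shift by 1 and a right shift by 63 implements the 64-bit
+    rotation lane-wise, since the vector ISA has no 64-bit rotate. */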
Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i1 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i1 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v015; + state[1U + 5U * i] = v115; + state[2U + 5U * i] = v215; + state[3U + 5U * i] = v315; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + 
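+ /* Squeeze step: the 25 interleaved state words are copied into ws and
+    transposed back to per-input layout; all 32 vectors are then stored to
+    hbuf (256 bytes per lane) and the first 168 bytes of each lane's region
+    are emitted before the next Keccak-f[1600] permutation. */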
Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = 
v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = 
v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b0 = output0; + uint8_t *b1 = output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U, hbuf + 256U, 168U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U, hbuf + 512U, 168U * sizeof 
(uint8_t)); + memcpy(b3 + i0 * 168U, hbuf + 768U, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + 
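+ /* chi: each row word x becomes state[x] ^ (~state[x+1] & state[x+2]),
+    evaluated with lognot/and/xor on all four states at once. The iota step
+    below XORs round constant keccak_rndc[i1] into word (0,0);
+    vec256_load64 broadcasts c to all four lanes. */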
state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } } diff --git a/sys/hacl/c/src/msvc/Hacl_Hash_SHA3_Scalar.c b/sys/hacl/c/src/msvc/Hacl_Hash_SHA3_Scalar.c index 43d574827..6d6806a37 100644 --- a/sys/hacl/c/src/msvc/Hacl_Hash_SHA3_Scalar.c +++ b/sys/hacl/c/src/msvc/Hacl_Hash_SHA3_Scalar.c @@ -55,10 +55,10 @@ Hacl_Impl_SHA3_Vec_keccak_rndc[24U] = void Hacl_Hash_SHA3_Scalar_shake128( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 168U; @@ -447,10 +447,10 @@ Hacl_Hash_SHA3_Scalar_shake128( void Hacl_Hash_SHA3_Scalar_shake256( - uint32_t inputByteLen, - uint8_t *input, + uint8_t *output, uint32_t outputByteLen, - uint8_t *output + uint8_t *input, + uint32_t inputByteLen ) { uint32_t rateInBytes = 136U; @@ -837,7 +837,7 @@ Hacl_Hash_SHA3_Scalar_shake256( memcpy(output + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_224(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 144U; uint64_t s[25U] = { 0U }; @@ -1223,7 +1223,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_224(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_256(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 136U; uint64_t s[25U] = { 0U }; @@ -1609,7 +1609,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_256(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_384(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 104U; uint64_t s[25U] = { 0U }; @@ -1995,7 +1995,7 @@ void Hacl_Hash_SHA3_Scalar_sha3_384(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); } -void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8_t *output) +void Hacl_Hash_SHA3_Scalar_sha3_512(uint8_t *output, uint8_t *input, uint32_t inputByteLen) { uint32_t rateInBytes = 72U; uint64_t s[25U] = { 0U }; @@ -2381,3 +2381,418 @@ void Hacl_Hash_SHA3_Scalar_sha3_512(uint32_t inputByteLen, uint8_t *input, uint8 memcpy(output + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); } +uint64_t *Hacl_Hash_SHA3_Scalar_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(25U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Scalar_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b1[256U] = { 0U }; + uint8_t *b_ = b1; + uint8_t *b0 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b0 + i0 * 168U, 168U * sizeof (uint8_t)); + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t u = load64_le(b); + ws[0U] = u; + uint64_t u0 = load64_le(b + 8U); + ws[1U] = u0; + uint64_t u1 = load64_le(b + 16U); + ws[2U] 
= u1; + uint64_t u2 = load64_le(b + 24U); + ws[3U] = u2; + uint64_t u3 = load64_le(b + 32U); + ws[4U] = u3; + uint64_t u4 = load64_le(b + 40U); + ws[5U] = u4; + uint64_t u5 = load64_le(b + 48U); + ws[6U] = u5; + uint64_t u6 = load64_le(b + 56U); + ws[7U] = u6; + uint64_t u7 = load64_le(b + 64U); + ws[8U] = u7; + uint64_t u8 = load64_le(b + 72U); + ws[9U] = u8; + uint64_t u9 = load64_le(b + 80U); + ws[10U] = u9; + uint64_t u10 = load64_le(b + 88U); + ws[11U] = u10; + uint64_t u11 = load64_le(b + 96U); + ws[12U] = u11; + uint64_t u12 = load64_le(b + 104U); + ws[13U] = u12; + uint64_t u13 = load64_le(b + 112U); + ws[14U] = u13; + uint64_t u14 = load64_le(b + 120U); + ws[15U] = u14; + uint64_t u15 = load64_le(b + 128U); + ws[16U] = u15; + uint64_t u16 = load64_le(b + 136U); + ws[17U] = u16; + uint64_t u17 = load64_le(b + 144U); + ws[18U] = u17; + uint64_t u18 = load64_le(b + 152U); + ws[19U] = u18; + uint64_t u19 = load64_le(b + 160U); + ws[20U] = u19; + uint64_t u20 = load64_le(b + 168U); + ws[21U] = u20; + uint64_t u21 = load64_le(b + 176U); + ws[22U] = u21; + uint64_t u22 = load64_le(b + 184U); + ws[23U] = u22; + uint64_t u23 = load64_le(b + 192U); + ws[24U] = u23; + uint64_t u24 = load64_le(b + 200U); + ws[25U] = u24; + uint64_t u25 = load64_le(b + 208U); + ws[26U] = u25; + uint64_t u26 = load64_le(b + 216U); + ws[27U] = u26; + uint64_t u27 = load64_le(b + 224U); + ws[28U] = u27; + uint64_t u28 = load64_le(b + 232U); + ws[29U] = u28; + uint64_t u29 = load64_le(b + 240U); + ws[30U] = u29; + uint64_t u30 = load64_le(b + 248U); + ws[31U] = u30; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_absorb_final( + uint64_t *state, + uint8_t *input, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b2[256U] = { 0U }; + uint8_t *b_ = b2; + uint32_t rem1 = inputByteLen % 168U; + uint8_t *b00 = input; + uint8_t *bl0 = b_; + memcpy(bl0, b00 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b01 = b_; + b01[rem] = 0x1FU; + uint64_t ws[32U] = { 0U }; + uint8_t *b = b_; + uint64_t 
u0 = load64_le(b); + ws[0U] = u0; + uint64_t u1 = load64_le(b + 8U); + ws[1U] = u1; + uint64_t u2 = load64_le(b + 16U); + ws[2U] = u2; + uint64_t u3 = load64_le(b + 24U); + ws[3U] = u3; + uint64_t u4 = load64_le(b + 32U); + ws[4U] = u4; + uint64_t u5 = load64_le(b + 40U); + ws[5U] = u5; + uint64_t u6 = load64_le(b + 48U); + ws[6U] = u6; + uint64_t u7 = load64_le(b + 56U); + ws[7U] = u7; + uint64_t u8 = load64_le(b + 64U); + ws[8U] = u8; + uint64_t u9 = load64_le(b + 72U); + ws[9U] = u9; + uint64_t u10 = load64_le(b + 80U); + ws[10U] = u10; + uint64_t u11 = load64_le(b + 88U); + ws[11U] = u11; + uint64_t u12 = load64_le(b + 96U); + ws[12U] = u12; + uint64_t u13 = load64_le(b + 104U); + ws[13U] = u13; + uint64_t u14 = load64_le(b + 112U); + ws[14U] = u14; + uint64_t u15 = load64_le(b + 120U); + ws[15U] = u15; + uint64_t u16 = load64_le(b + 128U); + ws[16U] = u16; + uint64_t u17 = load64_le(b + 136U); + ws[17U] = u17; + uint64_t u18 = load64_le(b + 144U); + ws[18U] = u18; + uint64_t u19 = load64_le(b + 152U); + ws[19U] = u19; + uint64_t u20 = load64_le(b + 160U); + ws[20U] = u20; + uint64_t u21 = load64_le(b + 168U); + ws[21U] = u21; + uint64_t u22 = load64_le(b + 176U); + ws[22U] = u22; + uint64_t u23 = load64_le(b + 184U); + ws[23U] = u23; + uint64_t u24 = load64_le(b + 192U); + ws[24U] = u24; + uint64_t u25 = load64_le(b + 200U); + ws[25U] = u25; + uint64_t u26 = load64_le(b + 208U); + ws[26U] = u26; + uint64_t u27 = load64_le(b + 216U); + ws[27U] = u27; + uint64_t u28 = load64_le(b + 224U); + ws[28U] = u28; + uint64_t u29 = load64_le(b + 232U); + ws[29U] = u29; + uint64_t u30 = load64_le(b + 240U); + ws[30U] = u30; + uint64_t u31 = load64_le(b + 248U); + ws[31U] = u31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws[i]; + } + uint8_t b3[256U] = { 0U }; + uint8_t *b4 = b3; + uint8_t *b0 = b4; + b0[167U] = 0x80U; + uint64_t ws0[32U] = { 0U }; + uint8_t *b1 = b4; + uint64_t u = load64_le(b1); + ws0[0U] = u; + uint64_t u32 = load64_le(b1 + 8U); + ws0[1U] = u32; + uint64_t u33 = load64_le(b1 + 16U); + ws0[2U] = u33; + uint64_t u34 = load64_le(b1 + 24U); + ws0[3U] = u34; + uint64_t u35 = load64_le(b1 + 32U); + ws0[4U] = u35; + uint64_t u36 = load64_le(b1 + 40U); + ws0[5U] = u36; + uint64_t u37 = load64_le(b1 + 48U); + ws0[6U] = u37; + uint64_t u38 = load64_le(b1 + 56U); + ws0[7U] = u38; + uint64_t u39 = load64_le(b1 + 64U); + ws0[8U] = u39; + uint64_t u40 = load64_le(b1 + 72U); + ws0[9U] = u40; + uint64_t u41 = load64_le(b1 + 80U); + ws0[10U] = u41; + uint64_t u42 = load64_le(b1 + 88U); + ws0[11U] = u42; + uint64_t u43 = load64_le(b1 + 96U); + ws0[12U] = u43; + uint64_t u44 = load64_le(b1 + 104U); + ws0[13U] = u44; + uint64_t u45 = load64_le(b1 + 112U); + ws0[14U] = u45; + uint64_t u46 = load64_le(b1 + 120U); + ws0[15U] = u46; + uint64_t u47 = load64_le(b1 + 128U); + ws0[16U] = u47; + uint64_t u48 = load64_le(b1 + 136U); + ws0[17U] = u48; + uint64_t u49 = load64_le(b1 + 144U); + ws0[18U] = u49; + uint64_t u50 = load64_le(b1 + 152U); + ws0[19U] = u50; + uint64_t u51 = load64_le(b1 + 160U); + ws0[20U] = u51; + uint64_t u52 = load64_le(b1 + 168U); + ws0[21U] = u52; + uint64_t u53 = load64_le(b1 + 176U); + ws0[22U] = u53; + uint64_t u54 = load64_le(b1 + 184U); + ws0[23U] = u54; + uint64_t u55 = load64_le(b1 + 192U); + ws0[24U] = u55; + uint64_t u56 = load64_le(b1 + 200U); + ws0[25U] = u56; + uint64_t u57 = load64_le(b1 + 208U); + ws0[26U] = u57; + uint64_t u58 = load64_le(b1 + 216U); + ws0[27U] = u58; + uint64_t u59 = load64_le(b1 + 224U); + ws0[28U] = u59; + uint64_t u60 = 
load64_le(b1 + 232U); + ws0[29U] = u60; + uint64_t u61 = load64_le(b1 + 240U); + ws0[30U] = u61; + uint64_t u62 = load64_le(b1 + 248U); + ws0[31U] = u62; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = state[i] ^ ws0[i]; + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = state[i + 0U] ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i1 + 1U) % 5U]; + uint64_t _D = _C[(i1 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i1 + 5U * i] = state[i1 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + state[0U] = state[0U] ^ c; + } +} + +void +Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + uint64_t *state, + uint8_t *output, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[256U] = { 0U }; + uint64_t ws[32U] = { 0U }; + memcpy(ws, state, 25U * sizeof (uint64_t)); + for (uint32_t i = 0U; i < 32U; i++) + { + store64_le(hbuf + i * 8U, ws[i]); + } + memcpy(output + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + uint64_t _C[5U] = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + _C[i] = + state[i + + 0U] + ^ (state[i + 5U] ^ (state[i + 10U] ^ (state[i + 15U] ^ state[i + 20U])));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + uint64_t uu____0 = _C[(i2 + 1U) % 5U]; + uint64_t _D = _C[(i2 + 4U) % 5U] ^ (uu____0 << 1U | uu____0 >> 63U); + KRML_MAYBE_FOR5(i, 0U, 5U, 1U, state[i2 + 5U * i] = state[i2 + 5U * i] ^ _D;);); + uint64_t x = state[1U]; + uint64_t current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + uint64_t temp = state[_Y]; + uint64_t uu____1 = current; + state[_Y] = uu____1 << r | uu____1 >> (64U - r); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + uint64_t v0 = state[0U + 5U * i] ^ (~state[1U + 5U * i] & state[2U + 5U * i]); + uint64_t v1 = state[1U + 5U * i] ^ (~state[2U + 5U * i] & state[3U + 5U * i]); + uint64_t v2 = state[2U + 5U * i] ^ (~state[3U + 5U * i] & state[4U + 5U * i]); + uint64_t v3 = state[3U + 5U * i] ^ (~state[4U + 5U * i] & state[0U + 5U * i]); + uint64_t v4 = state[4U + 5U * i] ^ (~state[0U + 5U * i] & state[1U + 5U * i]); + state[0U + 5U * i] = v0; + state[1U + 5U * i] = v1; + state[2U + 5U * i] = v2; + state[3U + 5U * i] = v3; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + state[0U] = state[0U] ^ c; + } + 
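/*
 * Squeeze structure: each pass through the enclosing loop emits one full
 * 168-byte SHAKE128 rate block and then re-permutes the state. In outline
 * (the permutation is the 24-round theta/rho-pi/chi/iota loop spelled out
 * inline above; the generated code never factors it into a function):
 *
 *   for (block = 0; block < outputByteLen / 168U; block++) {
 *     store ws[0..31] little-endian into hbuf;
 *     memcpy(output + block * 168U, hbuf, 168U);
 *     24 rounds of Keccak-f[1600] on state;
 *   }
 */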
} +} + diff --git a/sys/hacl/c/src/msvc/Hacl_Hash_SHA3_Simd256.c b/sys/hacl/c/src/msvc/Hacl_Hash_SHA3_Simd256.c index b9bfcee59..9046f3dbe 100644 --- a/sys/hacl/c/src/msvc/Hacl_Hash_SHA3_Simd256.c +++ b/sys/hacl/c/src/msvc/Hacl_Hash_SHA3_Simd256.c @@ -26,20 +26,19 @@ #include "Hacl_Hash_SHA3_Simd256.h" #include "internal/Hacl_Hash_SHA3_Scalar.h" -#include "libintvector.h" void Hacl_Hash_SHA3_Simd256_shake128( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -438,63 +437,63 @@ Hacl_Hash_SHA3_Simd256_shake128( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -723,57 +722,57 @@ 
Hacl_Hash_SHA3_Simd256_shake128( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -1295,62 +1294,49 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { 
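/* The index permutation just above (ws[1] = ws4, ws[2] = ws8, ...) finishes
   the 4-way de-interleave: after these 32 stores of 32 bytes each, hbuf holds
   the four squeezed states as contiguous 256-byte regions (lane 0 at hbuf,
   lane 1 at hbuf + 256U, and so on). That layout is what allows the single
   rateInBytes-sized memcpy per output buffer below, in place of the removed
   loop of interleaved 32-byte copies. */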
Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -1645,76 +1631,63 @@ Hacl_Hash_SHA3_Simd256_shake128( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 
128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_shake256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint32_t outputByteLen, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -2113,63 +2086,63 @@ Hacl_Hash_SHA3_Simd256_shake256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x1FU; - b13[rem] = 0x1FU; - b23[rem] = 0x1FU; - b33[rem] = 0x1FU; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - 
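/* Note on this hunk: the removed loads (through b04/b14/b24/b34) and the
   added ones below (through b03/b13/b23/b33) read the same four padded
   buffers; the edit only drops one redundant layer of pointer re-derivation
   from b_, so behaviour is unchanged. As with shake128 above, shake256's
   signature now also takes the four output pointers first and inputByteLen
   last. */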
ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -2398,57 +2371,57 @@ Hacl_Hash_SHA3_Simd256_shake256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] 
= Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -2970,62 +2943,49 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + 
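/* The replacement order is an 8x4 transpose of the lane indices:
   ws[i] = ws_{4 * (i % 8) + i / 8}, e.g. ws[8] = ws1, ws[16] = ws2,
   ws[24] = ws3. The same permutation appears at every squeeze site in
   this file, so each output lane ends up contiguous in hbuf. */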
ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -3320,75 +3280,62 @@ Hacl_Hash_SHA3_Simd256_shake256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { 
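/* Final partial block: after the last permutation only remOut bytes per
   lane are still owed. The edited code below copies those tail bytes
   directly from each lane's 256-byte region of hbuf (offsets 0, 256, 512,
   768) to output + outputByteLen - remOut, replacing the removed
   32-byte-granular copy loop and its rem0/j fix-up. */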
Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + outputByteLen - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + outputByteLen - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + outputByteLen - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + outputByteLen - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + outputByteLen - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + outputByteLen - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_224( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -3787,63 +3734,63 @@ Hacl_Hash_SHA3_Simd256_sha3_224( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] 
KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -4072,57 +4019,57 @@ Hacl_Hash_SHA3_Simd256_sha3_224( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] 
= Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -4644,62 +4591,49 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - 
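/* sha3_224 gets the same squeeze rewrite as shake128/shake256: the identity
   copies ws[1] = ws1 ... ws[30] = ws30 below are deleted in favour of the
   transposing assignments, and the digest tail becomes one contiguous
   memcpy per lane at offset 28U - remOut (the SHA3-224 digest is 28
   bytes). */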
ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -4994,75 +4928,62 @@ Hacl_Hash_SHA3_Simd256_sha3_224( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = 
ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 28U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 28U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 28U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 28U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 28U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 28U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 28U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 28U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_256( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -5461,63 +5382,63 @@ Hacl_Hash_SHA3_Simd256_sha3_256( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 
0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + 
ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -5746,57 +5667,57 @@ Hacl_Hash_SHA3_Simd256_sha3_256( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 
64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -6318,62 +6239,49 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -6668,75 +6576,62 @@ Hacl_Hash_SHA3_Simd256_sha3_256( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; 
- ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 32U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 32U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 32U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 32U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 32U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 32U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 32U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 32U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_384( - uint32_t inputByteLen, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, uint8_t *input0, uint8_t *input1, uint8_t *input2, uint8_t *input3, - uint8_t *output0, - uint8_t *output1, - uint8_t *output2, - uint8_t *output3 + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -7135,63 +7030,63 @@ Hacl_Hash_SHA3_Simd256_sha3_384( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof 
(uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -7420,57 +7315,57 @@ Hacl_Hash_SHA3_Simd256_sha3_384( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - 
ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -7992,62 +7887,49 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 
_C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -8342,75 +8224,62 @@ Hacl_Hash_SHA3_Simd256_sha3_384( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 48U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 48U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 48U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 48U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 48U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 48U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 48U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 48U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); } void Hacl_Hash_SHA3_Simd256_sha3_512( - uint32_t inputByteLen, - uint8_t *input0, - uint8_t *input1, - uint8_t *input2, - uint8_t *input3, uint8_t *output0, uint8_t *output1, uint8_t *output2, - uint8_t *output3 + uint8_t *output3, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen ) { K____uint8_t___uint8_t____K____uint8_t___uint8_t_ @@ -8809,63 +8678,63 @@ Hacl_Hash_SHA3_Simd256_sha3_512( K____uint8_t___uint8_t____K____uint8_t___uint8_t_ b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; uint32_t rem1 = inputByteLen % rateInBytes; - uint8_t *b32 = ib.snd.snd.snd; - uint8_t *b22 = ib.snd.snd.fst; - uint8_t *b12 = ib.snd.fst; - uint8_t *b02 = ib.fst; + uint8_t *b31 = ib.snd.snd.snd; + uint8_t *b21 = ib.snd.snd.fst; + uint8_t *b11 = ib.snd.fst; + uint8_t *b01 = ib.fst; 
uint8_t *bl3 = b_.snd.snd.snd; uint8_t *bl2 = b_.snd.snd.fst; uint8_t *bl1 = b_.snd.fst; uint8_t *bl0 = b_.fst; - memcpy(bl0, b02 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl1, b12 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl2, b22 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); - memcpy(bl3, b32 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x06U; + b12[rem] = 0x06U; + b22[rem] = 0x06U; + b32[rem] = 0x06U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; uint8_t *b33 = b_.snd.snd.snd; uint8_t *b23 = b_.snd.snd.fst; uint8_t *b13 = b_.snd.fst; uint8_t *b03 = b_.fst; - b03[rem] = 0x06U; - b13[rem] = 0x06U; - b23[rem] = 0x06U; - b33[rem] = 0x06U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws32[32U] KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b34 = b_.snd.snd.snd; - uint8_t *b24 = b_.snd.snd.fst; - uint8_t *b14 = b_.snd.fst; - uint8_t *b04 = b_.fst; - ws32[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04); - ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14); - ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24); - ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34); - ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 32U); - ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 32U); - ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 32U); - ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 32U); - ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 64U); - ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 64U); - ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 64U); - ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 64U); - ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 96U); - ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 96U); - ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 96U); - ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 96U); - ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 128U); - ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 128U); - ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 128U); - ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 128U); - ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 160U); - ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 160U); - ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 160U); - ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 160U); - ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 192U); - ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 192U); - ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 192U); - ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 192U); - ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b04 + 224U); - ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b14 + 224U); - ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b24 + 224U); - ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b34 + 224U); + ws32[0U] = 
Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws32[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws32[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws32[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws32[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws32[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws32[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws32[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws32[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws32[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws32[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws32[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws32[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws32[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws32[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws32[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws32[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws32[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws32[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws32[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws32[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws32[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws32[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws32[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws32[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws32[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws32[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws32[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws32[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws32[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws32[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws32[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); Lib_IntVector_Intrinsics_vec256 v00 = ws32[0U]; Lib_IntVector_Intrinsics_vec256 v10 = ws32[1U]; Lib_IntVector_Intrinsics_vec256 v20 = ws32[2U]; @@ -9094,57 +8963,57 @@ Hacl_Hash_SHA3_Simd256_sha3_512( { s[i] = Lib_IntVector_Intrinsics_vec256_xor(s[i], ws32[i]); } - uint8_t b05[256U] = { 0U }; - uint8_t b15[256U] = { 0U }; - uint8_t b25[256U] = { 0U }; - uint8_t b35[256U] = { 0U }; + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; K____uint8_t___uint8_t____K____uint8_t___uint8_t_ - b = { .fst = b05, .snd = { .fst = b15, .snd = { .fst = b25, .snd = b35 } } }; - uint8_t *b36 = b.snd.snd.snd; + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[rateInBytes - 1U] = 0x80U; + b15[rateInBytes - 1U] = 0x80U; + b25[rateInBytes - 1U] = 0x80U; + b35[rateInBytes - 1U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b.snd.snd.snd; uint8_t *b26 = b.snd.snd.fst; uint8_t *b16 = b.snd.fst; uint8_t *b06 = b.fst; - b06[rateInBytes - 1U] = 0x80U; - b16[rateInBytes - 1U] = 0x80U; - b26[rateInBytes - 1U] = 0x80U; - b36[rateInBytes - 1U] = 0x80U; - KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws34[32U] 
KRML_POST_ALIGN(32) = { 0U }; - uint8_t *b37 = b.snd.snd.snd; - uint8_t *b27 = b.snd.snd.fst; - uint8_t *b17 = b.snd.fst; - uint8_t *b07 = b.fst; - ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07); - ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17); - ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27); - ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37); - ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 32U); - ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 32U); - ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 32U); - ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 32U); - ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 64U); - ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 64U); - ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 64U); - ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 64U); - ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 96U); - ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 96U); - ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 96U); - ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 96U); - ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 128U); - ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 128U); - ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 128U); - ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 128U); - ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 160U); - ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 160U); - ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 160U); - ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 160U); - ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 192U); - ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 192U); - ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 192U); - ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 192U); - ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b07 + 224U); - ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b17 + 224U); - ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b27 + 224U); - ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b37 + 224U); + ws34[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06); + ws34[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16); + ws34[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26); + ws34[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws34[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 32U); + ws34[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 32U); + ws34[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 32U); + ws34[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws34[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 64U); + ws34[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 64U); + ws34[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 64U); + ws34[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws34[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 96U); + ws34[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 96U); + ws34[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 96U); + ws34[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws34[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 128U); + ws34[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 128U); + ws34[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 
128U); + ws34[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws34[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 160U); + ws34[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 160U); + ws34[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 160U); + ws34[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws34[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 192U); + ws34[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 192U); + ws34[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 192U); + ws34[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws34[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b06 + 224U); + ws34[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b16 + 224U); + ws34[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b26 + 224U); + ws34[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); Lib_IntVector_Intrinsics_vec256 v08 = ws34[0U]; Lib_IntVector_Intrinsics_vec256 v18 = ws34[1U]; Lib_IntVector_Intrinsics_vec256 v28 = ws34[2U]; @@ -9666,62 +9535,49 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < rateInBytes / 32U; i++) - { - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = rateInBytes % 32U; - uint32_t j = rateInBytes / 32U; - uint8_t *b31 = rb.snd.snd.snd; - uint8_t *b21 = rb.snd.snd.fst; - uint8_t *b11 = rb.snd.fst; - uint8_t *b01 = rb.fst; - memcpy(b01 + i0 * rateInBytes + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b11 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b21 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b31 + i0 * rateInBytes + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + uint8_t *b36 = rb.snd.snd.snd; + uint8_t *b2 = rb.snd.snd.fst; + uint8_t *b1 = 
rb.snd.fst; + uint8_t *b0 = rb.fst; + memcpy(b0 + i0 * rateInBytes, hbuf, rateInBytes * sizeof (uint8_t)); + memcpy(b1 + i0 * rateInBytes, hbuf + 256U, rateInBytes * sizeof (uint8_t)); + memcpy(b2 + i0 * rateInBytes, hbuf + 512U, rateInBytes * sizeof (uint8_t)); + memcpy(b36 + i0 * rateInBytes, hbuf + 768U, rateInBytes * sizeof (uint8_t)); for (uint32_t i1 = 0U; i1 < 24U; i1++) { KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; @@ -10016,61 +9872,1482 @@ Hacl_Hash_SHA3_Simd256_sha3_512( Lib_IntVector_Intrinsics_vec256 ws30 = v1__22; Lib_IntVector_Intrinsics_vec256 ws31 = v3__22; ws[0U] = ws0; - ws[1U] = ws1; - ws[2U] = ws2; - ws[3U] = ws3; - ws[4U] = ws4; - ws[5U] = ws5; - ws[6U] = ws6; - ws[7U] = ws7; - ws[8U] = ws8; - ws[9U] = ws9; - ws[10U] = ws10; - ws[11U] = ws11; - ws[12U] = ws12; - ws[13U] = ws13; - ws[14U] = ws14; - ws[15U] = ws15; - ws[16U] = ws16; - ws[17U] = ws17; - ws[18U] = ws18; - ws[19U] = ws19; - ws[20U] = ws20; - ws[21U] = ws21; - ws[22U] = ws22; - ws[23U] = ws23; - ws[24U] = ws24; - ws[25U] = ws25; - ws[26U] = ws26; - ws[27U] = ws27; - ws[28U] = ws28; - ws[29U] = ws29; - ws[30U] = ws30; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; ws[31U] = ws31; for (uint32_t i = 0U; i < 32U; i++) { Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); } - for (uint32_t i = 0U; i < remOut / 32U; i++) - { - uint8_t *b3 = rb.snd.snd.snd; - uint8_t *b2 = rb.snd.snd.fst; - uint8_t *b1 = rb.snd.fst; - uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + i * 32U, hbuf + i * 128U, 32U * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + i * 32U, hbuf + i * 128U + 32U, 32U * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + i * 32U, hbuf + i * 128U + 64U, 32U * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + i * 32U, hbuf + i * 128U + 96U, 32U * sizeof (uint8_t)); - } - uint32_t rem0 = remOut % 32U; - uint32_t j = remOut / 32U; - uint8_t *b3 = rb.snd.snd.snd; + uint8_t *b36 = rb.snd.snd.snd; uint8_t *b2 = rb.snd.snd.fst; uint8_t *b1 = rb.snd.fst; uint8_t *b0 = rb.fst; - memcpy(b0 + 64U - remOut + j * 32U, hbuf + j * 128U, rem0 * sizeof (uint8_t)); - memcpy(b1 + 64U - remOut + j * 32U, hbuf + j * 128U + 32U, rem0 * sizeof (uint8_t)); - memcpy(b2 + 64U - remOut + j * 32U, hbuf + j * 128U + 64U, rem0 * sizeof (uint8_t)); - memcpy(b3 + 64U - remOut + j * 32U, hbuf + j * 128U + 96U, rem0 * sizeof (uint8_t)); + memcpy(b0 + 64U - remOut, hbuf, remOut * sizeof (uint8_t)); + memcpy(b1 + 64U - remOut, hbuf + 256U, remOut * sizeof (uint8_t)); + memcpy(b2 + 64U - remOut, hbuf + 512U, remOut * sizeof (uint8_t)); + memcpy(b36 + 64U - remOut, hbuf + 768U, remOut * sizeof (uint8_t)); +} + +uint64_t *Hacl_Hash_SHA3_Simd256_state_malloc(void) +{ + uint64_t *buf = (uint64_t *)KRML_HOST_CALLOC(100U, sizeof (uint64_t)); + return buf; +} + +void Hacl_Hash_SHA3_Simd256_state_free(uint64_t *s) +{ + KRML_HOST_FREE(s); +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t 
inputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < inputByteLen / 168U; i0++) + { + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl1, b11 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl2, b21 + i0 * 168U, 168U * sizeof (uint8_t)); + memcpy(bl3, b31 + i0 * 168U, 168U * sizeof (uint8_t)); + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = b_.snd.snd.snd; + uint8_t *b2 = b_.snd.snd.fst; + uint8_t *b1 = b_.snd.fst; + uint8_t *b0 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + 
Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, 
v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws1; + ws[2U] = ws2; + ws[3U] = ws3; + ws[4U] = ws4; + ws[5U] = ws5; + ws[6U] = ws6; + ws[7U] = ws7; + ws[8U] = ws8; + ws[9U] = ws9; + ws[10U] = ws10; + ws[11U] = ws11; + ws[12U] = ws12; + ws[13U] = ws13; + ws[14U] = ws14; + ws[15U] = ws15; + ws[16U] = ws16; + ws[17U] = ws17; + ws[18U] = ws18; + ws[19U] = ws19; + ws[20U] = ws20; + ws[21U] = ws21; + ws[22U] = ws22; + ws[23U] = ws23; + ws[24U] = ws24; + ws[25U] = ws25; + ws[26U] = ws26; + ws[27U] = ws27; + ws[28U] = ws28; + ws[29U] = ws29; + ws[30U] = ws30; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 
4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *input0, + uint8_t *input1, + uint8_t *input2, + uint8_t *input3, + uint32_t inputByteLen +) +{ + uint32_t rem = inputByteLen % 168U; + uint8_t b00[256U] = { 0U }; + uint8_t b10[256U] = { 0U }; + uint8_t b20[256U] = { 0U }; + uint8_t b30[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b_ = { .fst = b00, .snd = { .fst = b10, .snd = { .fst = b20, .snd = b30 } } }; + 
uint32_t rem1 = inputByteLen % 168U; + uint8_t *b01 = input0; + uint8_t *b11 = input1; + uint8_t *b21 = input2; + uint8_t *b31 = input3; + uint8_t *bl3 = b_.snd.snd.snd; + uint8_t *bl2 = b_.snd.snd.fst; + uint8_t *bl1 = b_.snd.fst; + uint8_t *bl0 = b_.fst; + memcpy(bl0, b01 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl1, b11 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl2, b21 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + memcpy(bl3, b31 + inputByteLen - rem1, rem1 * sizeof (uint8_t)); + uint8_t *b32 = b_.snd.snd.snd; + uint8_t *b22 = b_.snd.snd.fst; + uint8_t *b12 = b_.snd.fst; + uint8_t *b02 = b_.fst; + b02[rem] = 0x1FU; + b12[rem] = 0x1FU; + b22[rem] = 0x1FU; + b32[rem] = 0x1FU; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b33 = b_.snd.snd.snd; + uint8_t *b23 = b_.snd.snd.fst; + uint8_t *b13 = b_.snd.fst; + uint8_t *b03 = b_.fst; + ws[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03); + ws[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13); + ws[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23); + ws[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33); + ws[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 32U); + ws[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 32U); + ws[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 32U); + ws[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 32U); + ws[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 64U); + ws[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 64U); + ws[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 64U); + ws[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 64U); + ws[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 96U); + ws[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 96U); + ws[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 96U); + ws[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 96U); + ws[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 128U); + ws[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 128U); + ws[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 128U); + ws[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 128U); + ws[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 160U); + ws[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 160U); + ws[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 160U); + ws[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 160U); + ws[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 192U); + ws[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 192U); + ws[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 192U); + ws[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 192U); + ws[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b03 + 224U); + ws[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b13 + 224U); + ws[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b23 + 224U); + ws[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b33 + 224U); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + 
Lib_IntVector_Intrinsics_vec256 + v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws00 = v0__; + Lib_IntVector_Intrinsics_vec256 ws110 = v2__; + Lib_IntVector_Intrinsics_vec256 ws210 = v1__; + Lib_IntVector_Intrinsics_vec256 ws32 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws40 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws50 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws60 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws70 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws80 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws90 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws100 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws111 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws120 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws130 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws140 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws150 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws160 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws170 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws180 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws190 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws200 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws211 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws220 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws230 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = 
ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws240 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws250 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws260 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws270 = v3__5; + Lib_IntVector_Intrinsics_vec256 v07 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v17 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v27 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v37 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v07, v17); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v27, v37); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws280 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws290 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws300 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws310 = v3__6; + ws[0U] = ws00; + ws[1U] = ws110; + ws[2U] = ws210; + ws[3U] = ws32; + ws[4U] = ws40; + ws[5U] = ws50; + ws[6U] = ws60; + ws[7U] = ws70; + ws[8U] = ws80; + ws[9U] = ws90; + ws[10U] = ws100; + ws[11U] = ws111; + ws[12U] = ws120; + ws[13U] = ws130; + ws[14U] = ws140; + ws[15U] = ws150; + ws[16U] = ws160; + ws[17U] = ws170; + ws[18U] = ws180; + ws[19U] = ws190; + ws[20U] = ws200; + ws[21U] = ws211; + ws[22U] = ws220; + ws[23U] = ws230; + ws[24U] = ws240; + ws[25U] = ws250; + ws[26U] = ws260; + ws[27U] = ws270; + ws[28U] = ws280; + ws[29U] = ws290; + ws[30U] = ws300; + ws[31U] = ws310; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws[i]); + } + uint8_t b04[256U] = { 0U }; + uint8_t b14[256U] = { 0U }; + uint8_t b24[256U] = { 0U }; + uint8_t b34[256U] = { 0U }; + K____uint8_t___uint8_t____K____uint8_t___uint8_t_ + b = { .fst = b04, .snd = { .fst = b14, .snd = { .fst = b24, .snd = b34 } } }; + uint8_t *b35 = b.snd.snd.snd; + uint8_t *b25 = b.snd.snd.fst; + uint8_t *b15 = b.snd.fst; + uint8_t *b05 = b.fst; + b05[167U] = 0x80U; + b15[167U] = 0x80U; + b25[167U] = 0x80U; + b35[167U] = 0x80U; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws33[32U] KRML_POST_ALIGN(32) = { 0U }; + uint8_t *b3 = 
b.snd.snd.snd; + uint8_t *b2 = b.snd.snd.fst; + uint8_t *b1 = b.snd.fst; + uint8_t *b0 = b.fst; + ws33[0U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0); + ws33[1U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1); + ws33[2U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2); + ws33[3U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3); + ws33[4U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 32U); + ws33[5U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 32U); + ws33[6U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 32U); + ws33[7U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 32U); + ws33[8U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 64U); + ws33[9U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 64U); + ws33[10U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 64U); + ws33[11U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 64U); + ws33[12U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 96U); + ws33[13U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 96U); + ws33[14U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 96U); + ws33[15U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 96U); + ws33[16U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 128U); + ws33[17U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 128U); + ws33[18U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 128U); + ws33[19U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 128U); + ws33[20U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 160U); + ws33[21U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 160U); + ws33[22U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 160U); + ws33[23U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 160U); + ws33[24U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 192U); + ws33[25U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 192U); + ws33[26U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 192U); + ws33[27U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 192U); + ws33[28U] = Lib_IntVector_Intrinsics_vec256_load64_le(b0 + 224U); + ws33[29U] = Lib_IntVector_Intrinsics_vec256_load64_le(b1 + 224U); + ws33[30U] = Lib_IntVector_Intrinsics_vec256_load64_le(b2 + 224U); + ws33[31U] = Lib_IntVector_Intrinsics_vec256_load64_le(b3 + 224U); + Lib_IntVector_Intrinsics_vec256 v08 = ws33[0U]; + Lib_IntVector_Intrinsics_vec256 v18 = ws33[1U]; + Lib_IntVector_Intrinsics_vec256 v28 = ws33[2U]; + Lib_IntVector_Intrinsics_vec256 v38 = ws33[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v1_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v08, v18); + Lib_IntVector_Intrinsics_vec256 + v2_7 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v3_7 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v28, v38); + Lib_IntVector_Intrinsics_vec256 + v0__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v1__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_7, v2_7); + Lib_IntVector_Intrinsics_vec256 + v2__7 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 + v3__7 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_7, v3_7); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__7; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__7; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__7; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__7; + Lib_IntVector_Intrinsics_vec256 v09 = ws33[4U]; + Lib_IntVector_Intrinsics_vec256 v19 = ws33[5U]; 
+ Lib_IntVector_Intrinsics_vec256 v29 = ws33[6U]; + Lib_IntVector_Intrinsics_vec256 v39 = ws33[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v1_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v09, v19); + Lib_IntVector_Intrinsics_vec256 + v2_8 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v3_8 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v29, v39); + Lib_IntVector_Intrinsics_vec256 + v0__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v1__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_8, v2_8); + Lib_IntVector_Intrinsics_vec256 + v2__8 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 + v3__8 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_8, v3_8); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__8; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__8; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__8; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__8; + Lib_IntVector_Intrinsics_vec256 v010 = ws33[8U]; + Lib_IntVector_Intrinsics_vec256 v110 = ws33[9U]; + Lib_IntVector_Intrinsics_vec256 v210 = ws33[10U]; + Lib_IntVector_Intrinsics_vec256 v310 = ws33[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v1_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v010, v110); + Lib_IntVector_Intrinsics_vec256 + v2_9 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v3_9 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v210, v310); + Lib_IntVector_Intrinsics_vec256 + v0__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v1__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_9, v2_9); + Lib_IntVector_Intrinsics_vec256 + v2__9 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 + v3__9 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_9, v3_9); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__9; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__9; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__9; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__9; + Lib_IntVector_Intrinsics_vec256 v011 = ws33[12U]; + Lib_IntVector_Intrinsics_vec256 v111 = ws33[13U]; + Lib_IntVector_Intrinsics_vec256 v211 = ws33[14U]; + Lib_IntVector_Intrinsics_vec256 v311 = ws33[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v1_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v011, v111); + Lib_IntVector_Intrinsics_vec256 + v2_10 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v3_10 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v211, v311); + Lib_IntVector_Intrinsics_vec256 + v0__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v1__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v2_10); + Lib_IntVector_Intrinsics_vec256 + v2__10 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 + v3__10 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v3_10); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__10; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__10; 
+ Lib_IntVector_Intrinsics_vec256 ws14 = v1__10; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__10; + Lib_IntVector_Intrinsics_vec256 v012 = ws33[16U]; + Lib_IntVector_Intrinsics_vec256 v112 = ws33[17U]; + Lib_IntVector_Intrinsics_vec256 v212 = ws33[18U]; + Lib_IntVector_Intrinsics_vec256 v312 = ws33[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v012, v112); + Lib_IntVector_Intrinsics_vec256 + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v212, v312); + Lib_IntVector_Intrinsics_vec256 + v0__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v1__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_11, v2_11); + Lib_IntVector_Intrinsics_vec256 + v2__11 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 + v3__11 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_11, v3_11); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__11; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__11; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__11; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__11; + Lib_IntVector_Intrinsics_vec256 v013 = ws33[20U]; + Lib_IntVector_Intrinsics_vec256 v113 = ws33[21U]; + Lib_IntVector_Intrinsics_vec256 v213 = ws33[22U]; + Lib_IntVector_Intrinsics_vec256 v313 = ws33[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v1_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v013, v113); + Lib_IntVector_Intrinsics_vec256 + v2_12 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v3_12 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v213, v313); + Lib_IntVector_Intrinsics_vec256 + v0__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v1__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v2_12); + Lib_IntVector_Intrinsics_vec256 + v2__12 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 + v3__12 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v3_12); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__12; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__12; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__12; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__12; + Lib_IntVector_Intrinsics_vec256 v014 = ws33[24U]; + Lib_IntVector_Intrinsics_vec256 v114 = ws33[25U]; + Lib_IntVector_Intrinsics_vec256 v214 = ws33[26U]; + Lib_IntVector_Intrinsics_vec256 v314 = ws33[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v1_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v014, v114); + Lib_IntVector_Intrinsics_vec256 + v2_13 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v3_13 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v214, v314); + Lib_IntVector_Intrinsics_vec256 + v0__13 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v1__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_13, v2_13); + Lib_IntVector_Intrinsics_vec256 + v2__13 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 + v3__13 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_13, v3_13); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__13; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__13; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__13; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__13; + Lib_IntVector_Intrinsics_vec256 v0 = ws33[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws33[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws33[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws33[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_14 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_14 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v1__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_14, v2_14); + Lib_IntVector_Intrinsics_vec256 + v2__14 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 + v3__14 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_14, v3_14); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__14; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__14; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__14; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__14; + ws33[0U] = ws0; + ws33[1U] = ws1; + ws33[2U] = ws2; + ws33[3U] = ws3; + ws33[4U] = ws4; + ws33[5U] = ws5; + ws33[6U] = ws6; + ws33[7U] = ws7; + ws33[8U] = ws8; + ws33[9U] = ws9; + ws33[10U] = ws10; + ws33[11U] = ws11; + ws33[12U] = ws12; + ws33[13U] = ws13; + ws33[14U] = ws14; + ws33[15U] = ws15; + ws33[16U] = ws16; + ws33[17U] = ws17; + ws33[18U] = ws18; + ws33[19U] = ws19; + ws33[20U] = ws20; + ws33[21U] = ws21; + ws33[22U] = ws22; + ws33[23U] = ws23; + ws33[24U] = ws24; + ws33[25U] = ws25; + ws33[26U] = ws26; + ws33[27U] = ws27; + ws33[28U] = ws28; + ws33[29U] = ws29; + ws33[30U] = ws30; + ws33[31U] = ws31; + for (uint32_t i = 0U; i < 25U; i++) + { + state[i] = Lib_IntVector_Intrinsics_vec256_xor(state[i], ws33[i]); + } + for (uint32_t i0 = 0U; i0 < 24U; i0++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i1, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i1 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i1 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i1 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i1 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + 
Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v015 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v115 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v215 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v315 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v015; + state[1U + 5U * i] = v115; + state[2U + 5U * i] = v215; + state[3U + 5U * i] = v315; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i0]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } +} + +void +Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + Lib_IntVector_Intrinsics_vec256 *state, + uint8_t *output0, + uint8_t *output1, + uint8_t *output2, + uint8_t *output3, + uint32_t outputByteLen +) +{ + for (uint32_t i0 = 0U; i0 < outputByteLen / 168U; i0++) + { + uint8_t hbuf[1024U] = { 0U }; + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 ws[32U] KRML_POST_ALIGN(32) = { 0U }; + memcpy(ws, state, 25U * sizeof (Lib_IntVector_Intrinsics_vec256)); + Lib_IntVector_Intrinsics_vec256 v00 = ws[0U]; + Lib_IntVector_Intrinsics_vec256 v10 = ws[1U]; + Lib_IntVector_Intrinsics_vec256 v20 = ws[2U]; + Lib_IntVector_Intrinsics_vec256 v30 = ws[3U]; + Lib_IntVector_Intrinsics_vec256 + v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v00, v10); + Lib_IntVector_Intrinsics_vec256 + v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + 
v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v20, v30); + Lib_IntVector_Intrinsics_vec256 + v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_, v2_); + Lib_IntVector_Intrinsics_vec256 + v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 + v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_, v3_); + Lib_IntVector_Intrinsics_vec256 ws0 = v0__; + Lib_IntVector_Intrinsics_vec256 ws1 = v2__; + Lib_IntVector_Intrinsics_vec256 ws2 = v1__; + Lib_IntVector_Intrinsics_vec256 ws3 = v3__; + Lib_IntVector_Intrinsics_vec256 v01 = ws[4U]; + Lib_IntVector_Intrinsics_vec256 v11 = ws[5U]; + Lib_IntVector_Intrinsics_vec256 v21 = ws[6U]; + Lib_IntVector_Intrinsics_vec256 v31 = ws[7U]; + Lib_IntVector_Intrinsics_vec256 + v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v01, v11); + Lib_IntVector_Intrinsics_vec256 + v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_0, v2_0); + Lib_IntVector_Intrinsics_vec256 + v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 + v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec256 ws4 = v0__0; + Lib_IntVector_Intrinsics_vec256 ws5 = v2__0; + Lib_IntVector_Intrinsics_vec256 ws6 = v1__0; + Lib_IntVector_Intrinsics_vec256 ws7 = v3__0; + Lib_IntVector_Intrinsics_vec256 v02 = ws[8U]; + Lib_IntVector_Intrinsics_vec256 v12 = ws[9U]; + Lib_IntVector_Intrinsics_vec256 v22 = ws[10U]; + Lib_IntVector_Intrinsics_vec256 v32 = ws[11U]; + Lib_IntVector_Intrinsics_vec256 + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v02, v12); + Lib_IntVector_Intrinsics_vec256 + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v22, v32); + Lib_IntVector_Intrinsics_vec256 + v0__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v1__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_1, v2_1); + Lib_IntVector_Intrinsics_vec256 + v2__1 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 + v3__1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec256 ws8 = v0__1; + Lib_IntVector_Intrinsics_vec256 ws9 = v2__1; + Lib_IntVector_Intrinsics_vec256 ws10 = v1__1; + Lib_IntVector_Intrinsics_vec256 ws11 = v3__1; + Lib_IntVector_Intrinsics_vec256 v03 = ws[12U]; + Lib_IntVector_Intrinsics_vec256 v13 = ws[13U]; + Lib_IntVector_Intrinsics_vec256 v23 = ws[14U]; + Lib_IntVector_Intrinsics_vec256 v33 = ws[15U]; + Lib_IntVector_Intrinsics_vec256 + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v03, v13); + Lib_IntVector_Intrinsics_vec256 + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v03, v13); + 
Lib_IntVector_Intrinsics_vec256 + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v23, v33); + Lib_IntVector_Intrinsics_vec256 + v0__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v1__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_2, v2_2); + Lib_IntVector_Intrinsics_vec256 + v2__2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 + v3__2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec256 ws12 = v0__2; + Lib_IntVector_Intrinsics_vec256 ws13 = v2__2; + Lib_IntVector_Intrinsics_vec256 ws14 = v1__2; + Lib_IntVector_Intrinsics_vec256 ws15 = v3__2; + Lib_IntVector_Intrinsics_vec256 v04 = ws[16U]; + Lib_IntVector_Intrinsics_vec256 v14 = ws[17U]; + Lib_IntVector_Intrinsics_vec256 v24 = ws[18U]; + Lib_IntVector_Intrinsics_vec256 v34 = ws[19U]; + Lib_IntVector_Intrinsics_vec256 + v0_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v1_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v04, v14); + Lib_IntVector_Intrinsics_vec256 + v2_3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v3_3 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v24, v34); + Lib_IntVector_Intrinsics_vec256 + v0__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v1__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_3, v2_3); + Lib_IntVector_Intrinsics_vec256 + v2__3 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 + v3__3 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_3, v3_3); + Lib_IntVector_Intrinsics_vec256 ws16 = v0__3; + Lib_IntVector_Intrinsics_vec256 ws17 = v2__3; + Lib_IntVector_Intrinsics_vec256 ws18 = v1__3; + Lib_IntVector_Intrinsics_vec256 ws19 = v3__3; + Lib_IntVector_Intrinsics_vec256 v05 = ws[20U]; + Lib_IntVector_Intrinsics_vec256 v15 = ws[21U]; + Lib_IntVector_Intrinsics_vec256 v25 = ws[22U]; + Lib_IntVector_Intrinsics_vec256 v35 = ws[23U]; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v05, v15); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v25, v35); + Lib_IntVector_Intrinsics_vec256 + v0__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v1__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_4, v2_4); + Lib_IntVector_Intrinsics_vec256 + v2__4 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 + v3__4 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_4, v3_4); + Lib_IntVector_Intrinsics_vec256 ws20 = v0__4; + Lib_IntVector_Intrinsics_vec256 ws21 = v2__4; + Lib_IntVector_Intrinsics_vec256 ws22 = v1__4; + Lib_IntVector_Intrinsics_vec256 ws23 = v3__4; + Lib_IntVector_Intrinsics_vec256 v06 = ws[24U]; + Lib_IntVector_Intrinsics_vec256 v16 = ws[25U]; + Lib_IntVector_Intrinsics_vec256 v26 = ws[26U]; + Lib_IntVector_Intrinsics_vec256 v36 = ws[27U]; + Lib_IntVector_Intrinsics_vec256 + v0_5 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v1_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v06, v16); + Lib_IntVector_Intrinsics_vec256 + v2_5 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v3_5 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v26, v36); + Lib_IntVector_Intrinsics_vec256 + v0__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v1__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_5, v2_5); + Lib_IntVector_Intrinsics_vec256 + v2__5 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v3__5 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 ws24 = v0__5; + Lib_IntVector_Intrinsics_vec256 ws25 = v2__5; + Lib_IntVector_Intrinsics_vec256 ws26 = v1__5; + Lib_IntVector_Intrinsics_vec256 ws27 = v3__5; + Lib_IntVector_Intrinsics_vec256 v0 = ws[28U]; + Lib_IntVector_Intrinsics_vec256 v1 = ws[29U]; + Lib_IntVector_Intrinsics_vec256 v2 = ws[30U]; + Lib_IntVector_Intrinsics_vec256 v3 = ws[31U]; + Lib_IntVector_Intrinsics_vec256 + v0_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v1_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0, v1); + Lib_IntVector_Intrinsics_vec256 + v2_6 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v3_6 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v2, v3); + Lib_IntVector_Intrinsics_vec256 + v0__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v1__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_6, v2_6); + Lib_IntVector_Intrinsics_vec256 + v2__6 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 + v3__6 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_6, v3_6); + Lib_IntVector_Intrinsics_vec256 ws28 = v0__6; + Lib_IntVector_Intrinsics_vec256 ws29 = v2__6; + Lib_IntVector_Intrinsics_vec256 ws30 = v1__6; + Lib_IntVector_Intrinsics_vec256 ws31 = v3__6; + ws[0U] = ws0; + ws[1U] = ws4; + ws[2U] = ws8; + ws[3U] = ws12; + ws[4U] = ws16; + ws[5U] = ws20; + ws[6U] = ws24; + ws[7U] = ws28; + ws[8U] = ws1; + ws[9U] = ws5; + ws[10U] = ws9; + ws[11U] = ws13; + ws[12U] = ws17; + ws[13U] = ws21; + ws[14U] = ws25; + ws[15U] = ws29; + ws[16U] = ws2; + ws[17U] = ws6; + ws[18U] = ws10; + ws[19U] = ws14; + ws[20U] = ws18; + ws[21U] = ws22; + ws[22U] = ws26; + ws[23U] = ws30; + ws[24U] = ws3; + ws[25U] = ws7; + ws[26U] = ws11; + ws[27U] = ws15; + ws[28U] = ws19; + ws[29U] = ws23; + ws[30U] = ws27; + ws[31U] = ws31; + for (uint32_t i = 0U; i < 32U; i++) + { + Lib_IntVector_Intrinsics_vec256_store64_le(hbuf + i * 32U, ws[i]); + } + uint8_t *b0 = output0; + uint8_t *b1 = output1; + uint8_t *b2 = output2; + uint8_t *b3 = output3; + memcpy(b0 + i0 * 168U, hbuf, 168U * sizeof (uint8_t)); + memcpy(b1 + i0 * 168U, hbuf + 256U, 168U * sizeof (uint8_t)); + memcpy(b2 + i0 * 168U, hbuf + 512U, 168U * sizeof (uint8_t)); + memcpy(b3 + i0 * 168U, hbuf + 768U, 168U * sizeof (uint8_t)); + for (uint32_t i1 = 0U; i1 < 24U; i1++) + { + KRML_PRE_ALIGN(32) Lib_IntVector_Intrinsics_vec256 _C[5U] KRML_POST_ALIGN(32) = { 0U }; + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____0 = state[i + 0U]; + Lib_IntVector_Intrinsics_vec256 uu____1 = state[i + 5U]; + 
Lib_IntVector_Intrinsics_vec256 uu____2 = state[i + 10U]; + _C[i] = + Lib_IntVector_Intrinsics_vec256_xor(uu____0, + Lib_IntVector_Intrinsics_vec256_xor(uu____1, + Lib_IntVector_Intrinsics_vec256_xor(uu____2, + Lib_IntVector_Intrinsics_vec256_xor(state[i + 15U], state[i + 20U]))));); + KRML_MAYBE_FOR5(i2, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____3 = _C[(i2 + 4U) % 5U]; + Lib_IntVector_Intrinsics_vec256 uu____4 = _C[(i2 + 1U) % 5U]; + Lib_IntVector_Intrinsics_vec256 + _D = + Lib_IntVector_Intrinsics_vec256_xor(uu____3, + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____4, + 1U), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____4, 63U))); + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + state[i2 + 5U * i] = Lib_IntVector_Intrinsics_vec256_xor(state[i2 + 5U * i], _D););); + Lib_IntVector_Intrinsics_vec256 x = state[1U]; + Lib_IntVector_Intrinsics_vec256 current = x; + for (uint32_t i = 0U; i < 24U; i++) + { + uint32_t _Y = Hacl_Impl_SHA3_Vec_keccak_piln[i]; + uint32_t r = Hacl_Impl_SHA3_Vec_keccak_rotc[i]; + Lib_IntVector_Intrinsics_vec256 temp = state[_Y]; + Lib_IntVector_Intrinsics_vec256 uu____5 = current; + state[_Y] = + Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_left64(uu____5, + r), + Lib_IntVector_Intrinsics_vec256_shift_right64(uu____5, 64U - r)); + current = temp; + } + KRML_MAYBE_FOR5(i, + 0U, + 5U, + 1U, + Lib_IntVector_Intrinsics_vec256 uu____6 = state[0U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____7 = Lib_IntVector_Intrinsics_vec256_lognot(state[1U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v07 = + Lib_IntVector_Intrinsics_vec256_xor(uu____6, + Lib_IntVector_Intrinsics_vec256_and(uu____7, state[2U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____8 = state[1U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____9 = Lib_IntVector_Intrinsics_vec256_lognot(state[2U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v17 = + Lib_IntVector_Intrinsics_vec256_xor(uu____8, + Lib_IntVector_Intrinsics_vec256_and(uu____9, state[3U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____10 = state[2U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____11 = Lib_IntVector_Intrinsics_vec256_lognot(state[3U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v27 = + Lib_IntVector_Intrinsics_vec256_xor(uu____10, + Lib_IntVector_Intrinsics_vec256_and(uu____11, state[4U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____12 = state[3U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____13 = Lib_IntVector_Intrinsics_vec256_lognot(state[4U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v37 = + Lib_IntVector_Intrinsics_vec256_xor(uu____12, + Lib_IntVector_Intrinsics_vec256_and(uu____13, state[0U + 5U * i])); + Lib_IntVector_Intrinsics_vec256 uu____14 = state[4U + 5U * i]; + Lib_IntVector_Intrinsics_vec256 + uu____15 = Lib_IntVector_Intrinsics_vec256_lognot(state[0U + 5U * i]); + Lib_IntVector_Intrinsics_vec256 + v4 = + Lib_IntVector_Intrinsics_vec256_xor(uu____14, + Lib_IntVector_Intrinsics_vec256_and(uu____15, state[1U + 5U * i])); + state[0U + 5U * i] = v07; + state[1U + 5U * i] = v17; + state[2U + 5U * i] = v27; + state[3U + 5U * i] = v37; + state[4U + 5U * i] = v4;); + uint64_t c = Hacl_Impl_SHA3_Vec_keccak_rndc[i1]; + Lib_IntVector_Intrinsics_vec256 uu____16 = state[0U]; + state[0U] = + Lib_IntVector_Intrinsics_vec256_xor(uu____16, + Lib_IntVector_Intrinsics_vec256_load64(c)); + } + } } diff --git a/sys/hacl/src/bindings.rs b/sys/hacl/src/bindings.rs index dc1fcb151..bba565ebc 100644 --- 
a/sys/hacl/src/bindings.rs +++ b/sys/hacl/src/bindings.rs @@ -321,6 +321,71 @@ extern "C" { extern "C" { pub fn Hacl_Hash_SHA3_sha3_512(output: *mut u8, input: *mut u8, input_len: u32); } +extern "C" { + pub fn Hacl_Hash_SHA3_Scalar_state_malloc() -> *mut u64; +} +extern "C" { + pub fn Hacl_Hash_SHA3_Scalar_state_free(s: *mut u64); +} +extern "C" { + pub fn Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks( + state: *mut u64, + input: *mut u8, + inputByteLen: u32, + ); +} +extern "C" { + pub fn Hacl_Hash_SHA3_Scalar_shake128_absorb_final( + state: *mut u64, + input: *mut u8, + inputByteLen: u32, + ); +} +extern "C" { + pub fn Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks( + state: *mut u64, + output: *mut u8, + outputByteLen: u32, + ); +} + +extern "C" { + pub fn Hacl_Hash_SHA3_Simd256_state_malloc() -> *mut Lib_IntVector_Intrinsics_vec256; +} +extern "C" { + pub fn Hacl_Hash_SHA3_Simd256_state_free(s: *mut Lib_IntVector_Intrinsics_vec256); +} +extern "C" { + pub fn Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks( + state: *mut Lib_IntVector_Intrinsics_vec256, + input0: *mut u8, + input1: *mut u8, + input2: *mut u8, + input3: *mut u8, + inputByteLen: u32, + ); +} +extern "C" { + pub fn Hacl_Hash_SHA3_Simd256_shake128_absorb_final( + state: *mut Lib_IntVector_Intrinsics_vec256, + input0: *mut u8, + input1: *mut u8, + input2: *mut u8, + input3: *mut u8, + inputByteLen: u32, + ); +} +extern "C" { + pub fn Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks( + state: *mut Lib_IntVector_Intrinsics_vec256, + output0: *mut u8, + output1: *mut u8, + output2: *mut u8, + output3: *mut u8, + outputByteLen: u32, + ); +} + extern "C" { pub fn Hacl_Hash_SHA3_absorb_inner(rateInBytes: u32, block: *mut u8, s: *mut u64); } diff --git a/sys/pqclean/src/bindings.rs b/sys/pqclean/src/bindings.rs index d50f83289..59a2d73d9 100644 --- a/sys/pqclean/src/bindings.rs +++ b/sys/pqclean/src/bindings.rs @@ -1,4 +1,4 @@ -/* automatically generated by rust-bindgen 0.69.2 */ +/* automatically generated by rust-bindgen 0.69.1 */ pub const SHAKE128_RATE: u32 = 168; pub const SHAKE256_RATE: u32 = 136;
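
Usage sketch (not part of the patch): the new `Hacl_Hash_SHA3_Scalar_*` bindings expose HACL*'s incremental SHAKE128 as raw `extern "C"` functions, so callers need an unsafe wrapper. The call order below — `absorb_nblocks` for the complete 168-byte blocks, `absorb_final` for the trailing partial block plus the 0x1F/0x80 sponge padding, then `squeeze_nblocks` for whole output blocks — is inferred from the C implementation in this patch; the wrapper name, import path, and length checks are illustrative assumptions, not part of this change.

    // Hypothetical wrapper; adjust the import to wherever the generated
    // bindings are re-exported.
    use crate::bindings::*;

    const SHAKE128_RATE: usize = 168; // SHAKE128 block size in bytes

    /// Absorb `input`, then squeeze `out.len()` bytes of SHAKE128 output.
    /// `out.len()` is assumed to be a multiple of the 168-byte rate,
    /// since `squeeze_nblocks` only emits whole blocks.
    pub fn shake128_xof(input: &[u8], out: &mut [u8]) {
        assert_eq!(out.len() % SHAKE128_RATE, 0);
        unsafe {
            let st = Hacl_Hash_SHA3_Scalar_state_malloc();
            // The generated signatures take `*mut u8` even for read-only
            // input, hence the const-to-mut casts.
            // Absorb all complete 168-byte blocks of the input.
            Hacl_Hash_SHA3_Scalar_shake128_absorb_nblocks(
                st, input.as_ptr() as *mut u8, input.len() as u32);
            // Absorb the remaining partial block and apply the padding.
            Hacl_Hash_SHA3_Scalar_shake128_absorb_final(
                st, input.as_ptr() as *mut u8, input.len() as u32);
            // Squeeze out.len()/168 blocks of output.
            Hacl_Hash_SHA3_Scalar_shake128_squeeze_nblocks(
                st, out.as_mut_ptr(), out.len() as u32);
            Hacl_Hash_SHA3_Scalar_state_free(st);
        }
    }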
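
The `Hacl_Hash_SHA3_Simd256_*` functions are the 4-way AVX2 analogue: one vectorized state processes four equal-length inputs in parallel (presumably to serve the batched `Shake128StateX4` use in the Kyber code). A sketch of a batched call under the same caveats (hypothetical wrapper, inferred call order). Note that for inputs shorter than one 168-byte block — the common seed-expansion case — `absorb_nblocks` loops zero times, so `absorb_final` alone suffices.

    /// Batched SHAKE128 over four equal-length inputs; each output slice
    /// is assumed to be the same multiple of the 168-byte rate.
    pub fn shake128_x4(
        in0: &[u8], in1: &[u8], in2: &[u8], in3: &[u8],
        out0: &mut [u8], out1: &mut [u8], out2: &mut [u8], out3: &mut [u8],
    ) {
        let len = in0.len();
        assert!(in1.len() == len && in2.len() == len && in3.len() == len);
        let out_len = out0.len();
        assert!(out_len % 168 == 0);
        assert!(out1.len() == out_len && out2.len() == out_len && out3.len() == out_len);
        unsafe {
            let st = Hacl_Hash_SHA3_Simd256_state_malloc();
            // Full blocks of all four inputs, processed in lockstep.
            Hacl_Hash_SHA3_Simd256_shake128_absorb_nblocks(
                st,
                in0.as_ptr() as *mut u8, in1.as_ptr() as *mut u8,
                in2.as_ptr() as *mut u8, in3.as_ptr() as *mut u8,
                len as u32,
            );
            // Trailing partial blocks plus padding, then the final permutation.
            Hacl_Hash_SHA3_Simd256_shake128_absorb_final(
                st,
                in0.as_ptr() as *mut u8, in1.as_ptr() as *mut u8,
                in2.as_ptr() as *mut u8, in3.as_ptr() as *mut u8,
                len as u32,
            );
            // Squeeze out_len/168 blocks into each of the four outputs.
            Hacl_Hash_SHA3_Simd256_shake128_squeeze_nblocks(
                st,
                out0.as_mut_ptr(), out1.as_mut_ptr(),
                out2.as_mut_ptr(), out3.as_mut_ptr(),
                out_len as u32,
            );
            Hacl_Hash_SHA3_Simd256_state_free(st);
        }
    }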