From 71091b7bcf18489a0c7a866d044b622411056829 Mon Sep 17 00:00:00 2001
From: Bobbin Threadbare <bobbinth@protonmail.com>
Date: Thu, 5 Oct 2023 01:11:00 -0700
Subject: [PATCH] fix: assert2 errors and minor refactoring

---
 core/src/lib.rs                          |  2 +-
 stdlib/Cargo.toml                        | 10 +++++-----
 stdlib/asm/crypto/dsa/rpo_falcon512.masm | 10 +++++-----
 stdlib/docs/crypto/dsa/rpo_falcon512.md  | 15 +++++++++++++++
 stdlib/tests/crypto/falcon.rs            |  5 ++++-
 5 files changed, 30 insertions(+), 12 deletions(-)
 create mode 100644 stdlib/docs/crypto/dsa/rpo_falcon512.md
diff --git a/core/src/lib.rs b/core/src/lib.rs
index a3f09da52f..b7d3050f94 100644
--- a/core/src/lib.rs
+++ b/core/src/lib.rs
@@ -30,7 +30,7 @@ pub mod crypto {
     }
 
     pub mod dsa {
-        pub use miden_crypto::dsa::rpo_falcon512::*;
+        pub use miden_crypto::dsa::rpo_falcon512;
     }
 }
 
diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml
index 33ec8d1a76..6ce166c368 100644
--- a/stdlib/Cargo.toml
+++ b/stdlib/Cargo.toml
@@ -27,13 +27,13 @@ std = ["test-utils/std"]
 assembly = { package = "miden-assembly", default-features = false, path = "../assembly", version = "0.7" }
 
 [dev-dependencies]
-blake3 = "1.3.3"
+blake3 = "1.4"
 miden-air = { package = "miden-air", path = "../air", version = "0.7", default-features = false }
-num-bigint = "0.4.3"
+num-bigint = "0.4"
 processor = { package = "miden-processor", path = "../processor", version = "0.7", features = ["internals"], default-features = false }
-serde_json = "1.0.94"
-sha2 = "0.10.6"
-sha3 = "0.10.6"
+serde_json = "1.0"
+sha2 = "0.10"
+sha3 = "0.10"
 test-utils = { package = "miden-test-utils", path = "../test-utils" }
 winter-air = { package = "winter-air", version = "0.6" }
 winter-fri = { package = "winter-fri", version = "0.6" }
diff --git a/stdlib/asm/crypto/dsa/rpo_falcon512.masm b/stdlib/asm/crypto/dsa/rpo_falcon512.masm
index 140ac74a33..5558fa1723 100644
--- a/stdlib/asm/crypto/dsa/rpo_falcon512.masm
+++ b/stdlib/asm/crypto/dsa/rpo_falcon512.masm
@@ -26,7 +26,7 @@ export.mod_12289
     adv.push_u64div
 
     adv_push.2
-    u32assert.2
+    u32assert2
 
     swap
     push.M
@@ -189,18 +189,18 @@ export.load_h_s2_and_product.1
         adv_pipe
 
         dupw.1
-        u32assert.2
+        u32assert2
         push.M u32unchecked_lt assert
         push.M u32unchecked_lt assert
-        u32assert.2
+        u32assert2
         push.M u32unchecked_lt assert
         push.M u32unchecked_lt assert
 
         dupw
-        u32assert.2
+        u32assert2
         push.M u32unchecked_lt assert
         push.M u32unchecked_lt assert
-        u32assert.2
+        u32assert2
         push.M u32unchecked_lt assert
         push.M u32unchecked_lt assert
 
diff --git a/stdlib/docs/crypto/dsa/rpo_falcon512.md b/stdlib/docs/crypto/dsa/rpo_falcon512.md
new file mode 100644
index 0000000000..f9c7e5c13f
--- /dev/null
+++ b/stdlib/docs/crypto/dsa/rpo_falcon512.md
@@ -0,0 +1,15 @@
+
+## std::crypto::dsa::rpo_falcon512
+| Procedure | Description |
+| ----------- | ------------- |
+| mod_12289 | Given dividend ( i.e. field element a ) on stack top, this routine computes c = a % 12289<br /><br />Expected stack state<br /><br />[a, ...]<br /><br />Output stack state looks like<br /><br />[c, ...] \| c = a % 12289 |
+| hash_to_point | Takes as input a message digest, a nonce of size 40 bytes represented as 8 field elements<br /><br />and a pointer. The procedure absorbs MSG and NONCE into a fresh RPO state and squeezes the<br /><br />coefficients of a polynomial c representing the hash-to-point of (MSG \|\| NONCE). The coefficients<br /><br />are then saved in the memory region [c_ptr, c_ptr + 128).<br /><br />This implementation of the `hash_to_point` procedure avoids the rejection-sampling step<br /><br />required in the per-the-spec algorithm by using the observation on page 31 in<br /><br />https://falcon-sign.info/falcon.pdf<br /><br />Input: [c_ptr, MSG, NONCE1, NONCE0, ...]<br /><br />Output: [...]<br /><br />Cycles: 1327 |
+| powers_of_tau | For an element `tau := (tau0, tau1)` in the quadratic extension field, computes all its powers<br /><br />`tau^i` for `i = 0,..., 512` and stores them in the memory region `[tau_ptr, tau_ptr + 513)`.<br /><br />The procedure returns `tau_ptr + 513`.<br /><br />Input: [tau1, tau0, tau_ptr, ...]<br /><br />Output: [tau_ptr + 513, ...]<br /><br />Cycles: 8323 |
+| set_to_zero | Sets the memory region `[ptr, ptr + 512)` to zero. The pointer c_ptr := ptr + 512 is returned<br /><br />to be used to store the hash-to-point polynomial of the message later on.<br /><br />Input: [ptr, ...]<br /><br />Output: [...]<br /><br />Cycles: 2607 |
+| load_h_s2_and_product | Takes as input PK, the hash of the coefficients of the polynomial `h` representing the expanded<br /><br />public key, and a pointer to the memory location where the coefficients of the polynomial `h`<br /><br />will be stored.<br /><br />The procedure loads `h` from the advice stack and compares its hash with the provided hash `PK`.<br /><br />It then loads the polynomial `s2` representing the signature from the advice stack and lays it<br /><br />in memory right after `h`.<br /><br />It then loads the claimed polynomial `h * s2` in Z_Q[x] where Q is the Miden VM prime from<br /><br />the advice stack and lays it right after `s2`.<br /><br />The hash of `h`, `s2` and the claimed product is also computed and the first two field elements<br /><br />of the digest (i.e., the Fiat-Shamir challenge) are returned on the stack alongside<br /><br />the incremented pointer.<br /><br />Input: [ptr, PK, ...]<br /><br />Output: [tau1, tau0, ptr + 512 ...]<br /><br />Cycles: 5049 |
+| probablistic_product | Checks that pi == h * s2 in Z_Q[x] by evaluating both sides at a random point.<br /><br />The procedure takes as input a pointer h_ptr to h. The other two polynomials<br /><br />are located at h_ptr + 128, for s2, and h_ptr + 256, for pi. The procedure takes<br /><br />also a pointer zeros_ptr to a region of memory [zeros_ptr, zeros_ptr + 1024)<br /><br />and a pointer tau_ptr to powers of the random point we are evaluating at stored<br /><br />as [a_i, b_i, x, x] where (a_i, b_i) := tau^i for i in [0, 1023].<br /><br />The procedure returns () if the check passes, otherwise it raises an exception<br /><br />related to an unsatisfied assertion.<br /><br />Input: [h_ptr, zeros_ptr, tau_ptr, ...]<br /><br />Output: [...]<br /><br />Cycles: 2504 |
+| norm_sq | Normalizes an `e` in [0, q) to be in [-(q-1) << 1, (q-1) << 1) and returns its square norm.<br /><br />We use the following formula to do so:<br /><br />normalize(e) = e^2 - phi * (2*q*e - q^2) where phi := (e > (q - 1)/2)<br /><br />The formula implements:<br /><br />if e > (q-1)/2:<br /><br />return (q - e)^2<br /><br />else:<br /><br />return e^2<br /><br />The use of the formula avoids using the if-else block.<br /><br />Input: [e, ...]<br /><br />Output [norm(e)^2, ...]<br /><br />Cycles: 21 |
+| diff_mod_q | On input a tuple (u, w, v), the following computes (v - (u + (- w % q) % q) % q).<br /><br />We can avoid doing three modular reductions by using the following facts:<br /><br />1. q is much smaller than the Miden prime. Precisely, q * 2^50 < Q<br /><br />2. The coefficients of the product polynomial, u and w, are less than J := 512 * q^2<br /><br />3. The coefficients of c are less than q.<br /><br />This means that we can substitute (v - (u + (- w % q) % q) % q) with  v + w + J - u without<br /><br />risking Q-overflow since \|v + w + J - u\| < 1025 * q^2<br /><br />To get the final result we reduce (v + w + J - u) modulo q.<br /><br />Input: [v, w, u, ...]<br /><br />Output: [e, ...]<br /><br />Cycles: 44 |
+| compute_s1_norm_sq | Takes a pointer to a polynomial pi of degree less than 1024 with coefficients in Z_Q and<br /><br />a polynomial c of degree 512 with coefficients also in Z_Q, where Q is the Miden prime.<br /><br />The goal is to compute s1 = c - pi  = c - h * s2 in Z_q[x]/(phi) where q is the Falcon prime.<br /><br />The pointer pi_ptr points both to pi and c through the relation c_ptr = pi_ptr + offset<br /><br />where offset := 1281.<br /><br />The naive way to compute s1 would be to first reduce the polynomial pi modulo the Falcon<br /><br />prime q and then modulo the irreducible polynomial phi = x^512 + 1. Then we would need to negate<br /><br />the coefficients of pi modulo q and only then can we add these coefficients to the coefficients<br /><br />of c and then reduce the result modulo q one more time.<br /><br />Knowing that the end goal of computing c is to compute its norm squared, we can do better.<br /><br />We can compute s1 in a single pass by delaying the q-modular reduction til the end. This can<br /><br />be achieved through a careful analysis of the computation of the difference between pi and c.<br /><br />The i-th coefficient s1_i of s1 is equal to c_i - (pi_i - pi_{512 + i}) which is equal to<br /><br />c_i  + pi_{512 + i} - pi_i. Now, we know that the size of the pi_i coefficients is bounded by<br /><br />J := 512 * q^2 and this means that J + pi_{512 + i} - pi_i does not Q-underflow and since<br /><br />J = 0 modulo q, the addition of J does not affect the final result. It is also important to<br /><br />note that adding J does not Q-overflow by virtue of q * 2^50 < Q.<br /><br />All of the above implies that we can compute s1_i with only one modular reduction at the end,<br /><br />in addition to one modular reduction applied to c_i.<br /><br />Moreover, since we are only interested in the square norm of s1_i, we do not have to store<br /><br />s1_i and then load it at a later point, and instead we can immediatly follow the computation<br /><br />of s1_i with computing its square norm.<br /><br />After computing the square norm of s1_i, we can accumulate into an accumulator to compute the<br /><br />sum of the square norms of all the coefficients of polynomial c. Using the overflow stack, this<br /><br />can be delayed til the end.<br /><br />Input: [pi_ptr, ...]<br /><br />Output: [norm_sq(s1), ...]<br /><br />Cycles: 58888 |
+| compute_s2_norm_sq | Compute the square norm of the polynomial s2 given a pointer to its coefficients.<br /><br />Input: [s2_ptr, ...]<br /><br />Output: [norm_sq(s2), ...]<br /><br />Cycles: 13322 |
+| verify | Verifies a signature against a public key and a message. The procedure gets as inputs the hash<br /><br />of the public key and the hash of the message via the operand stack. The signature is provided<br /><br />via the advice stack.<br /><br />The signature is valid if and only if the procedure returns.<br /><br />Input: [PK, MSG, NONCE1, NONCE1, ...]<br /><br />Output: [...]<br /><br />Cycles: ~ 92029 |
diff --git a/stdlib/tests/crypto/falcon.rs b/stdlib/tests/crypto/falcon.rs
index ecac149343..f149addfae 100644
--- a/stdlib/tests/crypto/falcon.rs
+++ b/stdlib/tests/crypto/falcon.rs
@@ -2,7 +2,10 @@ use miden_air::{Felt, StarkField};
 
 use std::vec;
 use test_utils::{
-    crypto::{KeyPair, Polynomial, Rpo256, Signature},
+    crypto::{
+        rpo_falcon512::{KeyPair, Polynomial, Signature},
+        Rpo256,
+    },
     rand::rand_vector,
     Test, Word,
 };