From 71091b7bcf18489a0c7a866d044b622411056829 Mon Sep 17 00:00:00 2001 From: Bobbin Threadbare Date: Thu, 5 Oct 2023 01:11:00 -0700 Subject: [PATCH] fix: assert2 errors and minor refactoring --- core/src/lib.rs | 2 +- stdlib/Cargo.toml | 10 +++++----- stdlib/asm/crypto/dsa/rpo_falcon512.masm | 10 +++++----- stdlib/docs/crypto/dsa/rpo_falcon512.md | 15 +++++++++++++++ stdlib/tests/crypto/falcon.rs | 5 ++++- 5 files changed, 30 insertions(+), 12 deletions(-) create mode 100644 stdlib/docs/crypto/dsa/rpo_falcon512.md diff --git a/core/src/lib.rs b/core/src/lib.rs index a3f09da52f..b7d3050f94 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -30,7 +30,7 @@ pub mod crypto { } pub mod dsa { - pub use miden_crypto::dsa::rpo_falcon512::*; + pub use miden_crypto::dsa::rpo_falcon512; } } diff --git a/stdlib/Cargo.toml b/stdlib/Cargo.toml index 33ec8d1a76..6ce166c368 100644 --- a/stdlib/Cargo.toml +++ b/stdlib/Cargo.toml @@ -27,13 +27,13 @@ std = ["test-utils/std"] assembly = { package = "miden-assembly", default-features = false, path = "../assembly", version = "0.7" } [dev-dependencies] -blake3 = "1.3.3" +blake3 = "1.4" miden-air = { package = "miden-air", path = "../air", version = "0.7", default-features = false } -num-bigint = "0.4.3" +num-bigint = "0.4" processor = { package = "miden-processor", path = "../processor", version = "0.7", features = ["internals"], default-features = false } -serde_json = "1.0.94" -sha2 = "0.10.6" -sha3 = "0.10.6" +serde_json = "1.0" +sha2 = "0.10" +sha3 = "0.10" test-utils = { package = "miden-test-utils", path = "../test-utils" } winter-air = { package = "winter-air", version = "0.6" } winter-fri = { package = "winter-fri", version = "0.6" } diff --git a/stdlib/asm/crypto/dsa/rpo_falcon512.masm b/stdlib/asm/crypto/dsa/rpo_falcon512.masm index 140ac74a33..5558fa1723 100644 --- a/stdlib/asm/crypto/dsa/rpo_falcon512.masm +++ b/stdlib/asm/crypto/dsa/rpo_falcon512.masm @@ -26,7 +26,7 @@ export.mod_12289 adv.push_u64div adv_push.2 - u32assert.2 + u32assert2 swap push.M @@ -189,18 +189,18 @@ export.load_h_s2_and_product.1 adv_pipe dupw.1 - u32assert.2 + u32assert2 push.M u32unchecked_lt assert push.M u32unchecked_lt assert - u32assert.2 + u32assert2 push.M u32unchecked_lt assert push.M u32unchecked_lt assert dupw - u32assert.2 + u32assert2 push.M u32unchecked_lt assert push.M u32unchecked_lt assert - u32assert.2 + u32assert2 push.M u32unchecked_lt assert push.M u32unchecked_lt assert diff --git a/stdlib/docs/crypto/dsa/rpo_falcon512.md b/stdlib/docs/crypto/dsa/rpo_falcon512.md new file mode 100644 index 0000000000..f9c7e5c13f --- /dev/null +++ b/stdlib/docs/crypto/dsa/rpo_falcon512.md @@ -0,0 +1,15 @@ + +## std::crypto::dsa::rpo_falcon512 +| Procedure | Description | +| ----------- | ------------- | +| mod_12289 | Given dividend ( i.e. field element a ) on stack top, this routine computes c = a % 12289

Expected stack state

[a, ...]

Output stack state looks like

[c, ...] \| c = a % 12289 | +| hash_to_point | Takes as input a message digest, a nonce of size 40 bytes represented as 8 field elements

and a pointer. The procedure absorbs MSG and NONCE into a fresh RPO state and squeezes the

coefficients of a polynomial c representing the hash-to-point of (MSG \|\| NONCE). The coefficients

are then saved in the memory region [c_ptr, c_ptr + 128).

This implementation of the `hash_to_point` procedure avoids the rejection-sampling step

required in the per-the-spec algorithm by using the observation on page 31 in

https://falcon-sign.info/falcon.pdf

Input: [c_ptr, MSG, NONCE1, NONCE0, ...]

Output: [...]

Cycles: 1327 | +| powers_of_tau | For an element `tau := (tau0, tau1)` in the quadratic extension field, computes all its powers

`tau^i` for `i = 0,..., 512` and stores them in the memory region `[tau_ptr, tau_ptr + 513)`.

The procedure returns `tau_ptr + 513`.

Input: [tau1, tau0, tau_ptr, ...]

Output: [tau_ptr + 513, ...]

Cycles: 8323 | +| set_to_zero | Sets the memory region `[ptr, ptr + 512)` to zero. The pointer c_ptr := ptr + 512 is returned

to be used to store the hash-to-point polynomial of the message later on.

Input: [ptr, ...]

Output: [...]

Cycles: 2607 | +| load_h_s2_and_product | Takes as input PK, the hash of the coefficients of the polynomial `h` representing the expanded

public key, and a pointer to the memory location where the coefficients of the polynomial `h`

will be stored.

The procedure loads `h` from the advice stack and compares its hash with the provided hash `PK`.

It then loads the polynomial `s2` representing the signature from the advice stack and lays it

in memory right after `h`.

It then loads the claimed polynomial `h * s2` in Z_Q[x] where Q is the Miden VM prime from

the advice stack and lays it right after `s2`.

The hash of `h`, `s2` and the claimed product is also computed and the first two field elements

of the digest (i.e., the Fiat-Shamir challenge) are returned on the stack alongside

the incremented pointer.

Input: [ptr, PK, ...]

Output: [tau1, tau0, ptr + 512 ...]

Cycles: 5049 | +| probablistic_product | Checks that pi == h * s2 in Z_Q[x] by evaluating both sides at a random point.

The procedure takes as input a pointer h_ptr to h. The other two polynomials

are located at h_ptr + 128, for s2, and h_ptr + 256, for pi. The procedure takes

also a pointer zeros_ptr to a region of memory [zeros_ptr, zeros_ptr + 1024)

and a pointer tau_ptr to powers of the random point we are evaluating at stored

as [a_i, b_i, x, x] where (a_i, b_i) := tau^i for i in [0, 1023].

The procedure returns () if the check passes, otherwise it raises an exception

related to an unsatisfied assertion.

Input: [h_ptr, zeros_ptr, tau_ptr, ...]

Output: [...]

Cycles: 2504 | +| norm_sq | Normalizes an `e` in [0, q) to be in [-(q-1) << 1, (q-1) << 1) and returns its square norm.

We use the following formula to do so:

normalize(e) = e^2 - phi * (2*q*e - q^2) where phi := (e > (q - 1)/2)

The formula implements:

if e > (q-1)/2:

return (q - e)^2

else:

return e^2

The use of the formula avoids using the if-else block.

Input: [e, ...]

Output [norm(e)^2, ...]

Cycles: 21 | +| diff_mod_q | On input a tuple (u, w, v), the following computes (v - (u + (- w % q) % q) % q).

We can avoid doing three modular reductions by using the following facts:

1. q is much smaller than the Miden prime. Precisely, q * 2^50 < Q

2. The coefficients of the product polynomial, u and w, are less than J := 512 * q^2

3. The coefficients of c are less than q.

This means that we can substitute (v - (u + (- w % q) % q) % q) with v + w + J - u without

risking Q-overflow since \|v + w + J - u\| < 1025 * q^2

To get the final result we reduce (v + w + J - u) modulo q.

Input: [v, w, u, ...]

Output: [e, ...]

Cycles: 44 | +| compute_s1_norm_sq | Takes a pointer to a polynomial pi of degree less than 1024 with coefficients in Z_Q and

a polynomial c of degree 512 with coefficients also in Z_Q, where Q is the Miden prime.

The goal is to compute s1 = c - pi = c - h * s2 in Z_q[x]/(phi) where q is the Falcon prime.

The pointer pi_ptr points both to pi and c through the relation c_ptr = pi_ptr + offset

where offset := 1281.

The naive way to compute s1 would be to first reduce the polynomial pi modulo the Falcon

prime q and then modulo the irreducible polynomial phi = x^512 + 1. Then we would need to negate

the coefficients of pi modulo q and only then can we add these coefficients to the coefficients

of c and then reduce the result modulo q one more time.

Knowing that the end goal of computing c is to compute its norm squared, we can do better.

We can compute s1 in a single pass by delaying the q-modular reduction til the end. This can

be achieved through a careful analysis of the computation of the difference between pi and c.

The i-th coefficient s1_i of s1 is equal to c_i - (pi_i - pi_{512 + i}) which is equal to

c_i + pi_{512 + i} - pi_i. Now, we know that the size of the pi_i coefficients is bounded by

J := 512 * q^2 and this means that J + pi_{512 + i} - pi_i does not Q-underflow and since

J = 0 modulo q, the addition of J does not affect the final result. It is also important to

note that adding J does not Q-overflow by virtue of q * 2^50 < Q.

All of the above implies that we can compute s1_i with only one modular reduction at the end,

in addition to one modular reduction applied to c_i.

Moreover, since we are only interested in the square norm of s1_i, we do not have to store

s1_i and then load it at a later point, and instead we can immediatly follow the computation

of s1_i with computing its square norm.

After computing the square norm of s1_i, we can accumulate into an accumulator to compute the

sum of the square norms of all the coefficients of polynomial c. Using the overflow stack, this

can be delayed til the end.

Input: [pi_ptr, ...]

Output: [norm_sq(s1), ...]

Cycles: 58888 | +| compute_s2_norm_sq | Compute the square norm of the polynomial s2 given a pointer to its coefficients.

Input: [s2_ptr, ...]

Output: [norm_sq(s2), ...]

Cycles: 13322 | +| verify | Verifies a signature against a public key and a message. The procedure gets as inputs the hash

of the public key and the hash of the message via the operand stack. The signature is provided

via the advice stack.

The signature is valid if and only if the procedure returns.

Input: [PK, MSG, NONCE1, NONCE1, ...]

Output: [...]

Cycles: ~ 92029 | diff --git a/stdlib/tests/crypto/falcon.rs b/stdlib/tests/crypto/falcon.rs index ecac149343..f149addfae 100644 --- a/stdlib/tests/crypto/falcon.rs +++ b/stdlib/tests/crypto/falcon.rs @@ -2,7 +2,10 @@ use miden_air::{Felt, StarkField}; use std::vec; use test_utils::{ - crypto::{KeyPair, Polynomial, Rpo256, Signature}, + crypto::{ + rpo_falcon512::{KeyPair, Polynomial, Signature}, + Rpo256, + }, rand::rand_vector, Test, Word, };