From a979221c6e1d255e53850ea0268701655af17ad4 Mon Sep 17 00:00:00 2001 From: Mikhail Volkhov Date: Thu, 6 Feb 2025 14:58:16 +0000 Subject: [PATCH] Turn off constraint parallelisation for small vector size? --- kimchi/src/circuits/constraints.rs | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/kimchi/src/circuits/constraints.rs b/kimchi/src/circuits/constraints.rs index b5935b142b..b138a6c7fd 100644 --- a/kimchi/src/circuits/constraints.rs +++ b/kimchi/src/circuits/constraints.rs @@ -374,11 +374,25 @@ impl, OpeningProof: OpenProof> impl ConstraintSystem { /// evaluate witness polynomials over domains pub fn evaluate(&self, w: &[DP; COLUMNS], z: &DP) -> WitnessOverDomains { - // this optimisation saves 100ms + // this optimisation saves 100ms for the prover. + // but it adds 3% = 2.5ms to the verifier on small (1k rows) circuits. + + // the idea is to have threading minimised below a certain threshold. + let min_len = { + let threads_to_use = if w[0].len() <= 2048 { + 1 + } else { + rayon::current_num_threads() + }; + // one thread: min batch size is COLUMNS, so no iterator below is ever split; + // otherwise aim for about one batch per pool thread, never below one item + std::cmp::max(1, COLUMNS / threads_to_use) + }; // compute shifted witness polynomials let w8: [E>; COLUMNS] = (0..COLUMNS) .into_par_iter() + .with_min_len(min_len) .map(|i| w[i].evaluate_over_domain_by_ref(self.domain.d8)) .collect::>() .try_into() @@ -386,6 +400,7 @@ impl ConstraintSystem { let w4: [E>; COLUMNS] = (0..COLUMNS) .into_par_iter() + .with_min_len(min_len) .map(|i| { E::>::from_vec_and_domain( (0..self.domain.d4.size) @@ -405,6 +420,7 @@ impl ConstraintSystem { let d4_next_w: [_; COLUMNS] = (0..COLUMNS) .into_par_iter() + .with_min_len(min_len) .map(|i| w4[i].shift(4)) .collect::>() .try_into() @@ -412,6 +428,7 @@ impl ConstraintSystem { let d8_next_w: [_; COLUMNS] = (0..COLUMNS) .into_par_iter() + .with_min_len(min_len) .map(|i| w8[i].shift(8)) .collect::>() .try_into()