diff --git a/halo2_proofs/Cargo.toml b/halo2_proofs/Cargo.toml index 53684f61..941d1f5c 100644 --- a/halo2_proofs/Cargo.toml +++ b/halo2_proofs/Cargo.toml @@ -55,6 +55,8 @@ memmap = "0.7" plotters = { version = "0.3.0", optional = true } tabbycat = { version = "0.1", features = ["attributes"], optional = true } +poly-optimizer = { path = "../../polyeval/" } + [target.'cfg(target_arch = "wasm32")'.dependencies] # plotters depends on web-sys, which eventually depends on bumpalo 3. This dependency is # required because our MSRV is 1.51, but bumpalo 3.9 increased its MSRV to 1.54. We can diff --git a/halo2_proofs/src/arithmetic.rs b/halo2_proofs/src/arithmetic.rs index 97e9fe68..ccbdcc91 100644 --- a/halo2_proofs/src/arithmetic.rs +++ b/halo2_proofs/src/arithmetic.rs @@ -543,7 +543,7 @@ pub fn gpu_ifft(a: &mut [G], omega: G::Scalar, log_n: u32, divisor: G: /// by $n$. /// /// This will use multithreading if beneficial. -pub fn best_fft(a: &mut [G], omega: G::Scalar, log_n: u32) { +pub fn best_fft(a: &mut [G], omega: G::Scalar, log_n: u32) { cfg_if::cfg_if! { if #[cfg(feature = "cuda")]{ return gpu_fft(a, omega, log_n); @@ -553,7 +553,7 @@ pub fn best_fft(a: &mut [G], omega: G::Scalar, log_n: u32) { } } -pub fn best_fft_cpu(a: &mut [G], omega: G::Scalar, log_n: u32) { +pub fn best_fft_cpu(a: &mut [G], omega: G::Scalar, log_n: u32) { fn bitreverse(mut n: usize, l: usize) -> usize { let mut r = 0; for _ in 0..l { diff --git a/halo2_proofs/src/lib.rs b/halo2_proofs/src/lib.rs index 67c58eac..e14fdc52 100644 --- a/halo2_proofs/src/lib.rs +++ b/halo2_proofs/src/lib.rs @@ -1,3 +1,6 @@ +#![allow(incomplete_features)] +#![feature(generic_const_exprs)] + //! # halo2_proofs #![feature(local_key_cell_methods)] diff --git a/halo2_proofs/src/plonk/evaluation.rs b/halo2_proofs/src/plonk/evaluation.rs index 5f2885c8..d4b75a89 100644 --- a/halo2_proofs/src/plonk/evaluation.rs +++ b/halo2_proofs/src/plonk/evaluation.rs @@ -1,5 +1,6 @@ use super::{evaluation_gpu, ConstraintSystem, Expression}; use crate::multicore; +use crate::helpers::Serializable; use crate::plonk::symbol::ProveExpression; use crate::plonk::symbol::ProveExpressionUnit; use crate::plonk::symbol; @@ -890,6 +891,8 @@ impl Evaluator { let l_active_row = &pk.l_active_row; let p = &pk.vk.cs.permutation; + + let timer = ark_std::start_timer!(|| "permutations"); // Permutations let permutation = &permutations[0]; diff --git a/halo2_proofs/src/plonk/evaluation_gpu.rs b/halo2_proofs/src/plonk/evaluation_gpu.rs index edc2c6e2..1c3d9c4b 100644 --- a/halo2_proofs/src/plonk/evaluation_gpu.rs +++ b/halo2_proofs/src/plonk/evaluation_gpu.rs @@ -1,5 +1,6 @@ use super::Expression; use crate::multicore; +use crate::helpers::Serializable; use crate::plonk::lookup::prover::Committed; use crate::plonk::permutation::Argument; use crate::plonk::{lookup, permutation, Any, ProvingKey}; @@ -461,8 +462,29 @@ impl ProveExpression { &mut unit_cache, &mut LinkedList::new(), &mut helper, + false )?; program.read_into_buffer(&values_buf.0.unwrap().0, input)?; + + + { + use std::fs::File; + use std::io::Write; + + + let mut f = match File::create("input.txt") { + Ok(file) => file, + Err(e) => { + panic!("Failed to create file: {}", e); + } + }; + let dd = format!("{:?}", input); + match f.write_all(dd.as_bytes()) { + Ok(_) => println!("Data has been written to the file."), + Err(e) => println!("Error occurred while writing to the file: {}", e), + } + } + println!("cache: {:?}", unit_cache.data.keys().collect::>().into_iter().map(|x| ProveExpressionUnit::key_to_string(*x)).collect::>()); Ok(unit_cache.data) @@ -479,6 +501,7 @@ impl ProveExpression { FftKernel::::create(programs).expect("Cannot initialize kernel!"); let gpu_idx = group_idx % kern.kernels.len(); + println!("GPU IDX = {:?}", gpu_idx); let data = kern.kernels[gpu_idx] .program .run(closures, &mut values.values[..]) @@ -505,7 +528,7 @@ impl ProveExpression { let v = self._eval_gpu( pk, program, memory_cache, - advice, instance, y, unit_cache, allocator, helper, + advice, instance, y, unit_cache, allocator, helper, false )?; match v { (Some((l, rot_l)), Some(r)) => { @@ -574,7 +597,7 @@ impl ProveExpression { let l = uid._eval_gpu( pk, program, memory_cache, - advice, instance, y, unit_cache, allocator, helper, + advice, instance, y, unit_cache, allocator, helper, false )?.0.unwrap(); let kernel_name = format!("{}_eval_batch_scale", "Bn256_Fr"); @@ -613,6 +636,122 @@ impl ProveExpression { } } + fn read_buffer_and_store(&self, buffer: &Buffer, program: &Program, + buffer_len : usize, + file_name : String, + ) { + use std::fs::File; + use std::io::Write; + + let mut t = vec![F::zero(); buffer_len]; + let mut tbuf = t.as_mut_slice(); + let ret = program.read_into_buffer(&buffer, &mut tbuf); + assert!(ret.is_ok()); + + let poly = Polynomial::new(tbuf.to_vec()); + + let mut g = File::create(format!("{}.data", file_name)).unwrap(); + let ret = as Serializable>::store(&poly, &mut g); + assert!(ret.is_ok()); + + // store printed version for verifying binary correctly decoded + let mut g = File::create(format!("{}.txt", file_name)).unwrap(); + let ret = g.write_all(format!("{:?}", poly).as_bytes()); + assert!(ret.is_ok()); + } + + fn read_table_and_store(&self, + advice: &Vec>, + fixed: &Vec>, + expr_type : ProveExpressionUnit, + idx: usize, + file_name : String,) { + use std::fs::File; + use std::io::Write; + + let t = match expr_type { + ProveExpressionUnit::Fixed { column_index : _, rotation : _ } => &fixed[idx], + ProveExpressionUnit::Advice { column_index : _, rotation : _} => &advice[idx], + ProveExpressionUnit::Instance { column_index : _, rotation : _} => unimplemented!(), + }; + + let mut g = File::create(format!("{}.data", file_name)).unwrap(); + let ret = as Serializable>::store(t, &mut g); + assert!(ret.is_ok()); + + // store printed version for verifying binary correctly decoded + let mut g = File::create(format!("{}.txt", file_name)).unwrap(); + let ret = g.write_all(format!("{:?}", t).as_bytes()); + assert!(ret.is_ok()); + } + + fn prove_expr_to_string(&self, expr_type : &ProveExpressionUnit) -> &str { + match expr_type { + ProveExpressionUnit::Fixed { column_index : _, rotation : _ } => "f", + ProveExpressionUnit::Advice { column_index : _, rotation : _} => "a", + ProveExpressionUnit::Instance { column_index : _, rotation : _} => unimplemented!(), + } + } + + fn dump_equation_data(&self, + result_buffer_eval: &Buffer, + // result_buffer_coeff: &Buffer, + program: &Program, + advice: &Vec>, + fixed: &Vec>, + lhs : (usize, ProveExpressionUnit), + rhs : Option<(usize, ProveExpressionUnit)>, + op : &Bop, + lhs_buffer : &Buffer, + rhs_buffer : Option<&Buffer>, + //lhs_buffer_coeff : &Buffer, + //rhs_buffer_coeff : &Buffer, + ) { + + let (lhs_idx, lhs_type) = lhs; + + println!("lhs_buffer = {:?}", lhs_buffer); + println!("result_buffer = {:?}", result_buffer_eval); + + const D : usize = 2usize.pow(20); + + let op_name_str = match op { Bop::Sum => "sum", Bop::Product => "mul", }; + let lhs_type_str = self.prove_expr_to_string(&lhs_type); + + + // Note: all the data from the buffer will be in eval form, while advice/fixed tables store + // polys in coeff form + + let rhs_idx = if rhs.is_none() { "yconst".to_string() } else { rhs.as_ref().unwrap().0.to_string() }; + let rhs_type_str = if rhs.is_none() { "_".to_string() } else { self.prove_expr_to_string(&rhs.as_ref().unwrap().1).to_string() }; + + // Result gpu buffer grab + self.read_buffer_and_store(result_buffer_eval, program, D, + format!("{}{}_{}_{}{}", lhs_type_str, lhs_idx.to_string(), + op_name_str, rhs_type_str, rhs_idx.to_string())); + + // LHS operand gpu buffer grab + self.read_buffer_and_store(lhs_buffer, program, D, + format!("{}{}", lhs_type_str, lhs_idx.to_string())); + // LHS operand grab from input table + self.read_table_and_store(advice, fixed, lhs_type, lhs_idx, + format!("input_{}{}", lhs_type_str, lhs_idx.to_string())); + + + if rhs.is_some() { + println!("rhs_buffer = {:?}", rhs_buffer.unwrap()); + let (rhs_idx, rhs_type) = rhs.unwrap(); + + // RHS operand gpu buffer grab + self.read_buffer_and_store(rhs_buffer.unwrap(), program, D, + format!("{}{}", rhs_type_str, rhs_idx.to_string())); + + // RHS operand grab from input table + self.read_table_and_store(advice, fixed, rhs_type, rhs_idx, + format!("input_{}{}", rhs_type_str, rhs_idx.to_string())); + } + + } pub(crate) fn _eval_gpu>( &self, @@ -625,6 +764,7 @@ impl ProveExpression { unit_cache: &mut Cache>, allocator: &mut LinkedList>, helper: &mut ExtendedFFTHelper, + dump_data : bool, ) -> EcResult<(Option<(Rc>, i32)>, Option)> { let size = 1u32 << pk.vk.domain.extended_k(); let local_work_size = 128; @@ -643,57 +783,118 @@ impl ProveExpression { )? { Ok((Some((v, 0)), None)) } else { + + + const N : usize = 1 << 18; + const EN : usize = 1 << 20; + const RT : usize = 2 * 20; + + use poly_optimizer::poly::TestableChunk; + use poly_optimizer::poly::PolyContext; + use poly_optimizer::poly::ExtendedDomain; + use poly_optimizer::poly::Coeff as PCoeff; + + let mut rt_chunk = TestableChunk::::new(); + let polyeval_ctx = + PolyContext::>::new_coeff_context( + ExtendedDomain::::new( + rt_chunk.as_ptr(), + pairing::arithmetic::FieldExt::ZETA)).from_coeff_to_eval(); + + let match_str = "(S(u(a30-3)) + Y)".to_string(); + + let lhs_info = (30, ProveExpressionUnit::Advice{column_index: 0, rotation: Rotation(0)}); + let rhs_info = None; // (7, ProveExpressionUnit::Advice{column_index: 0, rotation: Rotation(0)}); match self { ProveExpression::Op(l, r, op) => { + + let do_dump_data = self.to_string() == match_str; + let l = l._eval_gpu( pk, program, memory_cache, - advice, instance, y, unit_cache, allocator, helper, + advice, instance, y, unit_cache, allocator, helper, do_dump_data )?; let r = r._eval_gpu( pk, program, memory_cache, - advice, instance, y, unit_cache, allocator, helper, + advice, instance, y, unit_cache, allocator, helper, do_dump_data )?; + let lbuf = l.clone(); + let rbuf = r.clone(); //let timer = start_timer!(|| format!("gpu eval sum {} {:?} {:?}", size, l.0, r.0)); let res = match (l.0, r.0) { (Some(l), Some(r)) => { - let kernel_name = match op { - Bop::Sum => format!("{}_eval_sum", "Bn256_Fr"), - Bop::Product => format!("{}_eval_mul", "Bn256_Fr"), - }; - let kernel = program.create_kernel( - &kernel_name, - global_work_size as usize, - local_work_size as usize, - )?; - - let res = if r.1 == 0 && Rc::strong_count(&r.0) == 1 { - r.0.clone() - } else if l.1 == 0 && Rc::strong_count(&l.0) == 1 { - l.0.clone() - } else { - Rc::new(allocator.pop_front().unwrap_or_else(|| unsafe { - program.create_buffer::(size as usize).unwrap() - })) - }; - - kernel - .arg(res.as_ref()) - .arg(l.0.as_ref()) - .arg(r.0.as_ref()) - .arg(&l.1) - .arg(&r.1) - .arg(&size) - .run()?; - - if Rc::strong_count(&l.0) == 1 { - allocator.push_back(Rc::try_unwrap(l.0).unwrap()) - } - if Rc::strong_count(&r.0) == 1 { - allocator.push_back(Rc::try_unwrap(r.0).unwrap()) - } + let l_s = do_shift::( + &polyeval_ctx, + program, + allocator, + l.0.as_ref(), + l.1,)?; + let r_s = do_shift::( + &polyeval_ctx, + program, + allocator, + r.0.as_ref(), + r.1,)?; + + let l = buffer_to_vec::(program, &l_s); + let r = buffer_to_vec::(program, &r_s); + + let res = match op { + Bop::Sum => { + polyeval_ctx.sum(&l, &r) + }, + Bop::Product => { + polyeval_ctx.mul(&l, &r) + } + }; - Ok((Some((res, 0)), None)) + let mut out_buf = allocator + .pop_front() + .unwrap_or_else(|| unsafe { program.create_buffer::(EN as usize).unwrap() }); + program.write_from_buffer(&mut out_buf, &res)?; + + Ok((Some((Rc::new(out_buf), 0)), None)) + + + //let kernel_name = match op { + // Bop::Sum => format!("{}_eval_sum", "Bn256_Fr"), + // Bop::Product => format!("{}_eval_mul", "Bn256_Fr"), + //}; + //let kernel = program.create_kernel( + // &kernel_name, + // global_work_size as usize, + // local_work_size as usize, + //)?; + + //let res = if r.1 == 0 && Rc::strong_count(&r.0) == 1 { + // r.0.clone() + //} else if l.1 == 0 && Rc::strong_count(&l.0) == 1 { + // l.0.clone() + //} else { + // Rc::new(allocator.pop_front().unwrap_or_else(|| unsafe { + // program.create_buffer::(size as usize).unwrap() + // })) + //}; + + //kernel + // .arg(res.as_ref()) + // .arg(l.0.as_ref()) + // .arg(r.0.as_ref()) + // .arg(&l.1) + // .arg(&r.1) + // .arg(&size) + // .run()?; + + //if Rc::strong_count(&l.0) == 1 { + // allocator.push_back(Rc::try_unwrap(l.0).unwrap()) + //} + + //if Rc::strong_count(&r.0) == 1 { + // allocator.push_back(Rc::try_unwrap(r.0).unwrap()) + //} + + //Ok((Some((res, 0)), None)) } (None, None) => match op { Bop::Sum => Ok((None, Some(l.1.unwrap() + r.1.unwrap()))), @@ -702,40 +903,111 @@ impl ProveExpression { (None, Some(b)) | (Some(b), None) => { let c = l.1.or(r.1).unwrap(); let c = program.create_buffer_from_slice(&vec![c])?; - let kernel_name = match op { - Bop::Sum => format!("{}_eval_sum_c", "Bn256_Fr"), - Bop::Product => format!("{}_eval_mul_c", "Bn256_Fr"), - }; - let kernel = program.create_kernel( - &kernel_name, - global_work_size as usize, - local_work_size as usize, - )?; - - let res = if b.1 == 0 && Rc::strong_count(&b.0) == 1 { - b.0.clone() - } else { - Rc::new(allocator.pop_front().unwrap_or_else(|| unsafe { - program.create_buffer::(size as usize).unwrap() - })) - }; - kernel - .arg(res.as_ref()) - .arg(b.0.as_ref()) - .arg(&b.1) - .arg(&c) - .arg(&size) - .run()?; - if Rc::strong_count(&b.0) == 1 { - allocator.push_back(Rc::try_unwrap(b.0).unwrap()) - } + const N : usize = 1 << 18; + const EN : usize = 1 << 20; + const RT : usize = 2 * 20; - Ok((Some((res, 0)), None)) + let res = match op { + Bop::Sum => { + do_yconst::( + &polyeval_ctx, + program, + allocator, + &b.0, + b.1, + &c) + } + Bop::Product => { + do_scale::( + &polyeval_ctx, + program, + allocator, + &b.0, + b.1, + &c) + } + }?; + + let rc_res = Rc::new(res); + + Ok((Some((rc_res, 0)), None)) + + + //let kernel_name = match op { + // Bop::Sum => format!("{}_eval_sum_c", "Bn256_Fr"), + // Bop::Product => format!("{}_eval_mul_c", "Bn256_Fr"), + //}; + //let kernel = program.create_kernel( + // &kernel_name, + // global_work_size as usize, + // local_work_size as usize, + //)?; + + //let res = if b.1 == 0 && Rc::strong_count(&b.0) == 1 { + // b.0.clone() + //} else { + // Rc::new(allocator.pop_front().unwrap_or_else(|| unsafe { + // program.create_buffer::(size as usize).unwrap() + // })) + //}; + + //kernel + // .arg(res.as_ref()) + // .arg(b.0.as_ref()) + // .arg(&b.1) + // .arg(&c) + // .arg(&size) + // .run()?; + + //if Rc::strong_count(&b.0) == 1 { + // allocator.push_back(Rc::try_unwrap(b.0).unwrap()) + //} + + //if do_dump_data { + // self.read_buffer_and_store(&c, program, 1, + // "yconst_val".to_string()); + //} + + //Ok((Some((res, 0)), None)) } }; //end_timer!(timer); + // + + if self.to_string() == match_str { + // S((u(a18-2) * u(a26-3))) + // "S((u(a20-1) * u(a46-1)))".to_string() { + // "(u(f9-0) * u(a8-0))".to_string() { + // "(u(a20-2) * u(a40-1))".to_string() { + println!("FIND_ME !!!!!!!!!!!!!!!!!!!!"); + + println!("result = {:?}", res); + println!("lbuf = {:?}", lbuf); + println!("rbuf = {:?}", rbuf); + println!("self = {:?}", self); + + let buff = res.as_ref().unwrap().0.as_ref().unwrap().0.as_ref(); + + let rbuf_input = if rhs_info.is_none() { None } else { Some(rbuf.0.as_ref().unwrap().0.as_ref()) }; + + println!("Dumping equation data"); + self.dump_equation_data( + buff, + // &res_buffer_coeff, + program, + advice, + &pk.fixed_polys, + lhs_info, + rhs_info, + op, + lbuf.0.as_ref().unwrap().0.as_ref(), + rbuf_input, + //&lbuf_coeff, + //&rbuf_coeff, + ); + } res } @@ -783,7 +1055,14 @@ impl ProveExpression { } => (&instance[*column_index], rotation), }; - let buffer = do_extended_fft(pk, program, origin_values, allocator, helper)?; + let buffer = do_polyeval_extended_fft( + &polyeval_ctx.from_eval_to_coeff(), + pk, + program, + origin_values, + allocator, + helper)?; + let value = if cache_action == CacheAction::Cache { unit_cache.update(group, buffer, |buffer| allocator.push_back(buffer)) } else { @@ -798,7 +1077,7 @@ impl ProveExpression { ProveExpression::Scale(l, ys) => { let l = l._eval_gpu( pk, program, memory_cache, - advice, instance, y, unit_cache, allocator, helper, + advice, instance, y, unit_cache, allocator, helper, false )?; let l = l.0.unwrap(); let max_y_order = ys.keys().max().unwrap(); @@ -811,34 +1090,65 @@ impl ProveExpression { acc + y[*y_order as usize] * f }); let c = program.create_buffer_from_slice(&vec![c])?; - - let kernel_name = format!("{}_eval_scale", "Bn256_Fr"); - let kernel = program.create_kernel( - &kernel_name, - global_work_size as usize, - local_work_size as usize, - )?; - - let res = if l.1 == 0 && Rc::strong_count(&l.0) == 1 { - l.0.clone() - } else { - Rc::new(allocator.pop_front().unwrap_or_else(|| unsafe { - program.create_buffer::(size as usize).unwrap() - })) - }; - kernel - .arg(res.as_ref()) - .arg(l.0.as_ref()) - .arg(&l.1) - .arg(&size) - .arg(&c) - .run()?; - - if Rc::strong_count(&l.0) == 1 { - allocator.push_back(Rc::try_unwrap(l.0).unwrap()) - } - - Ok((Some((res, 0)), None)) + + let res = do_scale::( + &polyeval_ctx, + program, + allocator, + &l.0, + l.1, + &c)?; + + let rc_res = Rc::new(res); + + Ok((Some((rc_res, 0)), None)) + + // let kernel_name = format!("{}_eval_scale", "Bn256_Fr"); + // let kernel = program.create_kernel( + // &kernel_name, + // global_work_size as usize, + // local_work_size as usize, + // )?; + + // let res = if l.1 == 0 && Rc::strong_count(&l.0) == 1 { + // l.0.clone() + // } else { + // Rc::new(allocator.pop_front().unwrap_or_else(|| unsafe { + // program.create_buffer::(size as usize).unwrap() + // })) + // }; + // kernel + // .arg(res.as_ref()) + // .arg(l.0.as_ref()) + // .arg(&l.1) + // .arg(&size) + // .arg(&c) + // .run()?; + + // if Rc::strong_count(&l.0) == 1 { + // allocator.push_back(Rc::try_unwrap(l.0).unwrap()) + // } + + // if dump_data { + // let dump_str = self.to_string() + // .replace("(", "_") + // .replace(")", "_") + // .replace("-", "_") + // .replace("+", "_") + // .replace(" ", "_") ; + // println!("FIND_ME !!!!!!!!!!!!!!!!!!!!"); + + // println!("scale buffer = {:?}", c); + // println!("scale result = {:?}", res.as_ref()); + + // self.read_buffer_and_store(&c, program, 1, format!("scalar_val_{}", dump_str)); + + // self.read_buffer_and_store(res.as_ref(), program, 2usize.pow(20), + // format!("scale_result_{}", dump_str)); + // } + + + // Ok((Some((res, 0)), None)) } } } @@ -929,6 +1239,123 @@ pub(crate) fn load_unit_from_mem_cache, const EN :usize>(program: &Program, buf : &Buffer ) -> Vec { + let mut t = vec![F::zero(); EN]; + let mut tbuf = t.as_mut_slice(); + let ret = program.read_into_buffer(&buf, &mut tbuf); + assert!(ret.is_ok()); + + tbuf.to_vec() +} + +use poly_optimizer::poly::TestableChunk; +use poly_optimizer::poly::PolyContext; +use poly_optimizer::poly::ExtendedDomain; +use poly_optimizer::poly::Coeff as PCoeff; +use poly_optimizer::poly::Eval; + +#[cfg(feature = "cuda")] +pub(crate) fn do_shift, const N : usize, const EN : usize, const RT:usize>( + ctx : &PolyContext>, + program: &Program, + allocator: &mut LinkedList>, + buf : &Buffer, + rot : i32, +) -> EcResult> +{ + let f = buffer_to_vec::(program, buf); + + let res = ctx.shift(&f, rot); + + let mut out_buf = allocator + .pop_front() + .unwrap_or_else(|| unsafe { program.create_buffer::(EN as usize).unwrap() }); + program.write_from_buffer(&mut out_buf, &res)?; + + Ok(out_buf) +} + +#[cfg(feature = "cuda")] +pub(crate) fn do_yconst, const N : usize, const EN : usize, const RT:usize>( + ctx : &PolyContext>, + program: &Program, + allocator: &mut LinkedList>, + buf : &Buffer, + rot : i32, + yconst_buf : &Buffer, +) -> EcResult> +{ + let b_s = do_shift::( + &ctx, + program, + allocator, + buf, + rot)?; + + let f = buffer_to_vec::(program, &b_s); + let y_vec = buffer_to_vec::(program, yconst_buf); + let yconst = y_vec.first().unwrap(); + + let res = ctx.yconst(&f, *yconst); + + let mut out_buf = allocator + .pop_front() + .unwrap_or_else(|| unsafe { program.create_buffer::(EN as usize).unwrap() }); + program.write_from_buffer(&mut out_buf, &res)?; + + Ok(out_buf) +} + +#[cfg(feature = "cuda")] +pub(crate) fn do_scale, const N : usize, const EN : usize, const RT:usize>( + ctx : &PolyContext>, + program: &Program, + allocator: &mut LinkedList>, + buf : &Buffer, + rot : i32, + scalar_buf : &Buffer, +) -> EcResult> +{ + let b_s = do_shift::( + &ctx, + program, + allocator, + buf, + rot)?; + + let f = buffer_to_vec::(program, &b_s); + let s_vec = buffer_to_vec::(program, scalar_buf); + let scalar = s_vec.first().unwrap(); + + let res = ctx.scale(&f, *scalar); + + let mut out_buf = allocator + .pop_front() + .unwrap_or_else(|| unsafe { program.create_buffer::(EN as usize).unwrap() }); + program.write_from_buffer(&mut out_buf, &res)?; + + Ok(out_buf) +} + +#[cfg(feature = "cuda")] +pub(crate) fn do_polyeval_extended_fft, const N : usize, const EN : usize, const RT:usize>( + ctx : &PolyContext>, + pk: &ProvingKey, + program: &Program, + origin_values: &Polynomial, + allocator: &mut LinkedList>, + helper: &mut ExtendedFFTHelper, +) -> EcResult> +{ + let res = ctx.ntt(&origin_values.values).0; + + let mut out_buf = allocator + .pop_front() + .unwrap_or_else(|| unsafe { program.create_buffer::(EN as usize).unwrap() }); + program.write_from_buffer(&mut out_buf, &res)?; + + Ok(out_buf) +} #[cfg(feature = "cuda")] pub(crate) fn do_extended_fft>( @@ -937,9 +1364,10 @@ pub(crate) fn do_extended_fft>( origin_values: &Polynomial, allocator: &mut LinkedList>, helper: &mut ExtendedFFTHelper, -) -> EcResult> { - let origin_size = 1u32 << pk.vk.domain.k(); - let extended_size = 1u32 << pk.vk.domain.extended_k(); +) -> EcResult> +{ + let origin_size = 1u32 << pk.vk.domain.k(); // 2^k + let extended_size = 1u32 << pk.vk.domain.extended_k(); // 2^ext_k let local_work_size = 128; let global_work_size = extended_size / local_work_size; @@ -976,6 +1404,7 @@ pub(crate) fn do_extended_fft>( .run()?; //end_timer!(timer); + //let timer = start_timer!(|| "do fft pure"); let domain = &pk.vk.domain; let a = do_fft_pure( diff --git a/halo2_proofs/src/poly/domain.rs b/halo2_proofs/src/poly/domain.rs index a7ecf23f..eb2a71e5 100644 --- a/halo2_proofs/src/poly/domain.rs +++ b/halo2_proofs/src/poly/domain.rs @@ -28,7 +28,7 @@ pub struct EvaluationDomain { omega_inv: G::Scalar, extended_omega: G::Scalar, extended_omega_inv: G::Scalar, - pub(crate) g_coset: G::Scalar, + pub g_coset: G::Scalar, pub(crate) g_coset_inv: G::Scalar, quotient_poly_degree: u64, pub(crate) ifft_divisor: G::Scalar, @@ -37,7 +37,7 @@ pub struct EvaluationDomain { barycentric_weight: G::Scalar, } -impl EvaluationDomain { +impl EvaluationDomain { /// This constructs a new evaluation domain object based on the provided /// values $j, k$. pub fn new(j: u32, k: u32) -> Self { @@ -56,6 +56,7 @@ impl EvaluationDomain { while (1 << extended_k) < (n * quotient_poly_degree) { extended_k += 1; } + println!("extended_k = {}", extended_k); let mut extended_omega = G::Scalar::root_of_unity();