From 904b9723307584bd5d40902c0b0352b4a6fd855d Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:12:47 -0400 Subject: [PATCH 01/24] feat: add countries CSV --- .../examples/countries/countries.csv | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 crates/proof-of-sql/examples/countries/countries.csv diff --git a/crates/proof-of-sql/examples/countries/countries.csv b/crates/proof-of-sql/examples/countries/countries.csv new file mode 100644 index 000000000..ee40560e7 --- /dev/null +++ b/crates/proof-of-sql/examples/countries/countries.csv @@ -0,0 +1,31 @@ +Country,Continent,GDP,GDPP +UnitedStates,NorthAmerica,21137,63543 +China,Asia,14342,10261 +Japan,Asia,5081,40293 +Germany,Europe,3846,46329 +India,Asia,2875,2099 +UnitedKingdom,Europe,2825,42330 +France,Europe,2716,41463 +Italy,Europe,2001,33279 +Brazil,SouthAmerica,1839,8718 +Canada,NorthAmerica,1643,43119 +Russia,EuropeAsia,1637,11229 +SouthKorea,Asia,1622,31489 +Australia,Oceania,1382,53799 +Spain,Europe,1316,28152 +Mexico,NorthAmerica,1265,9958 +Indonesia,Asia,1119,4152 +Netherlands,Europe,902,52477 +SaudiArabia,Asia,793,23206 +Turkey,EuropeAsia,761,9005 +Switzerland,Europe,703,81392 +Argentina,SouthAmerica,449,9921 +Sweden,Europe,528,52073 +Nigeria,Africa,448,2190 +Poland,Europe,594,15673 +Thailand,Asia,509,7306 +SouthAfrica,Africa,350,5883 +Philippines,Asia,402,3685 +Colombia,SouthAmerica,323,6458 +Egypt,Africa,302,3012 +Pakistan,Asia,278,1260 \ No newline at end of file From 552979dbf930262d505b31ceeb2c56cb0676ec24 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:14:25 -0400 Subject: [PATCH 02/24] feat: add more countries gdp data --- crates/proof-of-sql/examples/countries/countries.csv | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/countries/countries.csv b/crates/proof-of-sql/examples/countries/countries.csv index ee40560e7..7cc4c9cb5 100644 --- 
a/crates/proof-of-sql/examples/countries/countries.csv +++ b/crates/proof-of-sql/examples/countries/countries.csv @@ -28,4 +28,8 @@ SouthAfrica,Africa,350,5883 Philippines,Asia,402,3685 Colombia,SouthAmerica,323,6458 Egypt,Africa,302,3012 -Pakistan,Asia,278,1260 \ No newline at end of file +Pakistan,Asia,278,1260 +Bangladesh,Asia,302,1855 +Vietnam,Asia,283,2900 +Chile,SouthAmerica,252,13120 +Finland,Europe,268,48888 \ No newline at end of file From ffeec5e30202fec5a242b1187cf0943602f847fa Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:15:44 -0400 Subject: [PATCH 03/24] feat: correct countries gdp data --- crates/proof-of-sql/examples/countries/countries.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/countries/countries.csv b/crates/proof-of-sql/examples/countries/countries.csv index 7cc4c9cb5..397102f8f 100644 --- a/crates/proof-of-sql/examples/countries/countries.csv +++ b/crates/proof-of-sql/examples/countries/countries.csv @@ -28,7 +28,7 @@ SouthAfrica,Africa,350,5883 Philippines,Asia,402,3685 Colombia,SouthAmerica,323,6458 Egypt,Africa,302,3012 -Pakistan,Asia,278,1260 +Pakistan,Asia,278,1450 Bangladesh,Asia,302,1855 Vietnam,Asia,283,2900 Chile,SouthAmerica,252,13120 From f67f8335026ef1d560acb7fca7cd4c51ff3a1e8c Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:17:16 -0400 Subject: [PATCH 04/24] refactor: rename countries csv to countries_gdp csv --- .../examples/countries/{countries.csv => countries_gdp.csv} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename crates/proof-of-sql/examples/countries/{countries.csv => countries_gdp.csv} (100%) diff --git a/crates/proof-of-sql/examples/countries/countries.csv b/crates/proof-of-sql/examples/countries/countries_gdp.csv similarity index 100% rename from crates/proof-of-sql/examples/countries/countries.csv rename to crates/proof-of-sql/examples/countries/countries_gdp.csv From 8bbfa1c6f4e165007a71ff622cb9b8116f39d543 Mon 
Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:47:34 -0400 Subject: [PATCH 05/24] feat: add countries example code --- .../proof-of-sql/examples/countries/main.rs | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 crates/proof-of-sql/examples/countries/main.rs diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs new file mode 100644 index 000000000..5e2fc8e29 --- /dev/null +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -0,0 +1,118 @@ +//! This is a non-interactive example of using Proof of SQL with a countries dataset. +//! To run this, use `cargo run --release --example countries`. +//! +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --release --example countries --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. + +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{ + arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, + TestAccessor, + }, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; + +// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. +// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. +const DORY_SETUP_MAX_NU: usize = 8; +// This should be a "nothing-up-my-sleeve" phrase or number. +const DORY_SEED: [u8; 32] = *b"7a1b3c8d2e4f9g6h5i0j7k2l8m3n9o1p"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. 
+fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "countries".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{result:?}"); +} + +fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + + let filename = "./crates/proof-of-sql/examples/countries/countries_gdp.csv"; + let inferred_schema = + SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); + let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); + + let countries_batch = ReaderBuilder::new(posql_compatible_schema) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + + // Load the table into an "Accessor" so that the prover and verifier can access the 
data/commitments. + let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "countries.countries".parse().unwrap(), + OwnedTable::try_from(countries_batch).unwrap(), + 0, + ); + + prove_and_verify_query( + "SELECT COUNT(*) AS total_countries FROM countries", + &accessor, + &prover_setup, + &verifier_setup, + ); + + prove_and_verify_query( + "SELECT continent, MAX(gdp) as max_gdp, COUNT(*) as country_count FROM countries GROUP BY continent ORDER BY max_gdp DESC", + &accessor, + &prover_setup, + &verifier_setup, + ); +} From 0c284469f84388d2394bdaa03acbf543ebb4e88d Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:51:45 -0400 Subject: [PATCH 06/24] feat: add example query with filter --- crates/proof-of-sql/examples/countries/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs index 5e2fc8e29..a17c16c1f 100644 --- a/crates/proof-of-sql/examples/countries/main.rs +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -115,4 +115,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT country FROM countries WHERE continent = 'Asia'", + &accessor, + &prover_setup, + &verifier_setup, + ); } From 471f64605944ef0644d62cbf1dbc2f844d5533c2 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:56:07 -0400 Subject: [PATCH 07/24] feat: add example query with complex filter --- crates/proof-of-sql/examples/countries/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs index a17c16c1f..5a8244704 100644 --- a/crates/proof-of-sql/examples/countries/main.rs +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -122,4 +122,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT country FROM countries WHERE gdp > 
500 AND gdp < 1500", + &accessor, + &prover_setup, + &verifier_setup, + ); } From 5d106cb2e60ca2824ebddc3d18ff001a5159bff6 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:58:43 -0400 Subject: [PATCH 08/24] feat: add countries example to Cargo.toml --- crates/proof-of-sql/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index cfa73da37..c2be7d100 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -127,6 +127,10 @@ required-features = [ "arrow" ] name = "sushi" required-features = [ "arrow" ] +[[example]] +name = "countries" +required-features = [ "arrow" ] + [[bench]] name = "posql_benches" harness = false From 5dfd6ec780a831518b7ba4eaaee6a9e7857e0170 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 18:01:48 -0400 Subject: [PATCH 09/24] feat: add countries example to lint-and-test.yml --- .github/workflows/lint-and-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 50d45e760..51191d986 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -130,6 +130,8 @@ jobs: run: cargo run --example plastics - name: Run sushi example run: cargo run --example sushi + - name: Run countries example + run: cargo run --example countries - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) From 08e361992c89a088a40366e14936efb8d5a4ee45 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 20:08:33 -0400 Subject: [PATCH 10/24] fix: remove query which genertes wild card --- crates/proof-of-sql/examples/countries/main.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs index 5a8244704..520d92775 100644 
--- a/crates/proof-of-sql/examples/countries/main.rs +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -109,13 +109,6 @@ fn main() { &verifier_setup, ); - prove_and_verify_query( - "SELECT continent, MAX(gdp) as max_gdp, COUNT(*) as country_count FROM countries GROUP BY continent ORDER BY max_gdp DESC", - &accessor, - &prover_setup, - &verifier_setup, - ); - prove_and_verify_query( "SELECT country FROM countries WHERE continent = 'Asia'", &accessor, From 7d7712f1f06308acba056332682d0100a7b03636 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 20:14:29 -0400 Subject: [PATCH 11/24] refactor: add query for sum --- crates/proof-of-sql/examples/countries/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs index 520d92775..10bfb8705 100644 --- a/crates/proof-of-sql/examples/countries/main.rs +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -122,4 +122,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT SUM(gdp) AS total_market_cap FROM countries WHERE country = 'China' OR country = 'India'", + &accessor, + &prover_setup, + &verifier_setup, + ); } From ddcd1a48b739655a68aa77cb05c3f86b8a3b070f Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:55 -0400 Subject: [PATCH 12/24] chore: remove `test_accessor_utility` --- crates/proof-of-sql/src/base/database/mod.rs | 5 - .../base/database/test_accessor_utility.rs | 218 ------------------ 2 files changed, 223 deletions(-) delete mode 100644 crates/proof-of-sql/src/base/database/test_accessor_utility.rs diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index e65b7efb5..a5079e732 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -34,11 +34,6 @@ pub use 
record_batch_utility::ToArrow; #[cfg(feature = "arrow")] pub mod arrow_schema_utility; -#[cfg(all(test, feature = "arrow", feature = "test"))] -mod test_accessor_utility; -#[cfg(all(test, feature = "arrow", feature = "test"))] -pub use test_accessor_utility::{make_random_test_accessor_data, RandomTestAccessorDescriptor}; - mod owned_column; pub(crate) use owned_column::compare_indexes_by_owned_columns_with_direction; pub use owned_column::OwnedColumn; diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs deleted file mode 100644 index 2b06081dd..000000000 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ /dev/null @@ -1,218 +0,0 @@ -use crate::base::database::ColumnType; -use arrow::{ - array::{ - Array, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, Int64Array, - Int8Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, - }, - datatypes::{i256, DataType, Field, Schema, TimeUnit}, - record_batch::RecordBatch, -}; -use proof_of_sql_parser::posql_time::PoSQLTimeUnit; -use rand::{ - distributions::{Distribution, Uniform}, - rngs::StdRng, -}; -use std::sync::Arc; - -/// Specify what form a randomly generated `TestAccessor` can take -pub struct RandomTestAccessorDescriptor { - /// The minimum number of rows in the generated `RecordBatch` - pub min_rows: usize, - /// The maximum number of rows in the generated `RecordBatch` - pub max_rows: usize, - /// The minimum value of the generated data - pub min_value: i64, - /// The maximum value of the generated data - pub max_value: i64, -} - -impl Default for RandomTestAccessorDescriptor { - fn default() -> Self { - Self { - min_rows: 0, - max_rows: 100, - min_value: -5, - max_value: 5, - } - } -} - -/// Generate a `DataFrame` with random data -/// -/// # Panics -/// -/// This function may panic in the following cases: -/// - 
If `Precision::new(7)` fails when creating a `Decimal75` column type, which would occur -/// if the precision is invalid. -/// - When calling `.unwrap()` on the result of `RecordBatch::try_new(schema, columns)`, which -/// will panic if the schema and columns do not align correctly or if there are any other -/// underlying errors. -#[allow(dead_code, clippy::too_many_lines)] -pub fn make_random_test_accessor_data( - rng: &mut StdRng, - cols: &[(&str, ColumnType)], - descriptor: &RandomTestAccessorDescriptor, -) -> RecordBatch { - let n = Uniform::new(descriptor.min_rows, descriptor.max_rows + 1).sample(rng); - let dist = Uniform::new(descriptor.min_value, descriptor.max_value + 1); - - let mut columns: Vec> = Vec::with_capacity(n); - let mut column_fields: Vec<_> = Vec::with_capacity(n); - - for (col_name, col_type) in cols { - let values: Vec = dist.sample_iter(&mut *rng).take(n).collect(); - - match col_type { - ColumnType::Boolean => { - column_fields.push(Field::new(*col_name, DataType::Boolean, false)); - let boolean_values: Vec = values.iter().map(|x| x % 2 != 0).collect(); - columns.push(Arc::new(BooleanArray::from(boolean_values))); - } - ColumnType::TinyInt => { - column_fields.push(Field::new(*col_name, DataType::Int8, false)); - let values: Vec = values - .iter() - .map(|x| ((*x >> 56) as i8)) // Shift right to align the lower 8 bits - .collect(); - columns.push(Arc::new(Int8Array::from(values))); - } - ColumnType::SmallInt => { - column_fields.push(Field::new(*col_name, DataType::Int16, false)); - let values: Vec = values - .iter() - .map(|x| ((*x >> 48) as i16)) // Shift right to align the lower 16 bits - .collect(); - columns.push(Arc::new(Int16Array::from(values))); - } - ColumnType::Int => { - column_fields.push(Field::new(*col_name, DataType::Int32, false)); - let values: Vec = values - .iter() - .map(|x| ((*x >> 32) as i32)) // Shift right to align the lower 32 bits - .collect(); - columns.push(Arc::new(Int32Array::from(values))); - } - 
ColumnType::BigInt => { - column_fields.push(Field::new(*col_name, DataType::Int64, false)); - let values: Vec = values.clone(); - columns.push(Arc::new(Int64Array::from(values))); - } - ColumnType::Int128 => { - column_fields.push(Field::new(*col_name, DataType::Decimal128(38, 0), false)); - - let values: Vec = values.iter().map(|x| i128::from(*x)).collect(); - columns.push(Arc::new( - Decimal128Array::from(values.clone()) - .with_precision_and_scale(38, 0) - .unwrap(), - )); - } - ColumnType::Decimal75(precision, scale) => { - column_fields.push(Field::new( - *col_name, - DataType::Decimal256(precision.value(), *scale), - false, - )); - - let values: Vec = values.iter().map(|x| i256::from(*x)).collect(); - columns.push(Arc::new( - Decimal256Array::from(values.clone()) - .with_precision_and_scale(precision.value(), *scale) - .unwrap(), - )); - } - ColumnType::VarChar => { - let col = &values - .iter() - .map(|v| "s".to_owned() + &v.to_string()[..]) - .collect::>()[..]; - let col: Vec<_> = col.iter().map(String::as_str).collect(); - - column_fields.push(Field::new(*col_name, DataType::Utf8, false)); - - columns.push(Arc::new(StringArray::from(col))); - } - ColumnType::Scalar => unimplemented!("Scalar columns are not supported by arrow"), - ColumnType::TimestampTZ(tu, tz) => { - column_fields.push(Field::new( - *col_name, - DataType::Timestamp( - match tu { - PoSQLTimeUnit::Second => TimeUnit::Second, - PoSQLTimeUnit::Millisecond => TimeUnit::Millisecond, - PoSQLTimeUnit::Microsecond => TimeUnit::Microsecond, - PoSQLTimeUnit::Nanosecond => TimeUnit::Nanosecond, - }, - Some(Arc::from(tz.to_string())), - ), - false, - )); - // Create the correct timestamp array based on the time unit - let timestamp_array: Arc = match tu { - PoSQLTimeUnit::Second => Arc::new(TimestampSecondArray::from(values.clone())), - PoSQLTimeUnit::Millisecond => { - Arc::new(TimestampMillisecondArray::from(values.clone())) - } - PoSQLTimeUnit::Microsecond => { - 
Arc::new(TimestampMicrosecondArray::from(values.clone())) - } - PoSQLTimeUnit::Nanosecond => { - Arc::new(TimestampNanosecondArray::from(values.clone())) - } - }; - columns.push(timestamp_array); - } - } - } - - let schema = Arc::new(Schema::new(column_fields)); - RecordBatch::try_new(schema, columns).unwrap() -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::record_batch; - use rand_core::SeedableRng; - - #[test] - fn we_can_construct_a_random_test_data() { - let descriptor = RandomTestAccessorDescriptor::default(); - let mut rng = StdRng::from_seed([0u8; 32]); - let cols = [ - ("a", ColumnType::BigInt), - ("b", ColumnType::VarChar), - ("c", ColumnType::Int128), - ("d", ColumnType::SmallInt), - ("e", ColumnType::Int), - ("f", ColumnType::TinyInt), - ]; - - let data1 = make_random_test_accessor_data(&mut rng, &cols, &descriptor); - let data2 = make_random_test_accessor_data(&mut rng, &cols, &descriptor); - assert_ne!(data1.num_rows(), data2.num_rows()); - } - - #[test] - fn we_can_construct_a_random_test_data_with_the_correct_data() { - let descriptor = RandomTestAccessorDescriptor { - min_rows: 1, - max_rows: 1, - min_value: -2, - max_value: -2, - }; - let mut rng = StdRng::from_seed([0u8; 32]); - let cols = [ - ("b", ColumnType::BigInt), - ("a", ColumnType::VarChar), - ("c", ColumnType::Int128), - ]; - let data = make_random_test_accessor_data(&mut rng, &cols, &descriptor); - - assert_eq!( - data, - record_batch!("b" => [-2_i64], "a" => ["s-2"], "c" => [-2_i128]) - ); - } -} From 76207882296fb0a4c23e256eb48464edb5495395 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:56 -0400 Subject: [PATCH 13/24] chore: add `arrow` module --- Cargo.toml | 4 ++-- crates/proof-of-sql/src/base/arrow/mod.rs | 1 + crates/proof-of-sql/src/base/mod.rs | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 crates/proof-of-sql/src/base/arrow/mod.rs diff --git a/Cargo.toml 
b/Cargo.toml index a0d8f7216..035636f51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,8 +20,8 @@ ark-poly = { version = "0.4.0" } ark-serialize = { version = "0.4.0" } ark-std = { version = "0.4.0", default-features = false } arrayvec = { version = "0.7", default-features = false } -arrow = { version = "51.0" } -arrow-csv = { version = "51.0" } +arrow = { version = "51.0.0" } +arrow-csv = { version = "51.0.0" } bit-iter = { version = "1.1.1" } bigdecimal = { version = "0.4.5", default-features = false, features = ["serde"] } blake3 = { version = "1.3.3", default-features = false } diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs new file mode 100644 index 000000000..61ca01f43 --- /dev/null +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -0,0 +1 @@ +//! This module provides conversions and utilities for working with Arrow data structures. diff --git a/crates/proof-of-sql/src/base/mod.rs b/crates/proof-of-sql/src/base/mod.rs index ad5573639..657b855d1 100644 --- a/crates/proof-of-sql/src/base/mod.rs +++ b/crates/proof-of-sql/src/base/mod.rs @@ -1,5 +1,8 @@ //! This module contains basic shared functionalities of the library. 
/// TODO: add docs +#[cfg(feature = "arrow")] +pub mod arrow; + pub(crate) mod bit; pub mod commitment; pub mod database; From ea328c1cfcf73dba283650bf542ce3049c244d92 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:56 -0400 Subject: [PATCH 14/24] chore: move `owned_and_arrow_conversions` module into `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 7 +++++++ .../{database => arrow}/owned_and_arrow_conversions.rs | 5 ++--- .../owned_and_arrow_conversions_test.rs | 4 ++-- crates/proof-of-sql/src/base/database/mod.rs | 9 ++------- 4 files changed, 13 insertions(+), 12 deletions(-) rename crates/proof-of-sql/src/base/{database => arrow}/owned_and_arrow_conversions.rs (98%) rename crates/proof-of-sql/src/base/{database => arrow}/owned_and_arrow_conversions_test.rs (97%) diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 61ca01f43..c00d5b063 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -1 +1,8 @@ //! This module provides conversions and utilities for working with Arrow data structures. + +/// Module for converting between owned and Arrow data structures. +pub mod owned_and_arrow_conversions; + +#[cfg(test)] +/// Tests for owned and Arrow conversions. +mod owned_and_arrow_conversions_test; diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs similarity index 98% rename from crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs rename to crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs index adf4f94af..cf16f0376 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs @@ -12,11 +12,10 @@ //! This is because there is no `Int128` type in Arrow. //! 
This does not check that the values are less than 39 digits. //! However, the actual arrow backing `i128` is the correct value. -use super::scalar_and_i256_conversions::convert_scalar_to_i256; use crate::base::{ database::{ - scalar_and_i256_conversions::convert_i256_to_scalar, OwnedColumn, OwnedTable, - OwnedTableError, + scalar_and_i256_conversions::{convert_i256_to_scalar, convert_scalar_to_i256}, + OwnedColumn, OwnedTable, OwnedTableError, }, map::IndexMap, math::decimal::Precision, diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs similarity index 97% rename from crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs rename to crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs index 970df4bad..539d94eaa 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs +++ b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs @@ -1,7 +1,7 @@ -use super::{OwnedColumn, OwnedTable}; +use super::owned_and_arrow_conversions::OwnedArrowConversionError; use crate::{ base::{ - database::{owned_table_utility::*, OwnedArrowConversionError}, + database::{owned_table_utility::*, OwnedColumn, OwnedTable}, map::IndexMap, scalar::Curve25519Scalar, }, diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index a5079e732..a630529ee 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -19,6 +19,8 @@ mod literal_value; pub use literal_value::LiteralValue; mod table_ref; +#[cfg(feature = "arrow")] +pub use crate::base::arrow::owned_and_arrow_conversions::OwnedArrowConversionError; pub use table_ref::TableRef; #[cfg(feature = "arrow")] @@ -58,13 +60,6 @@ mod expression_evaluation_error; mod expression_evaluation_test; pub use expression_evaluation_error::{ExpressionEvaluationError, 
ExpressionEvaluationResult}; -#[cfg(feature = "arrow")] -mod owned_and_arrow_conversions; -#[cfg(feature = "arrow")] -pub use owned_and_arrow_conversions::OwnedArrowConversionError; -#[cfg(all(test, feature = "arrow"))] -mod owned_and_arrow_conversions_test; - mod test_accessor; pub use test_accessor::TestAccessor; #[cfg(test)] From 6346e81663903a22194663ba719aa842c80f21a4 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:57 -0400 Subject: [PATCH 15/24] chore: move `scalar_and_i256_conversions` module into `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 3 +++ .../src/base/arrow/owned_and_arrow_conversions.rs | 6 ++---- .../{database => arrow}/scalar_and_i256_conversions.rs | 8 +++----- .../src/base/database/arrow_array_to_column_conversion.rs | 6 ++++-- crates/proof-of-sql/src/base/database/mod.rs | 7 +++---- 5 files changed, 15 insertions(+), 15 deletions(-) rename crates/proof-of-sql/src/base/{database => arrow}/scalar_and_i256_conversions.rs (96%) diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index c00d5b063..8ac51c34e 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -6,3 +6,6 @@ pub mod owned_and_arrow_conversions; #[cfg(test)] /// Tests for owned and Arrow conversions. mod owned_and_arrow_conversions_test; + +/// Module for scalar and i256 conversions. +pub mod scalar_and_i256_conversions; diff --git a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs index cf16f0376..74ad96839 100644 --- a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs @@ -12,11 +12,9 @@ //! This is because there is no `Int128` type in Arrow. //! This does not check that the values are less than 39 digits. //! 
However, the actual arrow backing `i128` is the correct value. +use super::scalar_and_i256_conversions::{convert_i256_to_scalar, convert_scalar_to_i256}; use crate::base::{ - database::{ - scalar_and_i256_conversions::{convert_i256_to_scalar, convert_scalar_to_i256}, - OwnedColumn, OwnedTable, OwnedTableError, - }, + database::{OwnedColumn, OwnedTable, OwnedTableError}, map::IndexMap, math::decimal::Precision, scalar::Scalar, diff --git a/crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs b/crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs similarity index 96% rename from crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs rename to crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs index 9a44c3766..f606c03cb 100644 --- a/crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs +++ b/crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs @@ -54,12 +54,10 @@ pub fn convert_i256_to_scalar(value: &i256) -> Option { #[cfg(test)] mod tests { - - use super::{convert_i256_to_scalar, convert_scalar_to_i256}; - use crate::base::{ - database::scalar_and_i256_conversions::{MAX_SUPPORTED_I256, MIN_SUPPORTED_I256}, - scalar::{Curve25519Scalar, Scalar}, + use super::{ + convert_i256_to_scalar, convert_scalar_to_i256, MAX_SUPPORTED_I256, MIN_SUPPORTED_I256, }; + use crate::base::scalar::{Curve25519Scalar, Scalar}; use arrow::datatypes::i256; use num_traits::Zero; use rand::RngCore; diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index 15770f312..d6b7d7bbc 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -1,5 +1,7 @@ -use super::scalar_and_i256_conversions::convert_i256_to_scalar; -use crate::base::{database::Column, math::decimal::Precision, scalar::Scalar}; 
+use crate::base::{ + arrow::scalar_and_i256_conversions::convert_i256_to_scalar, database::Column, + math::decimal::Precision, scalar::Scalar, +}; use arrow::{ array::{ Array, ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index a630529ee..c8c4697c5 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -20,7 +20,9 @@ pub use literal_value::LiteralValue; mod table_ref; #[cfg(feature = "arrow")] -pub use crate::base::arrow::owned_and_arrow_conversions::OwnedArrowConversionError; +pub use crate::base::arrow::{ + owned_and_arrow_conversions::OwnedArrowConversionError, scalar_and_i256_conversions, +}; pub use table_ref::TableRef; #[cfg(feature = "arrow")] @@ -74,9 +76,6 @@ mod owned_table_test_accessor; pub use owned_table_test_accessor::OwnedTableTestAccessor; #[cfg(all(test, feature = "blitzar"))] mod owned_table_test_accessor_test; -/// Contains traits for scalar <-> i256 conversions -#[cfg(feature = "arrow")] -pub mod scalar_and_i256_conversions; /// TODO: add docs pub(crate) mod filter_util; From e585f53d175d5a36f893731eb995a9c99c171e7c Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:57 -0400 Subject: [PATCH 16/24] chore: move `arrow_array_to_column_conversion` module into `arrow` module --- crates/proof-of-sql/examples/posql_db/main.rs | 3 ++- .../examples/posql_db/record_batch_accessor.rs | 4 ++-- .../arrow_array_to_column_conversion.rs | 6 ++---- crates/proof-of-sql/src/base/arrow/mod.rs | 3 +++ .../proof-of-sql/src/base/commitment/table_commitment.rs | 4 +++- crates/proof-of-sql/src/base/database/mod.rs | 9 +++------ 6 files changed, 15 insertions(+), 14 deletions(-) rename crates/proof-of-sql/src/base/{database => arrow}/arrow_array_to_column_conversion.rs (99%) diff --git 
a/crates/proof-of-sql/examples/posql_db/main.rs b/crates/proof-of-sql/examples/posql_db/main.rs index a796ed25e..f2facf2c8 100644 --- a/crates/proof-of-sql/examples/posql_db/main.rs +++ b/crates/proof-of-sql/examples/posql_db/main.rs @@ -5,6 +5,7 @@ mod commit_accessor; mod csv_accessor; /// TODO: add docs mod record_batch_accessor; + use arrow::{ datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, @@ -273,7 +274,7 @@ fn main() { end_timer(timer); println!( "Verified Result: {:?}", - RecordBatch::try_from(query_result).unwrap() + RecordBatch::try_from(query_result.table).unwrap() ); } } diff --git a/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs b/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs index 8af046972..08e25f4fe 100644 --- a/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs +++ b/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs @@ -2,9 +2,9 @@ use arrow::record_batch::RecordBatch; use bumpalo::Bump; use indexmap::IndexMap; use proof_of_sql::base::{ + arrow::arrow_array_to_column_conversion::ArrayRefExt, database::{ - ArrayRefExt, Column, ColumnRef, ColumnType, DataAccessor, MetadataAccessor, SchemaAccessor, - TableRef, + Column, ColumnRef, ColumnType, DataAccessor, MetadataAccessor, SchemaAccessor, TableRef, }, scalar::Scalar, }; diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs similarity index 99% rename from crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs rename to crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs index d6b7d7bbc..15770f312 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs @@ -1,7 +1,5 @@ -use crate::base::{ - arrow::scalar_and_i256_conversions::convert_i256_to_scalar, database::Column, - 
math::decimal::Precision, scalar::Scalar, -}; +use super::scalar_and_i256_conversions::convert_i256_to_scalar; +use crate::base::{database::Column, math::decimal::Precision, scalar::Scalar}; use arrow::{ array::{ Array, ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 8ac51c34e..16fdf1b95 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -1,5 +1,8 @@ //! This module provides conversions and utilities for working with Arrow data structures. +/// Module for handling conversion from Arrow arrays to columns. +pub mod arrow_array_to_column_conversion; + /// Module for converting between owned and Arrow data structures. pub mod owned_and_arrow_conversions; diff --git a/crates/proof-of-sql/src/base/commitment/table_commitment.rs b/crates/proof-of-sql/src/base/commitment/table_commitment.rs index 0f9e21783..f013cce90 100644 --- a/crates/proof-of-sql/src/base/commitment/table_commitment.rs +++ b/crates/proof-of-sql/src/base/commitment/table_commitment.rs @@ -3,7 +3,9 @@ use super::{ ColumnCommitmentsMismatch, Commitment, DuplicateIdentifiers, }; #[cfg(feature = "arrow")] -use crate::base::database::{ArrayRefExt, ArrowArrayToColumnConversionError}; +use crate::base::arrow::arrow_array_to_column_conversion::{ + ArrayRefExt, ArrowArrayToColumnConversionError, +}; use crate::base::{ database::{Column, ColumnField, CommitmentAccessor, OwnedTable, TableRef}, scalar::Scalar, diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index c8c4697c5..55546802c 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -21,15 +21,12 @@ pub use literal_value::LiteralValue; mod table_ref; #[cfg(feature = "arrow")] pub use crate::base::arrow::{ - 
owned_and_arrow_conversions::OwnedArrowConversionError, scalar_and_i256_conversions, + arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError}, + owned_and_arrow_conversions::OwnedArrowConversionError, + scalar_and_i256_conversions, }; pub use table_ref::TableRef; -#[cfg(feature = "arrow")] -mod arrow_array_to_column_conversion; -#[cfg(feature = "arrow")] -pub use arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError}; - #[cfg(feature = "arrow")] mod record_batch_utility; #[cfg(feature = "arrow")] From 75b5dfc7e476c89f706c315a089ae209e246cb22 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:58 -0400 Subject: [PATCH 17/24] chore: move `record_batch_utility` module to `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 3 +++ .../src/base/{database => arrow}/record_batch_utility.rs | 2 +- crates/proof-of-sql/src/base/database/mod.rs | 6 +----- 3 files changed, 5 insertions(+), 6 deletions(-) rename crates/proof-of-sql/src/base/{database => arrow}/record_batch_utility.rs (99%) diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 16fdf1b95..3a3c4500d 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -10,5 +10,8 @@ pub mod owned_and_arrow_conversions; /// Tests for owned and Arrow conversions. mod owned_and_arrow_conversions_test; +/// Utility functions for record batches. +pub mod record_batch_utility; + /// Module for scalar and i256 conversions. 
pub mod scalar_and_i256_conversions; diff --git a/crates/proof-of-sql/src/base/database/record_batch_utility.rs b/crates/proof-of-sql/src/base/arrow/record_batch_utility.rs similarity index 99% rename from crates/proof-of-sql/src/base/database/record_batch_utility.rs rename to crates/proof-of-sql/src/base/arrow/record_batch_utility.rs index d1180005b..3ede592bd 100644 --- a/crates/proof-of-sql/src/base/database/record_batch_utility.rs +++ b/crates/proof-of-sql/src/base/arrow/record_batch_utility.rs @@ -169,7 +169,7 @@ macro_rules! record_batch { use arrow::datatypes::Field; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; - use $crate::base::database::ToArrow; + use $crate::base::arrow::record_batch_utility::ToArrow; let schema = Arc::new(Schema::new( vec![$( diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index 55546802c..b40ba10eb 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -23,15 +23,11 @@ mod table_ref; pub use crate::base::arrow::{ arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError}, owned_and_arrow_conversions::OwnedArrowConversionError, + record_batch_utility::ToArrow, scalar_and_i256_conversions, }; pub use table_ref::TableRef; -#[cfg(feature = "arrow")] -mod record_batch_utility; -#[cfg(feature = "arrow")] -pub use record_batch_utility::ToArrow; - #[cfg(feature = "arrow")] pub mod arrow_schema_utility; From 3b563d9ab6a2e50fab1890ee15ff028e9f4d4f21 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:58 -0400 Subject: [PATCH 18/24] chore!: remove direct conversions from `QueryData` to `RecordBatch` --- crates/proof-of-sql/src/sql/proof/mod.rs | 5 +++-- .../src/sql/proof/query_result.rs | 19 ------------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/crates/proof-of-sql/src/sql/proof/mod.rs 
b/crates/proof-of-sql/src/sql/proof/mod.rs index 48139dc22..b33be315c 100644 --- a/crates/proof-of-sql/src/sql/proof/mod.rs +++ b/crates/proof-of-sql/src/sql/proof/mod.rs @@ -25,8 +25,6 @@ pub(crate) use provable_result_column::ProvableResultColumn; mod provable_query_result; pub use provable_query_result::ProvableQueryResult; -#[cfg(all(test, feature = "arrow"))] -mod provable_query_result_test; mod sumcheck_mle_evaluations; pub(crate) use sumcheck_mle_evaluations::SumcheckMleEvaluations; @@ -70,3 +68,6 @@ pub(crate) use result_element_serialization::{ mod first_round_builder; pub(crate) use first_round_builder::FirstRoundBuilder; + +#[cfg(all(test, feature = "arrow"))] +mod provable_query_result_test; diff --git a/crates/proof-of-sql/src/sql/proof/query_result.rs b/crates/proof-of-sql/src/sql/proof/query_result.rs index 31b9ad994..647e4ad0b 100644 --- a/crates/proof-of-sql/src/sql/proof/query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/query_result.rs @@ -3,8 +3,6 @@ use crate::base::{ proof::ProofError, scalar::Scalar, }; -#[cfg(feature = "arrow")] -use arrow::{error::ArrowError, record_batch::RecordBatch}; use snafu::Snafu; /// Verifiable query errors @@ -54,22 +52,5 @@ pub struct QueryData { pub verification_hash: [u8; 32], } -impl QueryData { - #[cfg(all(test, feature = "arrow"))] - #[must_use] - pub fn into_record_batch(self) -> RecordBatch { - self.try_into().unwrap() - } -} - -#[cfg(feature = "arrow")] -impl TryFrom> for RecordBatch { - type Error = ArrowError; - - fn try_from(value: QueryData) -> Result { - Self::try_from(value.table) - } -} - /// The result of a query -- either an error or a table. 
pub type QueryResult = Result, QueryError>; From 106e68a3e7bf9dde9d64ac1da8e59c1ea4acccb5 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:59 -0400 Subject: [PATCH 19/24] chore: create `column_arrow_conversions` module within `arrow` module --- .../base/arrow/column_arrow_conversions.rs | 79 ++++++++++++++++++ crates/proof-of-sql/src/base/arrow/mod.rs | 3 + .../proof-of-sql/src/base/database/column.rs | 80 +------------------ 3 files changed, 83 insertions(+), 79 deletions(-) create mode 100644 crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs diff --git a/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs new file mode 100644 index 000000000..5eade6cf3 --- /dev/null +++ b/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs @@ -0,0 +1,79 @@ +use crate::base::{ + database::{ColumnField, ColumnType}, + math::decimal::Precision, +}; +use alloc::sync::Arc; +use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; +use proof_of_sql_parser::posql_time::{PoSQLTimeUnit, PoSQLTimeZone}; + +/// Convert [`ColumnType`] values to some arrow [`DataType`] +impl From<&ColumnType> for DataType { + fn from(column_type: &ColumnType) -> Self { + match column_type { + ColumnType::Boolean => DataType::Boolean, + ColumnType::TinyInt => DataType::Int8, + ColumnType::SmallInt => DataType::Int16, + ColumnType::Int => DataType::Int32, + ColumnType::BigInt => DataType::Int64, + ColumnType::Int128 => DataType::Decimal128(38, 0), + ColumnType::Decimal75(precision, scale) => { + DataType::Decimal256(precision.value(), *scale) + } + ColumnType::VarChar => DataType::Utf8, + ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), + ColumnType::TimestampTZ(timeunit, timezone) => { + let arrow_timezone = Some(Arc::from(timezone.to_string())); + let arrow_timeunit = match timeunit { + 
PoSQLTimeUnit::Second => ArrowTimeUnit::Second, + PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, + PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, + PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, + }; + DataType::Timestamp(arrow_timeunit, arrow_timezone) + } + } + } +} + +/// Convert arrow [`DataType`] values to some [`ColumnType`] +impl TryFrom for ColumnType { + type Error = String; + + fn try_from(data_type: DataType) -> Result { + match data_type { + DataType::Boolean => Ok(ColumnType::Boolean), + DataType::Int8 => Ok(ColumnType::TinyInt), + DataType::Int16 => Ok(ColumnType::SmallInt), + DataType::Int32 => Ok(ColumnType::Int), + DataType::Int64 => Ok(ColumnType::BigInt), + DataType::Decimal128(38, 0) => Ok(ColumnType::Int128), + DataType::Decimal256(precision, scale) if precision <= 75 => { + Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) + } + DataType::Timestamp(time_unit, timezone_option) => { + let posql_time_unit = match time_unit { + ArrowTimeUnit::Second => PoSQLTimeUnit::Second, + ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, + ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, + ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, + }; + Ok(ColumnType::TimestampTZ( + posql_time_unit, + PoSQLTimeZone::try_from(&timezone_option)?, + )) + } + DataType::Utf8 => Ok(ColumnType::VarChar), + _ => Err(format!("Unsupported arrow data type {data_type:?}")), + } + } +} +/// Convert [`ColumnField`] values to arrow Field +impl From<&ColumnField> for Field { + fn from(column_field: &ColumnField) -> Self { + Field::new( + column_field.name().name(), + (&column_field.data_type()).into(), + false, + ) + } +} diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 3a3c4500d..301defb0f 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -15,3 +15,6 @@ pub mod record_batch_utility; /// Module for scalar and 
i256 conversions. pub mod scalar_and_i256_conversions; + +/// Module for handling conversions between columns and Arrow arrays. +pub mod column_arrow_conversions; diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index 3d3b11372..be536b1d5 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -4,9 +4,7 @@ use crate::base::{ scalar::{Scalar, ScalarExt}, slice_ops::slice_cast_with, }; -use alloc::{sync::Arc, vec::Vec}; -#[cfg(feature = "arrow")] -use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; +use alloc::vec::Vec; use bumpalo::Bump; use core::{ fmt, @@ -412,70 +410,6 @@ impl ColumnType { } } -/// Convert [`ColumnType`] values to some arrow [`DataType`] -#[cfg(feature = "arrow")] -impl From<&ColumnType> for DataType { - fn from(column_type: &ColumnType) -> Self { - match column_type { - ColumnType::Boolean => DataType::Boolean, - ColumnType::TinyInt => DataType::Int8, - ColumnType::SmallInt => DataType::Int16, - ColumnType::Int => DataType::Int32, - ColumnType::BigInt => DataType::Int64, - ColumnType::Int128 => DataType::Decimal128(38, 0), - ColumnType::Decimal75(precision, scale) => { - DataType::Decimal256(precision.value(), *scale) - } - ColumnType::VarChar => DataType::Utf8, - ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), - ColumnType::TimestampTZ(timeunit, timezone) => { - let arrow_timezone = Some(Arc::from(timezone.to_string())); - let arrow_timeunit = match timeunit { - PoSQLTimeUnit::Second => ArrowTimeUnit::Second, - PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, - PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, - PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, - }; - DataType::Timestamp(arrow_timeunit, arrow_timezone) - } - } - } -} - -/// Convert arrow [`DataType`] values to some [`ColumnType`] -#[cfg(feature = "arrow")] -impl TryFrom for ColumnType 
{ - type Error = String; - - fn try_from(data_type: DataType) -> Result { - match data_type { - DataType::Boolean => Ok(ColumnType::Boolean), - DataType::Int8 => Ok(ColumnType::TinyInt), - DataType::Int16 => Ok(ColumnType::SmallInt), - DataType::Int32 => Ok(ColumnType::Int), - DataType::Int64 => Ok(ColumnType::BigInt), - DataType::Decimal128(38, 0) => Ok(ColumnType::Int128), - DataType::Decimal256(precision, scale) if precision <= 75 => { - Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) - } - DataType::Timestamp(time_unit, timezone_option) => { - let posql_time_unit = match time_unit { - ArrowTimeUnit::Second => PoSQLTimeUnit::Second, - ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, - ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, - ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, - }; - Ok(ColumnType::TimestampTZ( - posql_time_unit, - PoSQLTimeZone::try_from(&timezone_option)?, - )) - } - DataType::Utf8 => Ok(ColumnType::VarChar), - _ => Err(format!("Unsupported arrow data type {data_type:?}")), - } - } -} - /// Display the column type as a str name (in all caps) impl Display for ColumnType { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { @@ -570,18 +504,6 @@ impl ColumnField { } } -/// Convert [`ColumnField`] values to arrow Field -#[cfg(feature = "arrow")] -impl From<&ColumnField> for Field { - fn from(column_field: &ColumnField) -> Self { - Field::new( - column_field.name().name(), - (&column_field.data_type()).into(), - false, - ) - } -} - #[cfg(test)] mod tests { use super::*; From ff87390e595ee3bac2102b97368613cc74309e9b Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:59 -0400 Subject: [PATCH 20/24] chore: create `record_batch_errors` module within `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 3 ++ .../src/base/arrow/record_batch_errors.rs | 38 ++++++++++++++++ .../src/base/commitment/table_commitment.rs | 43 
++----------------- 3 files changed, 45 insertions(+), 39 deletions(-) create mode 100644 crates/proof-of-sql/src/base/arrow/record_batch_errors.rs diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 301defb0f..48197e05b 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -10,6 +10,9 @@ pub mod owned_and_arrow_conversions; /// Tests for owned and Arrow conversions. mod owned_and_arrow_conversions_test; +/// Module for record batch error definitions. +pub mod record_batch_errors; + /// Utility functions for record batches. pub mod record_batch_utility; diff --git a/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs b/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs new file mode 100644 index 000000000..b3986d1a6 --- /dev/null +++ b/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs @@ -0,0 +1,38 @@ +use super::arrow_array_to_column_conversion::ArrowArrayToColumnConversionError; +use crate::base::commitment::ColumnCommitmentsMismatch; +use proof_of_sql_parser::ParseError; +use snafu::Snafu; + +/// Errors that can occur when trying to create or extend a [`TableCommitment`] from a record batch. +#[derive(Debug, Snafu)] +pub enum RecordBatchToColumnsError { + /// Error converting from arrow array + #[snafu(transparent)] + ArrowArrayToColumnConversionError { + /// The underlying source error + source: ArrowArrayToColumnConversionError, + }, + #[snafu(transparent)] + /// This error occurs when converting from a record batch name to an identifier fails. (Which may be impossible.) + FieldParseFail { + /// The underlying source error + source: ParseError, + }, +} + +/// Errors that can occur when attempting to append a record batch to a [`TableCommitment`]. +#[derive(Debug, Snafu)] +pub enum AppendRecordBatchTableCommitmentError { + /// During commitment operation, metadata indicates that operand tables cannot be the same.
+ #[snafu(transparent)] + ColumnCommitmentsMismatch { + /// The underlying source error + source: ColumnCommitmentsMismatch, + }, + /// Error converting from arrow array + #[snafu(transparent)] + ArrowBatchToColumnError { + /// The underlying source error + source: RecordBatchToColumnsError, + }, +} diff --git a/crates/proof-of-sql/src/base/commitment/table_commitment.rs b/crates/proof-of-sql/src/base/commitment/table_commitment.rs index f013cce90..b4387a765 100644 --- a/crates/proof-of-sql/src/base/commitment/table_commitment.rs +++ b/crates/proof-of-sql/src/base/commitment/table_commitment.rs @@ -3,8 +3,9 @@ use super::{ ColumnCommitmentsMismatch, Commitment, DuplicateIdentifiers, }; #[cfg(feature = "arrow")] -use crate::base::arrow::arrow_array_to_column_conversion::{ - ArrayRefExt, ArrowArrayToColumnConversionError, +use crate::base::arrow::{ + arrow_array_to_column_conversion::ArrayRefExt, + record_batch_errors::{AppendRecordBatchTableCommitmentError, RecordBatchToColumnsError}, }; use crate::base::{ database::{Column, ColumnField, CommitmentAccessor, OwnedTable, TableRef}, @@ -15,7 +16,7 @@ use alloc::vec::Vec; use arrow::record_batch::RecordBatch; use bumpalo::Bump; use core::ops::Range; -use proof_of_sql_parser::{Identifier, ParseError}; +use proof_of_sql_parser::Identifier; use serde::{Deserialize, Serialize}; use snafu::Snafu; @@ -85,42 +86,6 @@ pub enum TableCommitmentArithmeticError { NonContiguous, } -/// Errors that can occur when trying to create or extend a [`TableCommitment`] from a record batch. -#[cfg(feature = "arrow")] -#[derive(Debug, Snafu)] -pub enum RecordBatchToColumnsError { - /// Error converting from arrow array - #[snafu(transparent)] - ArrowArrayToColumnConversionError { - /// The underlying source error - source: ArrowArrayToColumnConversionError, - }, - #[snafu(transparent)] - /// This error occurs when convering from a record batch name to an identifier fails. (Which may be impossible.) 
- FieldParseFail { - /// The underlying source error - source: ParseError, - }, -} - -/// Errors that can occur when attempting to append a record batch to a [`TableCommitment`]. -#[cfg(feature = "arrow")] -#[derive(Debug, Snafu)] -pub enum AppendRecordBatchTableCommitmentError { - /// During commitment operation, metadata indicates that operand tables cannot be the same. - #[snafu(transparent)] - ColumnCommitmentsMismatch { - /// The underlying source error - source: ColumnCommitmentsMismatch, - }, - /// Error converting from arrow array - #[snafu(transparent)] - ArrowBatchToColumnError { - /// The underlying source error - source: RecordBatchToColumnsError, - }, -} - /// Commitment for an entire table, with column and table metadata. /// /// Unlike [`ColumnCommitments`], all columns in this commitment must have the same length. From d6a1eda515a60b808db1d81f73c4f789ef24ebcb Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:25:00 -0400 Subject: [PATCH 21/24] chore: create `record_batch_conversion` module within `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 3 + .../src/base/arrow/record_batch_conversion.rs | 160 ++++++++++++++++++ .../src/base/commitment/table_commitment.rs | 158 +---------------- 3 files changed, 168 insertions(+), 153 deletions(-) create mode 100644 crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 48197e05b..0bcac183d 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -10,6 +10,9 @@ pub mod owned_and_arrow_conversions; /// Tests for owned and Arrow conversions. mod owned_and_arrow_conversions_test; +/// Module for converting record batches. +pub mod record_batch_conversion; + /// Module for record batch error definitions. 
pub mod record_batch_errors; diff --git a/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs b/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs new file mode 100644 index 000000000..6f24457cc --- /dev/null +++ b/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs @@ -0,0 +1,160 @@ +use super::{ + arrow_array_to_column_conversion::ArrayRefExt, + record_batch_errors::{AppendRecordBatchTableCommitmentError, RecordBatchToColumnsError}, +}; +use crate::base::{ + commitment::{ + AppendColumnCommitmentsError, AppendTableCommitmentError, Commitment, TableCommitment, + TableCommitmentFromColumnsError, + }, + database::Column, + scalar::Scalar, +}; +use arrow::record_batch::RecordBatch; +use bumpalo::Bump; +use proof_of_sql_parser::Identifier; + +/// This function will return an error if: +/// - The field name cannot be parsed into an [`Identifier`]. +/// - The conversion of an Arrow array to a [`Column`] fails. +pub fn batch_to_columns<'a, S: Scalar + 'a>( + batch: &'a RecordBatch, + alloc: &'a Bump, +) -> Result)>, RecordBatchToColumnsError> { + batch + .schema() + .fields() + .into_iter() + .zip(batch.columns()) + .map(|(field, array)| { + let identifier: Identifier = field.name().parse()?; + let column: Column = array.to_column(alloc, &(0..array.len()), None)?; + Ok((identifier, column)) + }) + .collect() +} + +impl TableCommitment { + /// Append an arrow [`RecordBatch`] to the existing [`TableCommitment`]. + /// + /// The row offset is assumed to be the end of the [`TableCommitment`]'s current range. + /// + /// Will error on a variety of mismatches, or if the provided columns have mixed length. + #[allow(clippy::missing_panics_doc)] + pub fn try_append_record_batch( + &mut self, + batch: &RecordBatch, + setup: &C::PublicSetup<'_>, + ) -> Result<(), AppendRecordBatchTableCommitmentError> { + match self.try_append_rows( + batch_to_columns::(batch, &Bump::new())? 
+ .iter() + .map(|(a, b)| (a, b)), + setup, + ) { + Ok(()) => Ok(()), + Err(AppendTableCommitmentError::MixedLengthColumns { .. }) => { + panic!("RecordBatches cannot have columns of mixed length") + } + Err(AppendTableCommitmentError::AppendColumnCommitments { + source: AppendColumnCommitmentsError::DuplicateIdentifiers { .. }, + }) => { + panic!("RecordBatches cannot have duplicate identifiers") + } + Err(AppendTableCommitmentError::AppendColumnCommitments { + source: AppendColumnCommitmentsError::Mismatch { source: e }, + }) => Err(e)?, + } + } + /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`]. + pub fn try_from_record_batch( + batch: &RecordBatch, + setup: &C::PublicSetup<'_>, + ) -> Result, RecordBatchToColumnsError> { + Self::try_from_record_batch_with_offset(batch, 0, setup) + } + + /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`] with the given row offset. + #[allow(clippy::missing_panics_doc)] + pub fn try_from_record_batch_with_offset( + batch: &RecordBatch, + offset: usize, + setup: &C::PublicSetup<'_>, + ) -> Result, RecordBatchToColumnsError> { + match Self::try_from_columns_with_offset( + batch_to_columns::(batch, &Bump::new())? + .iter() + .map(|(a, b)| (a, b)), + offset, + setup, + ) { + Ok(commitment) => Ok(commitment), + Err(TableCommitmentFromColumnsError::MixedLengthColumns { .. }) => { + panic!("RecordBatches cannot have columns of mixed length") + } + Err(TableCommitmentFromColumnsError::DuplicateIdentifiers { .. 
}) => { + panic!("RecordBatches cannot have duplicate identifiers") + } + } + } +} + +#[cfg(all(test, feature = "blitzar"))] +mod tests { + use super::*; + use crate::{base::scalar::Curve25519Scalar, record_batch}; + use curve25519_dalek::RistrettoPoint; + + #[test] + fn we_can_create_and_append_table_commitments_with_record_batchs() { + let batch = record_batch!( + "a" => [1i64, 2, 3], + "b" => ["1", "2", "3"], + ); + + let b_scals = ["1".into(), "2".into(), "3".into()]; + + let columns = [ + ( + &"a".parse().unwrap(), + &Column::::BigInt(&[1, 2, 3]), + ), + ( + &"b".parse().unwrap(), + &Column::::VarChar((&["1", "2", "3"], &b_scals)), + ), + ]; + + let mut expected_commitment = + TableCommitment::::try_from_columns_with_offset(columns, 0, &()) + .unwrap(); + + let mut commitment = + TableCommitment::::try_from_record_batch(&batch, &()).unwrap(); + + assert_eq!(commitment, expected_commitment); + + let batch2 = record_batch!( + "a" => [4i64, 5, 6], + "b" => ["4", "5", "6"], + ); + + let b_scals2 = ["4".into(), "5".into(), "6".into()]; + + let columns2 = [ + ( + &"a".parse().unwrap(), + &Column::::BigInt(&[4, 5, 6]), + ), + ( + &"b".parse().unwrap(), + &Column::::VarChar((&["4", "5", "6"], &b_scals2)), + ), + ]; + + expected_commitment.try_append_rows(columns2, &()).unwrap(); + commitment.try_append_record_batch(&batch2, &()).unwrap(); + + assert_eq!(commitment, expected_commitment); + } +} diff --git a/crates/proof-of-sql/src/base/commitment/table_commitment.rs b/crates/proof-of-sql/src/base/commitment/table_commitment.rs index b4387a765..1a52b7cea 100644 --- a/crates/proof-of-sql/src/base/commitment/table_commitment.rs +++ b/crates/proof-of-sql/src/base/commitment/table_commitment.rs @@ -2,19 +2,11 @@ use super::{ committable_column::CommittableColumn, AppendColumnCommitmentsError, ColumnCommitments, ColumnCommitmentsMismatch, Commitment, DuplicateIdentifiers, }; -#[cfg(feature = "arrow")] -use crate::base::arrow::{ - 
arrow_array_to_column_conversion::ArrayRefExt, - record_batch_errors::{AppendRecordBatchTableCommitmentError, RecordBatchToColumnsError}, -}; use crate::base::{ - database::{Column, ColumnField, CommitmentAccessor, OwnedTable, TableRef}, + database::{ColumnField, CommitmentAccessor, OwnedTable, TableRef}, scalar::Scalar, }; use alloc::vec::Vec; -#[cfg(feature = "arrow")] -use arrow::record_batch::RecordBatch; -use bumpalo::Bump; use core::ops::Range; use proof_of_sql_parser::Identifier; use serde::{Deserialize, Serialize}; @@ -365,90 +357,6 @@ impl TableCommitment { range, }) } - - /// Append an arrow [`RecordBatch`] to the existing [`TableCommitment`]. - /// - /// The row offset is assumed to be the end of the [`TableCommitment`]'s current range. - /// - /// Will error on a variety of mismatches, or if the provided columns have mixed length. - #[cfg(feature = "arrow")] - #[allow(clippy::missing_panics_doc)] - pub fn try_append_record_batch( - &mut self, - batch: &RecordBatch, - setup: &C::PublicSetup<'_>, - ) -> Result<(), AppendRecordBatchTableCommitmentError> { - match self.try_append_rows( - batch_to_columns::(batch, &Bump::new())? - .iter() - .map(|(a, b)| (a, b)), - setup, - ) { - Ok(()) => Ok(()), - Err(AppendTableCommitmentError::MixedLengthColumns { .. }) => { - panic!("RecordBatches cannot have columns of mixed length") - } - Err(AppendTableCommitmentError::AppendColumnCommitments { - source: AppendColumnCommitmentsError::DuplicateIdentifiers { .. }, - }) => { - panic!("RecordBatches cannot have duplicate identifiers") - } - Err(AppendTableCommitmentError::AppendColumnCommitments { - source: AppendColumnCommitmentsError::Mismatch { source: e }, - }) => Err(e)?, - } - } - /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`]. 
- #[cfg(feature = "arrow")] - pub fn try_from_record_batch( - batch: &RecordBatch, - setup: &C::PublicSetup<'_>, - ) -> Result, RecordBatchToColumnsError> { - Self::try_from_record_batch_with_offset(batch, 0, setup) - } - - /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`] with the given row offset. - #[allow(clippy::missing_panics_doc)] - #[cfg(feature = "arrow")] - pub fn try_from_record_batch_with_offset( - batch: &RecordBatch, - offset: usize, - setup: &C::PublicSetup<'_>, - ) -> Result, RecordBatchToColumnsError> { - match Self::try_from_columns_with_offset( - batch_to_columns::(batch, &Bump::new())? - .iter() - .map(|(a, b)| (a, b)), - offset, - setup, - ) { - Ok(commitment) => Ok(commitment), - Err(TableCommitmentFromColumnsError::MixedLengthColumns { .. }) => { - panic!("RecordBatches cannot have columns of mixed length") - } - Err(TableCommitmentFromColumnsError::DuplicateIdentifiers { .. }) => { - panic!("RecordBatches cannot have duplicate identifiers") - } - } - } -} - -#[cfg(feature = "arrow")] -fn batch_to_columns<'a, S: Scalar + 'a>( - batch: &'a RecordBatch, - alloc: &'a Bump, -) -> Result)>, RecordBatchToColumnsError> { - batch - .schema() - .fields() - .into_iter() - .zip(batch.columns()) - .map(|(field, array)| { - let identifier: Identifier = field.name().parse()?; - let column: Column = array.to_column(alloc, &(0..array.len()), None)?; - Ok((identifier, column)) - }) - .collect() } /// Return the number of rows for the provided columns, erroring if they have mixed length. 
@@ -472,13 +380,10 @@ fn num_rows_of_columns<'a>( #[cfg(all(test, feature = "arrow", feature = "blitzar"))] mod tests { use super::*; - use crate::{ - base::{ - database::{owned_table_utility::*, OwnedColumn}, - map::IndexMap, - scalar::Curve25519Scalar, - }, - record_batch, + use crate::base::{ + database::{owned_table_utility::*, OwnedColumn}, + map::IndexMap, + scalar::Curve25519Scalar, }; use curve25519_dalek::RistrettoPoint; @@ -1230,57 +1135,4 @@ mod tests { Err(TableCommitmentArithmeticError::NegativeRange { .. }) )); } - - #[test] - fn we_can_create_and_append_table_commitments_with_record_batchs() { - let batch = record_batch!( - "a" => [1i64, 2, 3], - "b" => ["1", "2", "3"], - ); - - let b_scals = ["1".into(), "2".into(), "3".into()]; - - let columns = [ - ( - &"a".parse().unwrap(), - &Column::::BigInt(&[1, 2, 3]), - ), - ( - &"b".parse().unwrap(), - &Column::::VarChar((&["1", "2", "3"], &b_scals)), - ), - ]; - - let mut expected_commitment = - TableCommitment::::try_from_columns_with_offset(columns, 0, &()) - .unwrap(); - - let mut commitment = - TableCommitment::::try_from_record_batch(&batch, &()).unwrap(); - - assert_eq!(commitment, expected_commitment); - - let batch2 = record_batch!( - "a" => [4i64, 5, 6], - "b" => ["4", "5", "6"], - ); - - let b_scals2 = ["4".into(), "5".into(), "6".into()]; - - let columns2 = [ - ( - &"a".parse().unwrap(), - &Column::::BigInt(&[4, 5, 6]), - ), - ( - &"b".parse().unwrap(), - &Column::::VarChar((&["4", "5", "6"], &b_scals2)), - ), - ]; - - expected_commitment.try_append_rows(columns2, &()).unwrap(); - commitment.try_append_record_batch(&batch2, &()).unwrap(); - - assert_eq!(commitment, expected_commitment); - } } From 574db2fd2f17fdf7b689cc45454e031120e6d28e Mon Sep 17 00:00:00 2001 From: jay Date: Sat, 26 Oct 2024 23:16:39 -0400 Subject: [PATCH 22/24] feat: add `ProofPlan::get_table_references` --- crates/proof-of-sql/src/sql/proof/proof_plan.rs | 5 ++++- .../proof-of-sql/src/sql/proof/query_proof_test.rs | 14 
+++++++++++++- .../src/sql/proof/verifiable_query_result_test.rs | 6 +++++- .../src/sql/proof_plans/dyn_proof_plan.rs | 14 +++++++++++++- .../src/sql/proof_plans/filter_exec.rs | 6 +++++- .../src/sql/proof_plans/group_by_exec.rs | 6 +++++- .../src/sql/proof_plans/projection_exec.rs | 6 +++++- 7 files changed, 50 insertions(+), 7 deletions(-) diff --git a/crates/proof-of-sql/src/sql/proof/proof_plan.rs b/crates/proof-of-sql/src/sql/proof/proof_plan.rs index 430485308..42ceceab1 100644 --- a/crates/proof-of-sql/src/sql/proof/proof_plan.rs +++ b/crates/proof-of-sql/src/sql/proof/proof_plan.rs @@ -3,7 +3,7 @@ use crate::base::{ commitment::Commitment, database::{ Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor, - OwnedTable, + OwnedTable, TableRef, }, map::IndexSet, proof::ProofError, @@ -46,6 +46,9 @@ pub trait ProofPlan: Debug + Send + Sync + ProverEvaluate IndexSet; + + /// Return all the tables referenced in the Query + fn get_table_references(&self) -> IndexSet; } pub trait ProverEvaluate { diff --git a/crates/proof-of-sql/src/sql/proof/query_proof_test.rs b/crates/proof-of-sql/src/sql/proof/query_proof_test.rs index a4fa8a65a..e6e685673 100644 --- a/crates/proof-of-sql/src/sql/proof/query_proof_test.rs +++ b/crates/proof-of-sql/src/sql/proof/query_proof_test.rs @@ -7,7 +7,7 @@ use crate::{ database::{ owned_table_utility::{bigint, owned_table}, Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, - MetadataAccessor, OwnedTable, OwnedTableTestAccessor, TestAccessor, + MetadataAccessor, OwnedTable, OwnedTableTestAccessor, TableRef, TestAccessor, UnimplementedTestAccessor, }, map::IndexSet, @@ -109,6 +109,9 @@ impl ProofPlan for TrivialTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } fn 
verify_a_trivial_query_proof_with_given_offset(n: usize, offset_generators: usize) { @@ -278,6 +281,9 @@ impl ProofPlan for SquareTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } fn verify_a_proof_with_an_anchored_commitment_and_given_offset(offset_generators: usize) { @@ -481,6 +487,9 @@ impl ProofPlan for DoubleSquareTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } fn verify_a_proof_with_an_intermediate_commitment_and_given_offset(offset_generators: usize) { @@ -677,6 +686,9 @@ impl ProofPlan for ChallengeTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } fn verify_a_proof_with_a_post_result_challenge_and_given_offset(offset_generators: usize) { diff --git a/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs b/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs index 5d299e408..d2db5df0e 100644 --- a/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs +++ b/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs @@ -8,7 +8,7 @@ use crate::{ database::{ owned_table_utility::{bigint, owned_table}, Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, - MetadataAccessor, OwnedTable, TestAccessor, UnimplementedTestAccessor, + MetadataAccessor, OwnedTable, TableRef, TestAccessor, UnimplementedTestAccessor, }, map::IndexSet, proof::ProofError, @@ -88,6 +88,10 @@ impl ProofPlan for EmptyTestQueryExpr { fn get_column_references(&self) -> IndexSet { unimplemented!("no real 
usage for this function yet") } + + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } #[test] diff --git a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs index c524a2c76..9b1222b4b 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs @@ -1,6 +1,10 @@ use super::{FilterExec, GroupByExec, ProjectionExec}; use crate::{ - base::{commitment::Commitment, database::Column, map::IndexSet}, + base::{ + commitment::Commitment, + database::{Column, TableRef}, + map::IndexSet, + }, sql::proof::{ProofPlan, ProverEvaluate}, }; use alloc::vec::Vec; @@ -89,6 +93,14 @@ impl ProofPlan for DynProofPlan { DynProofPlan::Filter(expr) => expr.get_column_references(), } } + + fn get_table_references(&self) -> IndexSet { + match self { + DynProofPlan::Projection(expr) => expr.get_table_references(), + DynProofPlan::GroupBy(expr) => expr.get_table_references(), + DynProofPlan::Filter(expr) => expr.get_table_references(), + } + } } impl ProverEvaluate for DynProofPlan { diff --git a/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs index 4259d3d88..5a1b6106b 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs @@ -4,7 +4,7 @@ use crate::{ commitment::Commitment, database::{ filter_util::filter_columns, Column, ColumnField, ColumnRef, CommitmentAccessor, - DataAccessor, MetadataAccessor, OwnedTable, + DataAccessor, MetadataAccessor, OwnedTable, TableRef, }, map::IndexSet, proof::ProofError, @@ -139,6 +139,10 @@ where columns } + + fn get_table_references(&self) -> IndexSet { + IndexSet::from_iter([self.table.table_ref]) + } } /// Alias for a filter expression with a honest prover. 
diff --git a/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs index 0a43da82f..385b8a2e7 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs @@ -7,7 +7,7 @@ use crate::{ aggregate_columns, compare_indexes_by_owned_columns, AggregatedColumns, }, Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, - MetadataAccessor, OwnedTable, + MetadataAccessor, OwnedTable, TableRef, }, map::IndexSet, proof::ProofError, @@ -202,6 +202,10 @@ impl ProofPlan for GroupByExec { columns } + + fn get_table_references(&self) -> IndexSet { + IndexSet::from_iter([self.table.table_ref]) + } } impl ProverEvaluate for GroupByExec { diff --git a/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs index fb66bff00..f3038b310 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs @@ -3,7 +3,7 @@ use crate::{ commitment::Commitment, database::{ Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor, - OwnedTable, + OwnedTable, TableRef, }, map::IndexSet, proof::ProofError, @@ -92,6 +92,10 @@ impl ProofPlan for ProjectionExec { }); columns } + + fn get_table_references(&self) -> IndexSet { + IndexSet::from_iter([self.table.table_ref]) + } } impl ProverEvaluate for ProjectionExec { From c7a98394b3a1307b27b3ed671b0130ae544c71c0 Mon Sep 17 00:00:00 2001 From: jay Date: Sat, 26 Oct 2024 23:16:40 -0400 Subject: [PATCH 23/24] style: leverage more imports in `dyn_proof_plan` --- .../src/sql/proof_plans/dyn_proof_plan.rs | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs index 9b1222b4b..b7edcc70a 100644 --- 
a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs @@ -2,12 +2,20 @@ use super::{FilterExec, GroupByExec, ProjectionExec}; use crate::{ base::{ commitment::Commitment, - database::{Column, TableRef}, + database::{ + Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor, + OwnedTable, TableRef, + }, map::IndexSet, + proof::ProofError, + }, + sql::proof::{ + CountBuilder, FinalRoundBuilder, FirstRoundBuilder, ProofPlan, ProverEvaluate, + VerificationBuilder, }, - sql::proof::{ProofPlan, ProverEvaluate}, }; use alloc::vec::Vec; +use bumpalo::Bump; use serde::{Deserialize, Serialize}; /// The query plan for proving a query @@ -38,9 +46,9 @@ pub enum DynProofPlan { impl ProofPlan for DynProofPlan { fn count( &self, - builder: &mut crate::sql::proof::CountBuilder, - accessor: &dyn crate::base::database::MetadataAccessor, - ) -> Result<(), crate::base::proof::ProofError> { + builder: &mut CountBuilder, + accessor: &dyn MetadataAccessor, + ) -> Result<(), ProofError> { match self { DynProofPlan::Projection(expr) => expr.count(builder, accessor), DynProofPlan::GroupBy(expr) => expr.count(builder, accessor), @@ -48,7 +56,7 @@ impl ProofPlan for DynProofPlan { } } - fn get_length(&self, accessor: &dyn crate::base::database::MetadataAccessor) -> usize { + fn get_length(&self, accessor: &dyn MetadataAccessor) -> usize { match self { DynProofPlan::Projection(expr) => expr.get_length(accessor), DynProofPlan::GroupBy(expr) => expr.get_length(accessor), @@ -56,7 +64,7 @@ impl ProofPlan for DynProofPlan { } } - fn get_offset(&self, accessor: &dyn crate::base::database::MetadataAccessor) -> usize { + fn get_offset(&self, accessor: &dyn MetadataAccessor) -> usize { match self { DynProofPlan::Projection(expr) => expr.get_offset(accessor), DynProofPlan::GroupBy(expr) => expr.get_offset(accessor), @@ -67,10 +75,10 @@ impl ProofPlan for DynProofPlan { #[tracing::instrument(name = 
"DynProofPlan::verifier_evaluate", level = "debug", skip_all)] fn verifier_evaluate( &self, - builder: &mut crate::sql::proof::VerificationBuilder, - accessor: &dyn crate::base::database::CommitmentAccessor, - result: Option<&crate::base::database::OwnedTable>, - ) -> Result, crate::base::proof::ProofError> { + builder: &mut VerificationBuilder, + accessor: &dyn CommitmentAccessor, + result: Option<&OwnedTable>, + ) -> Result, ProofError> { match self { DynProofPlan::Projection(expr) => expr.verifier_evaluate(builder, accessor, result), DynProofPlan::GroupBy(expr) => expr.verifier_evaluate(builder, accessor, result), @@ -78,7 +86,7 @@ impl ProofPlan for DynProofPlan { } } - fn get_column_result_fields(&self) -> Vec { + fn get_column_result_fields(&self) -> Vec { match self { DynProofPlan::Projection(expr) => expr.get_column_result_fields(), DynProofPlan::GroupBy(expr) => expr.get_column_result_fields(), @@ -86,7 +94,7 @@ impl ProofPlan for DynProofPlan { } } - fn get_column_references(&self) -> IndexSet { + fn get_column_references(&self) -> IndexSet { match self { DynProofPlan::Projection(expr) => expr.get_column_references(), DynProofPlan::GroupBy(expr) => expr.get_column_references(), @@ -108,8 +116,8 @@ impl ProverEvaluate for DynProofPlan { fn result_evaluate<'a>( &self, input_length: usize, - alloc: &'a bumpalo::Bump, - accessor: &'a dyn crate::base::database::DataAccessor, + alloc: &'a Bump, + accessor: &'a dyn DataAccessor, ) -> Vec> { match self { DynProofPlan::Projection(expr) => expr.result_evaluate(input_length, alloc, accessor), @@ -118,7 +126,7 @@ impl ProverEvaluate for DynProofPlan { } } - fn first_round_evaluate(&self, builder: &mut crate::sql::proof::FirstRoundBuilder) { + fn first_round_evaluate(&self, builder: &mut FirstRoundBuilder) { match self { DynProofPlan::Projection(expr) => expr.first_round_evaluate(builder), DynProofPlan::GroupBy(expr) => expr.first_round_evaluate(builder), @@ -129,9 +137,9 @@ impl ProverEvaluate for DynProofPlan { 
#[tracing::instrument(name = "DynProofPlan::final_round_evaluate", level = "debug", skip_all)] fn final_round_evaluate<'a>( &self, - builder: &mut crate::sql::proof::FinalRoundBuilder<'a, C::Scalar>, - alloc: &'a bumpalo::Bump, - accessor: &'a dyn crate::base::database::DataAccessor, + builder: &mut FinalRoundBuilder<'a, C::Scalar>, + alloc: &'a Bump, + accessor: &'a dyn DataAccessor, ) -> Vec> { match self { DynProofPlan::Projection(expr) => expr.final_round_evaluate(builder, alloc, accessor), From 464f3fd83b045f62fd276bb6d308079a88364d1c Mon Sep 17 00:00:00 2001 From: jay Date: Sat, 26 Oct 2024 23:16:40 -0400 Subject: [PATCH 24/24] test: add some tests for `ProofPlan::get_table_references` --- crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs | 4 ++++ .../proof-of-sql/src/sql/proof_plans/projection_exec_test.rs | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs b/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs index c6252d133..062781985 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs @@ -153,6 +153,10 @@ fn we_can_correctly_fetch_all_the_referenced_columns() { ) ]) ); + + let ref_tables = provable_ast.get_table_references(); + + assert_eq!(ref_tables, IndexSet::from_iter([table_ref])); } #[test] diff --git a/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs b/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs index 3addcfb17..c97ecf471 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs @@ -102,6 +102,10 @@ fn we_can_correctly_fetch_all_the_referenced_columns() { ), ]) ); + + let ref_tables = provable_ast.get_table_references(); + + assert_eq!(ref_tables, IndexSet::from_iter([table_ref])); } #[test]