diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index ae6cb6598..3703f52ac 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -120,24 +120,6 @@ jobs: run: cargo run --example wood_types - name: Run movies example run: cargo run --example movies - - name: Run dinosaurs example - run: cargo run --example dinosaurs - - name: Run books example - run: cargo run --example books - - name: Run programming books example - run: cargo run --example programming_books - - name: Run brands example - run: cargo run --example brands - - name: Run avocado-prices example - run: cargo run --example avocado-prices - - name: Run plastics example - run: cargo run --example plastics - - name: Run sushi example - run: cargo run --example sushi - - name: Run countries example - run: cargo run --example countries - - name: Run rockets example - run: cargo run --example rockets - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) diff --git a/Cargo.toml b/Cargo.toml index 035636f51..a0d8f7216 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,8 +20,8 @@ ark-poly = { version = "0.4.0" } ark-serialize = { version = "0.4.0" } ark-std = { version = "0.4.0", default-features = false } arrayvec = { version = "0.7", default-features = false } -arrow = { version = "51.0.0" } -arrow-csv = { version = "51.0.0" } +arrow = { version = "51.0" } +arrow-csv = { version = "51.0" } bit-iter = { version = "1.1.1" } bigdecimal = { version = "0.4.5", default-features = false, features = ["serde"] } blake3 = { version = "1.3.3", default-features = false } diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index 9dc45a7dd..6a4462216 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -107,40 +107,6 @@ required-features = [ "arrow" ] name = "movies" required-features = [ "arrow" ] -name = "dinosaurs" -required-features = [ "arrow" ] - -[[example]] -name = "books" -required-features = [ "arrow" ] - -[[example]] -name = "programming_books" -required-features = ["arrow"] - -[[example]] -name = "brands" -required-features = [ "arrow" ] - -[[example]] -name = "plastics" -required-features = [ "arrow" ] - -[[example]] -name = "avocado-prices" -required-features = [ "arrow" ] - -[[example]] -name = "sushi" -required-features = [ "arrow" ] - -[[example]] -name = "countries" -required-features = [ "arrow" ] - -[[example]] -name = "rockets" - [[bench]] name = "posql_benches" harness = false diff --git a/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv b/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv deleted file mode 100644 index 7750f7a46..000000000 --- a/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv +++ /dev/null @@ -1,37 +0,0 @@ -Year,Price -1990,96 -1991,100 -1992,269 -1993,149 -1994,127 -1995,153 -1996,232 -1997,127 -1998,249 -1999,240 -2000,241 -2001,90 -2002,91 -2003,169 -2004,167 -2005,56 -2006,230 -2007,174 -2008,124 -2009,92 -2010,201 -2011,167 -2012,125 -2013,147 -2014,285 -2015,154 -2016,106 -2017,223 -2018,85 -2019,145 -2020,147 -2021,68 -2022,142 -2023,281 -2024,164 - diff --git a/crates/proof-of-sql/examples/avocado-prices/main.rs b/crates/proof-of-sql/examples/avocado-prices/main.rs deleted file mode 100644 index 85d5e50b8..000000000 --- a/crates/proof-of-sql/examples/avocado-prices/main.rs +++ /dev/null @@ -1,124 +0,0 @@ -//! 
Example to use Proof of SQL with datasets -//! To run, use `cargo run --example avocado-prices`. -//! -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --release --example avocado-prices --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{OwnedTable, OwnedTableTestAccessor}, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. -// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. -// For a sampling: -// max_nu = 3 => max table size is 32 rows -// max_nu = 4 => max table size is 128 rows -// max_nu = 8 => max table size is 32768 rows -// max_nu = 10 => max table size is 0.5 million rows -// max_nu = 15 => max table size is 0.5 billion rows -// max_nu = 20 => max table size is 0.5 trillion rows -// Note: we will eventually load these from a file. -const DORY_SETUP_MAX_NU: usize = 8; -// This should be a "nothing-up-my-sleeve" phrase or number. -const DORY_SEED: [u8; 32] = *b"len 32 rng seed - Space and Time"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. -fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "avocado".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Display the result - println!("Query Result:"); - println!("{result:?}"); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv"; - let data_batch = ReaderBuilder::new(SchemaRef::new( - infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), - )) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
- let accessor = OwnedTableTestAccessor::::new_from_table( - "avocado.prices".parse().unwrap(), - OwnedTable::try_from(data_batch).unwrap(), - 0, - &prover_setup, - ); - - prove_and_verify_query( - "SELECT COUNT(*) AS total FROM prices", - &accessor, - &prover_setup, - &verifier_setup, - ); - prove_and_verify_query( - "SELECT Price, COUNT(*) AS total FROM prices GROUP BY Price ORDER BY total", - &accessor, - &prover_setup, - &verifier_setup, - ); - prove_and_verify_query( - "SELECT Year, COUNT(*) AS total FROM prices WHERE Price > 100 GROUP BY Year ORDER BY total DESC LIMIT 5", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/examples/books/books.csv b/crates/proof-of-sql/examples/books/books.csv deleted file mode 100644 index e5e7e841b..000000000 --- a/crates/proof-of-sql/examples/books/books.csv +++ /dev/null @@ -1,21 +0,0 @@ -id,title,author,publication_year,genre,rating -1,To Kill a Mockingbird,Harper Lee,1960,Fiction,4.5 -2,1984,George Orwell,1949,Science Fiction,4.7 -3,Pride and Prejudice,Jane Austen,1813,Romance,4.3 -4,The Great Gatsby,F. Scott Fitzgerald,1925,Fiction,4.2 -5,The Catcher in the Rye,J.D. Salinger,1951,Fiction,4.0 -6,Moby-Dick,Herman Melville,1851,Adventure,4.1 -7,The Lord of the Rings,J.R.R. Tolkien,1954,Fantasy,4.9 -8,The Hobbit,J.R.R. Tolkien,1937,Fantasy,4.6 -9,Brave New World,Aldous Huxley,1932,Science Fiction,4.4 -10,The Hunger Games,Suzanne Collins,2008,Young Adult,4.3 -11,Harry Potter and the Philosopher's Stone,J.K. Rowling,1997,Fantasy,4.8 -12,The Da Vinci Code,Dan Brown,2003,Thriller,3.9 -13,The Alchemist,Paulo Coelho,1988,Fiction,4.2 -14,The Girl with the Dragon Tattoo,Stieg Larsson,2005,Mystery,4.1 -15,The Hitchhiker's Guide to the Galaxy,Douglas Adams,1979,Science Fiction,4.5 -16,The Shining,Stephen King,1977,Horror,4.3 -17,The Catch-22,Joseph Heller,1961,Satire,4.0 -18,The Chronicles of Narnia,C.S. Lewis,1950,Fantasy,4.7 -19,The Fault in Our Stars,John Green,2012,Young Adult,4.2 -20,The Old Man and the Sea,Ernest Hemingway,1952,Fiction,4.1 \ No newline at end of file diff --git a/crates/proof-of-sql/examples/books/main.rs b/crates/proof-of-sql/examples/books/main.rs deleted file mode 100644 index 973e40b15..000000000 --- a/crates/proof-of-sql/examples/books/main.rs +++ /dev/null @@ -1,136 +0,0 @@ -//! This is a non-interactive example of using Proof of SQL with a books dataset. -//! To run this, use `cargo run --release --example books`. -//! -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --release --example books --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. - -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{ - arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, - TestAccessor, - }, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. -// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. 
-const DORY_SETUP_MAX_NU: usize = 8; -// This should be a "nothing-up-my-sleeve" phrase or number. -const DORY_SEED: [u8; 32] = *b"ebab60d58dee4cc69658939b7c2a582d"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. -fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "books".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Display the result - println!("Query Result:"); - println!("{result:?}"); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/books/books.csv"; - let inferred_schema = - SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); - let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); - - let books_batch = ReaderBuilder::new(posql_compatible_schema) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
- let mut accessor = - OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table( - "books.books".parse().unwrap(), - OwnedTable::try_from(books_batch).unwrap(), - 0, - ); - - // Query 1: Count the total number of books - prove_and_verify_query( - "SELECT COUNT(*) AS total_books FROM books", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 2: Find the top 5 highest-rated books - prove_and_verify_query( - "SELECT title, author, rating FROM books ORDER BY rating DESC LIMIT 5", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 3: Count the number of books in each genre - prove_and_verify_query( - "SELECT genre, COUNT(*) AS book_count FROM books GROUP BY genre ORDER BY book_count DESC", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 4: Find books published after 2000 with a rating higher than 4.5 - prove_and_verify_query( - "SELECT title, author, publication_year, rating FROM books WHERE publication_year > 2000 AND rating > 4.5", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/examples/brands/brands.csv b/crates/proof-of-sql/examples/brands/brands.csv deleted file mode 100644 index f75cdf6a9..000000000 --- a/crates/proof-of-sql/examples/brands/brands.csv +++ /dev/null @@ -1,26 +0,0 @@ -Name,Country,Founded,Revenue -Apple,United States,1976,365.82 -Samsung,South Korea,1938,200.73 -Microsoft,United States,1975,198.27 -Amazon,United States,1994,513.98 -Google,United States,1998,282.84 -Toyota,Japan,1937,278.52 -Coca-Cola,United States,1886,38.66 -Mercedes-Benz,Germany,1926,154.31 -McDonald's,United States,1955,19.2 -Nike,United States,1964,44.54 -Louis Vuitton,France,1854,75.98 -BMW,Germany,1916,121.87 -Disney,United States,1923,67.42 -Honda,Japan,1948,129.21 -Pepsi,United States,1893,79.47 -Adidas,Germany,1949,21.23 -Nestle,Switzerland,1866,94.42 -Unilever,Netherlands,1929,58.26 -Sony,Japan,1946,84.89 -Volkswagen,Germany,1937,250.2 -IKEA,Sweden,1943,44.6 -Starbucks,United States,1971,23.52 -Zara,Spain,1974,27.72 -H&M,Sweden,1947,21.73 -Gucci,Italy,1921,10.34 \ No newline at end of file diff --git a/crates/proof-of-sql/examples/brands/main.rs b/crates/proof-of-sql/examples/brands/main.rs deleted file mode 100644 index 8e1db87a1..000000000 --- a/crates/proof-of-sql/examples/brands/main.rs +++ /dev/null @@ -1,128 +0,0 @@ -//! This is a non-interactive example of using Proof of SQL with a brands dataset. -//! To run this, use `cargo run --release --example brands`. -//! -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --release --example brands --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. - -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{ - arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, - TestAccessor, - }, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. -// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. 
-const DORY_SETUP_MAX_NU: usize = 8; -// This should be a "nothing-up-my-sleeve" phrase or number. -const DORY_SEED: [u8; 32] = *b"8f3a2e1c5b9d7f0a6e4d2c8b7a9f1e3d"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. -fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "brands".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Display the result - println!("Query Result:"); - println!("{result:?}"); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/brands/brands.csv"; - let inferred_schema = - SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); - let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); - - let brands_batch = ReaderBuilder::new(posql_compatible_schema) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
- let mut accessor = - OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table( - "brands.global_brands".parse().unwrap(), - OwnedTable::try_from(brands_batch).unwrap(), - 0, - ); - - // Query 1: Count the total number of brands - prove_and_verify_query( - "SELECT COUNT(*) AS total_brands FROM global_brands", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 2: List the names of brands founded before 1950 - prove_and_verify_query( - "SELECT Name FROM global_brands WHERE Founded < 1950", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 3: List the top 5 countries with the highest total revenue, ordered by total revenue - prove_and_verify_query( - "SELECT Country, SUM(Revenue) AS total_revenue FROM global_brands GROUP BY Country ORDER BY total_revenue DESC LIMIT 5", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/examples/countries/countries_gdp.csv b/crates/proof-of-sql/examples/countries/countries_gdp.csv deleted file mode 100644 index 397102f8f..000000000 --- a/crates/proof-of-sql/examples/countries/countries_gdp.csv +++ /dev/null @@ -1,35 +0,0 @@ -Country,Continent,GDP,GDPP -UnitedStates,NorthAmerica,21137,63543 -China,Asia,14342,10261 -Japan,Asia,5081,40293 -Germany,Europe,3846,46329 -India,Asia,2875,2099 -UnitedKingdom,Europe,2825,42330 -France,Europe,2716,41463 -Italy,Europe,2001,33279 -Brazil,SouthAmerica,1839,8718 -Canada,NorthAmerica,1643,43119 -Russia,EuropeAsia,1637,11229 -SouthKorea,Asia,1622,31489 -Australia,Oceania,1382,53799 -Spain,Europe,1316,28152 -Mexico,NorthAmerica,1265,9958 -Indonesia,Asia,1119,4152 -Netherlands,Europe,902,52477 -SaudiArabia,Asia,793,23206 -Turkey,EuropeAsia,761,9005 -Switzerland,Europe,703,81392 -Argentina,SouthAmerica,449,9921 -Sweden,Europe,528,52073 -Nigeria,Africa,448,2190 -Poland,Europe,594,15673 -Thailand,Asia,509,7306 -SouthAfrica,Africa,350,5883 -Philippines,Asia,402,3685 -Colombia,SouthAmerica,323,6458 -Egypt,Africa,302,3012 -Pakistan,Asia,278,1450 -Bangladesh,Asia,302,1855 -Vietnam,Asia,283,2900 -Chile,SouthAmerica,252,13120 -Finland,Europe,268,48888 \ No newline at end of file diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs deleted file mode 100644 index 10bfb8705..000000000 --- a/crates/proof-of-sql/examples/countries/main.rs +++ /dev/null @@ -1,132 +0,0 @@ -//! This is a non-interactive example of using Proof of SQL with a countries dataset. -//! To run this, use `cargo run --release --example countries`. -//! -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --release --example countries --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. - -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{ - arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, - TestAccessor, - }, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. 
-// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. -const DORY_SETUP_MAX_NU: usize = 8; -// This should be a "nothing-up-my-sleeve" phrase or number. -const DORY_SEED: [u8; 32] = *b"7a1b3c8d2e4f9g6h5i0j7k2l8m3n9o1p"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. -fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "countries".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Display the result - println!("Query Result:"); - println!("{result:?}"); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/countries/countries_gdp.csv"; - let inferred_schema = - SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); - let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); - - let countries_batch = ReaderBuilder::new(posql_compatible_schema) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
- let mut accessor = - OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table( - "countries.countries".parse().unwrap(), - OwnedTable::try_from(countries_batch).unwrap(), - 0, - ); - - prove_and_verify_query( - "SELECT COUNT(*) AS total_countries FROM countries", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT country FROM countries WHERE continent = 'Asia'", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT country FROM countries WHERE gdp > 500 AND gdp < 1500", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT SUM(gdp) AS total_market_cap FROM countries WHERE country = 'China' OR country = 'India'", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv b/crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv deleted file mode 100644 index 5e76a81b6..000000000 --- a/crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv +++ /dev/null @@ -1,11 +0,0 @@ -id,name,period,diet,length_meters,weight_tons -1,Tyrannosaurus Rex,Cretaceous,Carnivore,12.3,7.0 -2,Stegosaurus,Jurassic,Herbivore,9.0,5.5 -3,Triceratops,Cretaceous,Herbivore,8.5,10.0 -4,Velociraptor,Cretaceous,Carnivore,1.8,0.015 -5,Brachiosaurus,Jurassic,Herbivore,26.0,50.0 -6,Ankylosaurus,Cretaceous,Herbivore,6.5,6.0 -7,Spinosaurus,Cretaceous,Carnivore,15.0,7.5 -8,Diplodocus,Jurassic,Herbivore,27.0,25.0 -9,Allosaurus,Jurassic,Carnivore,9.7,2.3 -10,Parasaurolophus,Cretaceous,Herbivore,10.0,3.5 \ No newline at end of file diff --git a/crates/proof-of-sql/examples/dinosaurs/main.rs b/crates/proof-of-sql/examples/dinosaurs/main.rs deleted file mode 100644 index 154bb1683..000000000 --- a/crates/proof-of-sql/examples/dinosaurs/main.rs +++ /dev/null @@ -1,125 +0,0 @@ -//! This is a non-interactive example of using Proof of SQL with a dinosaur dataset. -//! To run this, use `cargo run --release --example dinosaurs`. -//! -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --release --example dinosaurs --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. - -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{ - arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, - TestAccessor, - }, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. -// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. -const DORY_SETUP_MAX_NU: usize = 8; -// This should be a "nothing-up-my-sleeve" phrase or number. -const DORY_SEED: [u8; 32] = *b"262a6aa18b5c43d589677c13dd33e6dc"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. 
-fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "dinosaurs".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Display the result - println!("Query Result:"); - println!("{result:?}"); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv"; - let inferred_schema = - SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); - let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); - - let dinosaurs_batch = ReaderBuilder::new(posql_compatible_schema) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. - let mut accessor = - OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table( - "dinosaurs.dinosaurs".parse().unwrap(), - OwnedTable::try_from(dinosaurs_batch).unwrap(), - 0, - ); - - prove_and_verify_query( - "SELECT COUNT(*) AS total_dinosaurs FROM dinosaurs", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT name, weight_tons FROM dinosaurs WHERE diet = 'Carnivore' ORDER BY weight_tons DESC LIMIT 1", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT name, length_meters FROM dinosaurs ORDER BY length_meters DESC LIMIT 3", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/examples/plastics/main.rs b/crates/proof-of-sql/examples/plastics/main.rs deleted file mode 100644 index 7263e7538..000000000 --- a/crates/proof-of-sql/examples/plastics/main.rs +++ /dev/null @@ -1,135 +0,0 @@ -//! This is a non-interactive example of using Proof of SQL with a plastics dataset. -//! To run this, use `cargo run --release --example plastics`. -//! -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --release --example plastics --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. 
- -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{ - arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, - TestAccessor, - }, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. -// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. -const DORY_SETUP_MAX_NU: usize = 8; -// This should be a "nothing-up-my-sleeve" phrase or number. -const DORY_SEED: [u8; 32] = *b"32f7f321c4ab1234d5e6f7a8b9c0d1e2"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. -fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "plastics".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Display the result - println!("Query Result:"); - println!("{result:?}"); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/plastics/plastics.csv"; - let schema = get_posql_compatible_schema(&SchemaRef::new( - infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), - )); - let plastics_batch = ReaderBuilder::new(schema) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
- let mut accessor = - OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table( - "plastics.types".parse().unwrap(), - OwnedTable::try_from(plastics_batch).unwrap(), - 0, - ); - - // Query 1: Count total number of plastic types - prove_and_verify_query( - "SELECT COUNT(*) AS total_types FROM types", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 2: List names of biodegradable plastics - prove_and_verify_query( - "SELECT Name FROM types WHERE Biodegradable = TRUE ORDER BY Name", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 3: Show average density of plastics by recycling code - prove_and_verify_query( - "SELECT Code, SUM(Density)/COUNT(*) as avg_density FROM types GROUP BY Code ORDER BY Code", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 4: List plastics with density greater than 1.0 g/cm³ - prove_and_verify_query( - "SELECT Name, Density FROM types WHERE Density > 1.0 ORDER BY Density DESC", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/examples/plastics/plastics.csv b/crates/proof-of-sql/examples/plastics/plastics.csv deleted file mode 100644 index 9b793da0a..000000000 --- a/crates/proof-of-sql/examples/plastics/plastics.csv +++ /dev/null @@ -1,19 +0,0 @@ -Name,Code,Density,Biodegradable -Polyethylene Terephthalate (PET),1,1.38,FALSE -High-Density Polyethylene (HDPE),2,0.97,FALSE -Polyvinyl Chloride (PVC),3,1.40,FALSE -Low-Density Polyethylene (LDPE),4,0.92,FALSE -Polypropylene (PP),5,0.90,FALSE -Polystyrene (PS),6,1.05,FALSE -Polylactic Acid (PLA),7,1.25,TRUE -Polybutylene Adipate Terephthalate (PBAT),7,1.26,TRUE -Polyhydroxyalkanoates (PHA),7,1.24,TRUE -Polybutylene Succinate (PBS),7,1.26,TRUE -Acrylic (PMMA),7,1.18,FALSE -Polycarbonate (PC),7,1.20,FALSE -Polyurethane (PU),7,1.05,FALSE -Acrylonitrile Butadiene Styrene (ABS),7,1.04,FALSE -Polyamide (Nylon),7,1.15,FALSE -Polyethylene Furanoate (PEF),7,1.43,TRUE -Thermoplastic Starch (TPS),7,1.35,TRUE -Cellulose Acetate,7,1.30,TRUE \ No newline at end of file diff --git a/crates/proof-of-sql/examples/posql_db/main.rs b/crates/proof-of-sql/examples/posql_db/main.rs index f2facf2c8..a796ed25e 100644 --- a/crates/proof-of-sql/examples/posql_db/main.rs +++ b/crates/proof-of-sql/examples/posql_db/main.rs @@ -5,7 +5,6 @@ mod commit_accessor; mod csv_accessor; /// TODO: add docs mod record_batch_accessor; - use arrow::{ datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, @@ -274,7 +273,7 @@ fn main() { end_timer(timer); println!( "Verified Result: {:?}", - RecordBatch::try_from(query_result.table).unwrap() + RecordBatch::try_from(query_result).unwrap() ); } } diff --git a/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs b/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs index 08e25f4fe..8af046972 100644 --- a/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs +++ b/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs @@ -2,9 +2,9 @@ use arrow::record_batch::RecordBatch; use bumpalo::Bump; use indexmap::IndexMap; use proof_of_sql::base::{ - arrow::arrow_array_to_column_conversion::ArrayRefExt, database::{ - Column, ColumnRef, ColumnType, DataAccessor, MetadataAccessor, SchemaAccessor, TableRef, + ArrayRefExt, Column, ColumnRef, ColumnType, DataAccessor, MetadataAccessor, SchemaAccessor, + TableRef, }, scalar::Scalar, }; diff --git a/crates/proof-of-sql/examples/programming_books/main.rs b/crates/proof-of-sql/examples/programming_books/main.rs deleted 
file mode 100644 index 09af38488..000000000 --- a/crates/proof-of-sql/examples/programming_books/main.rs +++ /dev/null @@ -1,133 +0,0 @@ -//! This is a non-interactive example of using Proof of SQL with an extended books dataset. -//! To run this, use `cargo run --example programming_books`. -//! -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --example programming_books --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. - -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{ - arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, - TestAccessor, - }, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -const DORY_SETUP_MAX_NU: usize = 8; -const DORY_SEED: [u8; 32] = *b"ebab60d58dee4cc69658939b7c2a582d"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. -fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "programming_books".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Display the result - println!("Query Result:"); - println!("{result:?}"); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/programming_books/programming_books.csv"; - let inferred_schema = - SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); - let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); - - let books_extra_batch = ReaderBuilder::new(posql_compatible_schema) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
- let mut accessor = - OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table( - "programming_books.books".parse().unwrap(), - OwnedTable::try_from(books_extra_batch).unwrap(), - 0, - ); - - // Query 1: Count the total number of books - prove_and_verify_query( - "SELECT COUNT(*) AS total_books FROM books", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 2: Find books with a rating higher than 4.5 - prove_and_verify_query( - "SELECT title, author FROM books WHERE rating > 4.5", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 3: List all programming books published after 2000 - prove_and_verify_query( - "SELECT title, publication_year FROM books WHERE genre = 'Programming' AND publication_year > 2000", - &accessor, - &prover_setup, - &verifier_setup, - ); - - // Query 4: Find the top 5 authors with the most books - prove_and_verify_query( - "SELECT author, COUNT(*) AS book_count FROM books GROUP BY author ORDER BY book_count DESC LIMIT 5", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/examples/programming_books/programming_books.csv b/crates/proof-of-sql/examples/programming_books/programming_books.csv deleted file mode 100644 index dbad4ba3b..000000000 --- a/crates/proof-of-sql/examples/programming_books/programming_books.csv +++ /dev/null @@ -1,11 +0,0 @@ -title,author,publication_year,genre,rating -The Pragmatic Programmer,Andrew Hunt,1999,Programming,4.5 -Clean Code,Robert C. Martin,2008,Programming,4.7 -The Clean Coder,Robert C. Martin,2011,Programming,4.6 -Design Patterns,Erich Gamma,1994,Software Engineering,4.8 -Refactoring,Martin Fowler,1999,Programming,4.5 -Effective Java,Joshua Bloch,2008,Programming,4.7 -Introduction to Algorithms,Thomas H. Cormen,2009,Computer Science,4.8 -Code Complete,Steve McConnell,2004,Programming,4.6 -The Mythical Man-Month,Fred Brooks,1975,Software Engineering,4.3 -Algorithms,Robert Sedgewick,1983,Computer Science,4.5 diff --git a/crates/proof-of-sql/examples/rockets/launch_vehicles.csv b/crates/proof-of-sql/examples/rockets/launch_vehicles.csv deleted file mode 100644 index cba1aeb2f..000000000 --- a/crates/proof-of-sql/examples/rockets/launch_vehicles.csv +++ /dev/null @@ -1,28 +0,0 @@ -name,country,year,mtow -Saturn V,USA,1967,2976000 -Falcon Heavy,USA,2018,1420788 -Space Shuttle,USA,1981,2041167 -Energia,USSR,1987,2400000 -Ariane 5,Europe,1996,780000 -Delta IV Heavy,USA,2004,733400 -Long March 5,China,2016,869000 -Proton,USSR/Russia,1965,705000 -Atlas V,USA,2002,546700 -H-IIA,Japan,2001,445000 -Soyuz,USSR/Russia,1966,308000 -Falcon 9,USA,2010,549054 -Vega,Europe,2012,137000 -PSLV,India,1993,320000 -GSLV Mk III,India,2017,640000 -Titan II,USA,1962,153800 -Angara A5,Russia,2014,1335000 -Delta II,USA,1989,231870 -Electron,New Zealand,2017,12500 -Antares,USA,2013,240000 -Zenit,USSR/Ukraine,1985,462000 -N1,USSR,1969,2735000 -New Glenn,USA,2024,1300000 -Redstone,USA,1953,29500 -Black Arrow,UK,1971,18800 -Diamant,France,1965,18000 -Pegasus,USA,1990,23300 diff --git a/crates/proof-of-sql/examples/rockets/main.rs b/crates/proof-of-sql/examples/rockets/main.rs deleted file mode 100644 index 79ad4c4a4..000000000 --- a/crates/proof-of-sql/examples/rockets/main.rs +++ /dev/null @@ -1,132 +0,0 @@ -//! This is a non-interactive example of using Proof of SQL with a rockets dataset. -//! To run this, use `cargo run --release --example rockets`. -//! -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! 
you can run `cargo run --release --example rockets --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. - -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{ - arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, - TestAccessor, - }, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. -// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. -const DORY_SETUP_MAX_NU: usize = 8; -// This should be a "nothing-up-my-sleeve" phrase or number. -const DORY_SEED: [u8; 32] = *b"7a1b3c8d2e4f9g6h5i0j7k2l8m3n9o1p"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. -fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "rockets".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - - // Display the result - println!("Query Result:"); - println!("{result:?}"); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/rockets/launch_vehicles.csv"; - let inferred_schema = - SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); - let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); - - let rockets_batch = ReaderBuilder::new(posql_compatible_schema) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
- let mut accessor = - OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table( - "rockets.launch_vehicles".parse().unwrap(), - OwnedTable::try_from(rockets_batch).unwrap(), - 0, - ); - - prove_and_verify_query( - "SELECT COUNT(*) AS total_rockets FROM launch_vehicles", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT country, MAX(mtow) as max_mtow, COUNT(*) as rocket_count FROM launch_vehicles GROUP BY country ORDER BY max_mtow DESC", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT name FROM launch_vehicles WHERE country = 'USA'", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT name FROM launch_vehicles WHERE mtow > 100000 and mtow < 150000", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/examples/sushi/fish.csv b/crates/proof-of-sql/examples/sushi/fish.csv deleted file mode 100644 index e0a14ebc0..000000000 --- a/crates/proof-of-sql/examples/sushi/fish.csv +++ /dev/null @@ -1,13 +0,0 @@ -nameEn,nameJa,kindEn,kindJa,pricePerPound -Tuna,Maguro,Lean Red Meat,Akami,25 -Tuna,Maguro,Medium Fat Red Meat,Toro,65 -Tuna,Maguro,Fatty Red Meat,Otoro,115 -Bonito,Katsuo,Red Meat,Akami,20 -Yellowtail,Hamachi,Red Meat,Akami,27 -Salmon,Salmon,White Fish,Shiromi,17 -Sea Bream,Tai,White Fish,Shiromi,32 -Sea Bass,Suzuki,White Fish,Shiromi,28 -Mackerel,Aji,Silver Skinned,Hikarimono,14 -Sardine,Iwashi,Silver Skinned,Hikarimono,11 -Scallops,Hotate,Shellfish,Kai,26 -Ark-shell clams,Akagai,Shellfish,Kai,29 diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs deleted file mode 100644 index 0c7f89545..000000000 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ /dev/null @@ -1,141 +0,0 @@ -//! This is an non-interactive example of using Proof of SQL with some sushi related datasets. -//! To run this, use `cargo run --example sushi`. - -//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --release --example sushi --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. -use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use proof_of_sql::{ - base::database::{OwnedTable, OwnedTableTestAccessor, TestAccessor}, - proof_primitive::dory::{ - DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, - VerifierSetup, - }, - sql::{parse::QueryExpr, proof::QueryProof}, -}; -use rand::{rngs::StdRng, SeedableRng}; -use std::{fs::File, time::Instant}; - -const DORY_SETUP_MAX_NU: usize = 8; -const DORY_SEED: [u8; 32] = *b"sushi-is-the-best-food-available"; - -/// # Panics -/// Will panic if the query does not parse or the proof fails to verify. 
-fn prove_and_verify_query( - sql: &str, - accessor: &OwnedTableTestAccessor, - prover_setup: &ProverSetup, - verifier_setup: &VerifierSetup, -) { - // Parse the query: - println!("Parsing the query: {sql}..."); - let now = Instant::now(); - let query_plan = QueryExpr::::try_new( - sql.parse().unwrap(), - "sushi".parse().unwrap(), - accessor, - ) - .unwrap(); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - // Generate the proof and result: - print!("Generating proof..."); - let now = Instant::now(); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), - accessor, - &prover_setup, - ); - println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); - // Verify the result with the proof: - print!("Verifying proof..."); - let now = Instant::now(); - let result = proof - .verify( - query_plan.proof_expr(), - accessor, - &provable_result, - &verifier_setup, - ) - .unwrap(); - println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); - // Display the result - println!("Query Result:"); - println!("{:?}", result.table); -} - -fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); - let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); - let prover_setup = ProverSetup::from(&public_parameters); - let verifier_setup = VerifierSetup::from(&public_parameters); - - let filename = "./crates/proof-of-sql/examples/sushi/fish.csv"; - let fish_batch = ReaderBuilder::new(SchemaRef::new( - infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), - )) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); - println!("{fish_batch:?}"); - - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
- let mut accessor = - OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table( - "sushi.fish".parse().unwrap(), - OwnedTable::try_from(fish_batch).unwrap(), - 0, - ); - - prove_and_verify_query( - "SELECT * FROM fish", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT COUNT(*) FROM fish WHERE nameEn = 'Tuna'", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT kindEn FROM fish WHERE kindJa = 'Otoro'", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT kindEn FROM fish WHERE kindJa = 'Otoro'", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT * FROM fish WHERE pricePerPound > 25 AND pricePerPound < 75", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT kindJa, COUNT(*) FROM fish GROUP BY kindJa", - &accessor, - &prover_setup, - &verifier_setup, - ); - - prove_and_verify_query( - "SELECT kindJa, pricePerPound FROM fish WHERE nameEn = 'Tuna' ORDER BY pricePerPound ASC", - &accessor, - &prover_setup, - &verifier_setup, - ); -} diff --git a/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs deleted file mode 100644 index 5eade6cf3..000000000 --- a/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs +++ /dev/null @@ -1,79 +0,0 @@ -use crate::base::{ - database::{ColumnField, ColumnType}, - math::decimal::Precision, -}; -use alloc::sync::Arc; -use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; -use proof_of_sql_parser::posql_time::{PoSQLTimeUnit, PoSQLTimeZone}; - -/// Convert [`ColumnType`] values to some arrow [`DataType`] -impl From<&ColumnType> for DataType { - fn from(column_type: &ColumnType) -> Self { - match column_type { - ColumnType::Boolean => DataType::Boolean, - ColumnType::TinyInt => DataType::Int8, - ColumnType::SmallInt => DataType::Int16, - ColumnType::Int => DataType::Int32, - ColumnType::BigInt => DataType::Int64, - ColumnType::Int128 => DataType::Decimal128(38, 0), - ColumnType::Decimal75(precision, scale) => { - DataType::Decimal256(precision.value(), *scale) - } - ColumnType::VarChar => DataType::Utf8, - ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), - ColumnType::TimestampTZ(timeunit, timezone) => { - let arrow_timezone = Some(Arc::from(timezone.to_string())); - let arrow_timeunit = match timeunit { - PoSQLTimeUnit::Second => ArrowTimeUnit::Second, - PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, - PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, - PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, - }; - DataType::Timestamp(arrow_timeunit, arrow_timezone) - } - } - } -} - -/// Convert arrow [`DataType`] values to some [`ColumnType`] -impl TryFrom for ColumnType { - type Error = String; - - fn try_from(data_type: DataType) -> Result { - match data_type { - DataType::Boolean => Ok(ColumnType::Boolean), - DataType::Int8 => Ok(ColumnType::TinyInt), - DataType::Int16 => Ok(ColumnType::SmallInt), - DataType::Int32 => Ok(ColumnType::Int), - DataType::Int64 => Ok(ColumnType::BigInt), - DataType::Decimal128(38, 0) => Ok(ColumnType::Int128), - DataType::Decimal256(precision, scale) if precision <= 75 => { - Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) - } - DataType::Timestamp(time_unit, timezone_option) => { - let posql_time_unit = match time_unit { - 
ArrowTimeUnit::Second => PoSQLTimeUnit::Second, - ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, - ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, - ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, - }; - Ok(ColumnType::TimestampTZ( - posql_time_unit, - PoSQLTimeZone::try_from(&timezone_option)?, - )) - } - DataType::Utf8 => Ok(ColumnType::VarChar), - _ => Err(format!("Unsupported arrow data type {data_type:?}")), - } - } -} -/// Convert [`ColumnField`] values to arrow Field -impl From<&ColumnField> for Field { - fn from(column_field: &ColumnField) -> Self { - Field::new( - column_field.name().name(), - (&column_field.data_type()).into(), - false, - ) - } -} diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs deleted file mode 100644 index 0bcac183d..000000000 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ /dev/null @@ -1,26 +0,0 @@ -//! This module provides conversions and utilities for working with Arrow data structures. - -/// Module for handling conversion from Arrow arrays to columns. -pub mod arrow_array_to_column_conversion; - -/// Module for converting between owned and Arrow data structures. -pub mod owned_and_arrow_conversions; - -#[cfg(test)] -/// Tests for owned and Arrow conversions. -mod owned_and_arrow_conversions_test; - -/// Module for converting record batches. -pub mod record_batch_conversion; - -/// Module for record batch error definitions. -pub mod record_batch_errors; - -/// Utility functions for record batches. -pub mod record_batch_utility; - -/// Module for scalar and i256 conversions. -pub mod scalar_and_i256_conversions; - -/// Module for handling conversions between columns and Arrow arrays. -pub mod column_arrow_conversions; diff --git a/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs b/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs deleted file mode 100644 index 6f24457cc..000000000 --- a/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs +++ /dev/null @@ -1,160 +0,0 @@ -use super::{ - arrow_array_to_column_conversion::ArrayRefExt, - record_batch_errors::{AppendRecordBatchTableCommitmentError, RecordBatchToColumnsError}, -}; -use crate::base::{ - commitment::{ - AppendColumnCommitmentsError, AppendTableCommitmentError, Commitment, TableCommitment, - TableCommitmentFromColumnsError, - }, - database::Column, - scalar::Scalar, -}; -use arrow::record_batch::RecordBatch; -use bumpalo::Bump; -use proof_of_sql_parser::Identifier; - -/// This function will return an error if: -/// - The field name cannot be parsed into an [`Identifier`]. -/// - The conversion of an Arrow array to a [`Column`] fails. -pub fn batch_to_columns<'a, S: Scalar + 'a>( - batch: &'a RecordBatch, - alloc: &'a Bump, -) -> Result)>, RecordBatchToColumnsError> { - batch - .schema() - .fields() - .into_iter() - .zip(batch.columns()) - .map(|(field, array)| { - let identifier: Identifier = field.name().parse()?; - let column: Column = array.to_column(alloc, &(0..array.len()), None)?; - Ok((identifier, column)) - }) - .collect() -} - -impl TableCommitment { - /// Append an arrow [`RecordBatch`] to the existing [`TableCommitment`]. - /// - /// The row offset is assumed to be the end of the [`TableCommitment`]'s current range. - /// - /// Will error on a variety of mismatches, or if the provided columns have mixed length. 
- #[allow(clippy::missing_panics_doc)] - pub fn try_append_record_batch( - &mut self, - batch: &RecordBatch, - setup: &C::PublicSetup<'_>, - ) -> Result<(), AppendRecordBatchTableCommitmentError> { - match self.try_append_rows( - batch_to_columns::(batch, &Bump::new())? - .iter() - .map(|(a, b)| (a, b)), - setup, - ) { - Ok(()) => Ok(()), - Err(AppendTableCommitmentError::MixedLengthColumns { .. }) => { - panic!("RecordBatches cannot have columns of mixed length") - } - Err(AppendTableCommitmentError::AppendColumnCommitments { - source: AppendColumnCommitmentsError::DuplicateIdentifiers { .. }, - }) => { - panic!("RecordBatches cannot have duplicate identifiers") - } - Err(AppendTableCommitmentError::AppendColumnCommitments { - source: AppendColumnCommitmentsError::Mismatch { source: e }, - }) => Err(e)?, - } - } - /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`]. - pub fn try_from_record_batch( - batch: &RecordBatch, - setup: &C::PublicSetup<'_>, - ) -> Result, RecordBatchToColumnsError> { - Self::try_from_record_batch_with_offset(batch, 0, setup) - } - - /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`] with the given row offset. - #[allow(clippy::missing_panics_doc)] - pub fn try_from_record_batch_with_offset( - batch: &RecordBatch, - offset: usize, - setup: &C::PublicSetup<'_>, - ) -> Result, RecordBatchToColumnsError> { - match Self::try_from_columns_with_offset( - batch_to_columns::(batch, &Bump::new())? - .iter() - .map(|(a, b)| (a, b)), - offset, - setup, - ) { - Ok(commitment) => Ok(commitment), - Err(TableCommitmentFromColumnsError::MixedLengthColumns { .. }) => { - panic!("RecordBatches cannot have columns of mixed length") - } - Err(TableCommitmentFromColumnsError::DuplicateIdentifiers { .. }) => { - panic!("RecordBatches cannot have duplicate identifiers") - } - } - } -} - -#[cfg(all(test, feature = "blitzar"))] -mod tests { - use super::*; - use crate::{base::scalar::Curve25519Scalar, record_batch}; - use curve25519_dalek::RistrettoPoint; - - #[test] - fn we_can_create_and_append_table_commitments_with_record_batchs() { - let batch = record_batch!( - "a" => [1i64, 2, 3], - "b" => ["1", "2", "3"], - ); - - let b_scals = ["1".into(), "2".into(), "3".into()]; - - let columns = [ - ( - &"a".parse().unwrap(), - &Column::::BigInt(&[1, 2, 3]), - ), - ( - &"b".parse().unwrap(), - &Column::::VarChar((&["1", "2", "3"], &b_scals)), - ), - ]; - - let mut expected_commitment = - TableCommitment::::try_from_columns_with_offset(columns, 0, &()) - .unwrap(); - - let mut commitment = - TableCommitment::::try_from_record_batch(&batch, &()).unwrap(); - - assert_eq!(commitment, expected_commitment); - - let batch2 = record_batch!( - "a" => [4i64, 5, 6], - "b" => ["4", "5", "6"], - ); - - let b_scals2 = ["4".into(), "5".into(), "6".into()]; - - let columns2 = [ - ( - &"a".parse().unwrap(), - &Column::::BigInt(&[4, 5, 6]), - ), - ( - &"b".parse().unwrap(), - &Column::::VarChar((&["4", "5", "6"], &b_scals2)), - ), - ]; - - expected_commitment.try_append_rows(columns2, &()).unwrap(); - commitment.try_append_record_batch(&batch2, &()).unwrap(); - - assert_eq!(commitment, expected_commitment); - } -} diff --git a/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs b/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs deleted file mode 100644 index b3986d1a6..000000000 --- a/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs +++ /dev/null @@ -1,38 +0,0 @@ -use super::arrow_array_to_column_conversion::ArrowArrayToColumnConversionError; -use 
crate::base::commitment::ColumnCommitmentsMismatch; -use proof_of_sql_parser::ParseError; -use snafu::Snafu; - -/// Errors that can occur when trying to create or extend a [`TableCommitment`] from a record batch. -#[derive(Debug, Snafu)] -pub enum RecordBatchToColumnsError { - /// Error converting from arrow array - #[snafu(transparent)] - ArrowArrayToColumnConversionError { - /// The underlying source error - source: ArrowArrayToColumnConversionError, - }, - #[snafu(transparent)] - /// This error occurs when converting from a record batch name to an identifier fails. (Which may be impossible.) - FieldParseFail { - /// The underlying source error - source: ParseError, - }, -} - -/// Errors that can occur when attempting to append a record batch to a [`TableCommitment`]. -#[derive(Debug, Snafu)] -pub enum AppendRecordBatchTableCommitmentError { - /// During commitment operation, metadata indicates that operand tables cannot be the same. - #[snafu(transparent)] - ColumnCommitmentsMismatch { - /// The underlying source error - source: ColumnCommitmentsMismatch, - }, - /// Error converting from arrow array - #[snafu(transparent)] - ArrowBatchToColumnError { - /// The underlying source error - source: RecordBatchToColumnsError, - }, -} diff --git a/crates/proof-of-sql/src/base/commitment/table_commitment.rs b/crates/proof-of-sql/src/base/commitment/table_commitment.rs index 1a52b7cea..0f9e21783 100644 --- a/crates/proof-of-sql/src/base/commitment/table_commitment.rs +++ b/crates/proof-of-sql/src/base/commitment/table_commitment.rs @@ -2,13 +2,18 @@ use super::{ committable_column::CommittableColumn, AppendColumnCommitmentsError, ColumnCommitments, ColumnCommitmentsMismatch, Commitment, DuplicateIdentifiers, }; +#[cfg(feature = "arrow")] +use crate::base::database::{ArrayRefExt, ArrowArrayToColumnConversionError}; use crate::base::{ - database::{ColumnField, CommitmentAccessor, OwnedTable, TableRef}, + database::{Column, ColumnField, CommitmentAccessor, OwnedTable, TableRef}, scalar::Scalar, }; use alloc::vec::Vec; +#[cfg(feature = "arrow")] +use arrow::record_batch::RecordBatch; +use bumpalo::Bump; use core::ops::Range; -use proof_of_sql_parser::Identifier; +use proof_of_sql_parser::{Identifier, ParseError}; use serde::{Deserialize, Serialize}; use snafu::Snafu; @@ -78,6 +83,42 @@ pub enum TableCommitmentArithmeticError { NonContiguous, } +/// Errors that can occur when trying to create or extend a [`TableCommitment`] from a record batch. +#[cfg(feature = "arrow")] +#[derive(Debug, Snafu)] +pub enum RecordBatchToColumnsError { + /// Error converting from arrow array + #[snafu(transparent)] + ArrowArrayToColumnConversionError { + /// The underlying source error + source: ArrowArrayToColumnConversionError, + }, + #[snafu(transparent)] + /// This error occurs when converting from a record batch name to an identifier fails. (Which may be impossible.) + FieldParseFail { + /// The underlying source error + source: ParseError, + }, +} + +/// Errors that can occur when attempting to append a record batch to a [`TableCommitment`]. +#[cfg(feature = "arrow")] +#[derive(Debug, Snafu)] +pub enum AppendRecordBatchTableCommitmentError { + /// During commitment operation, metadata indicates that operand tables cannot be the same.
+ #[snafu(transparent)] + ColumnCommitmentsMismatch { + /// The underlying source error + source: ColumnCommitmentsMismatch, + }, + /// Error converting from arrow array + #[snafu(transparent)] + ArrowBatchToColumnError { + /// The underlying source error + source: RecordBatchToColumnsError, + }, +} + /// Commitment for an entire table, with column and table metadata. /// /// Unlike [`ColumnCommitments`], all columns in this commitment must have the same length. @@ -357,6 +398,90 @@ impl TableCommitment { range, }) } + + /// Append an arrow [`RecordBatch`] to the existing [`TableCommitment`]. + /// + /// The row offset is assumed to be the end of the [`TableCommitment`]'s current range. + /// + /// Will error on a variety of mismatches, or if the provided columns have mixed length. + #[cfg(feature = "arrow")] + #[allow(clippy::missing_panics_doc)] + pub fn try_append_record_batch( + &mut self, + batch: &RecordBatch, + setup: &C::PublicSetup<'_>, + ) -> Result<(), AppendRecordBatchTableCommitmentError> { + match self.try_append_rows( + batch_to_columns::(batch, &Bump::new())? + .iter() + .map(|(a, b)| (a, b)), + setup, + ) { + Ok(()) => Ok(()), + Err(AppendTableCommitmentError::MixedLengthColumns { .. }) => { + panic!("RecordBatches cannot have columns of mixed length") + } + Err(AppendTableCommitmentError::AppendColumnCommitments { + source: AppendColumnCommitmentsError::DuplicateIdentifiers { .. }, + }) => { + panic!("RecordBatches cannot have duplicate identifiers") + } + Err(AppendTableCommitmentError::AppendColumnCommitments { + source: AppendColumnCommitmentsError::Mismatch { source: e }, + }) => Err(e)?, + } + } + /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`]. + #[cfg(feature = "arrow")] + pub fn try_from_record_batch( + batch: &RecordBatch, + setup: &C::PublicSetup<'_>, + ) -> Result, RecordBatchToColumnsError> { + Self::try_from_record_batch_with_offset(batch, 0, setup) + } + + /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`] with the given row offset. + #[allow(clippy::missing_panics_doc)] + #[cfg(feature = "arrow")] + pub fn try_from_record_batch_with_offset( + batch: &RecordBatch, + offset: usize, + setup: &C::PublicSetup<'_>, + ) -> Result, RecordBatchToColumnsError> { + match Self::try_from_columns_with_offset( + batch_to_columns::(batch, &Bump::new())? + .iter() + .map(|(a, b)| (a, b)), + offset, + setup, + ) { + Ok(commitment) => Ok(commitment), + Err(TableCommitmentFromColumnsError::MixedLengthColumns { .. }) => { + panic!("RecordBatches cannot have columns of mixed length") + } + Err(TableCommitmentFromColumnsError::DuplicateIdentifiers { .. }) => { + panic!("RecordBatches cannot have duplicate identifiers") + } + } + } +} + +#[cfg(feature = "arrow")] +fn batch_to_columns<'a, S: Scalar + 'a>( + batch: &'a RecordBatch, + alloc: &'a Bump, +) -> Result)>, RecordBatchToColumnsError> { + batch + .schema() + .fields() + .into_iter() + .zip(batch.columns()) + .map(|(field, array)| { + let identifier: Identifier = field.name().parse()?; + let column: Column = array.to_column(alloc, &(0..array.len()), None)?; + Ok((identifier, column)) + }) + .collect() } /// Return the number of rows for the provided columns, erroring if they have mixed length. 
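The record-batch commitment helpers moved into `table_commitment.rs` above can be exercised roughly as in the following sketch. It mirrors the test introduced later in this diff and assumes the `arrow` and `blitzar` features, the crate-level `record_batch!` macro, and the Ristretto commitment with its unit `PublicSetup`.

```rust
// Minimal usage sketch under the assumptions noted above; adapted from the
// test added in this patch, not a definitive API reference.
use curve25519_dalek::RistrettoPoint;
use proof_of_sql::{base::commitment::TableCommitment, record_batch};

fn commit_to_record_batches() {
    // Two Arrow batches sharing the same schema ("a": Int64, "b": Utf8).
    let batch = record_batch!("a" => [1i64, 2, 3], "b" => ["1", "2", "3"]);
    let batch2 = record_batch!("a" => [4i64, 5, 6], "b" => ["4", "5", "6"]);

    // Commit to the first batch starting at row offset 0.
    let mut commitment =
        TableCommitment::<RistrettoPoint>::try_from_record_batch(&batch, &()).unwrap();

    // Append the second batch; its offset is taken to be the end of the
    // commitment's current row range, per the doc comment above.
    commitment.try_append_record_batch(&batch2, &()).unwrap();
}
```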
@@ -380,10 +505,13 @@ fn num_rows_of_columns<'a>( #[cfg(all(test, feature = "arrow", feature = "blitzar"))] mod tests { use super::*; - use crate::base::{ - database::{owned_table_utility::*, OwnedColumn}, - map::IndexMap, - scalar::Curve25519Scalar, + use crate::{ + base::{ + database::{owned_table_utility::*, OwnedColumn}, + map::IndexMap, + scalar::Curve25519Scalar, + }, + record_batch, }; use curve25519_dalek::RistrettoPoint; @@ -1135,4 +1263,57 @@ mod tests { Err(TableCommitmentArithmeticError::NegativeRange { .. }) )); } + + #[test] + fn we_can_create_and_append_table_commitments_with_record_batchs() { + let batch = record_batch!( + "a" => [1i64, 2, 3], + "b" => ["1", "2", "3"], + ); + + let b_scals = ["1".into(), "2".into(), "3".into()]; + + let columns = [ + ( + &"a".parse().unwrap(), + &Column::::BigInt(&[1, 2, 3]), + ), + ( + &"b".parse().unwrap(), + &Column::::VarChar((&["1", "2", "3"], &b_scals)), + ), + ]; + + let mut expected_commitment = + TableCommitment::::try_from_columns_with_offset(columns, 0, &()) + .unwrap(); + + let mut commitment = + TableCommitment::::try_from_record_batch(&batch, &()).unwrap(); + + assert_eq!(commitment, expected_commitment); + + let batch2 = record_batch!( + "a" => [4i64, 5, 6], + "b" => ["4", "5", "6"], + ); + + let b_scals2 = ["4".into(), "5".into(), "6".into()]; + + let columns2 = [ + ( + &"a".parse().unwrap(), + &Column::::BigInt(&[4, 5, 6]), + ), + ( + &"b".parse().unwrap(), + &Column::::VarChar((&["4", "5", "6"], &b_scals2)), + ), + ]; + + expected_commitment.try_append_rows(columns2, &()).unwrap(); + commitment.try_append_record_batch(&batch2, &()).unwrap(); + + assert_eq!(commitment, expected_commitment); + } } diff --git a/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs similarity index 100% rename from crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs rename to crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index be536b1d5..3d3b11372 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -4,7 +4,9 @@ use crate::base::{ scalar::{Scalar, ScalarExt}, slice_ops::slice_cast_with, }; -use alloc::vec::Vec; +use alloc::{sync::Arc, vec::Vec}; +#[cfg(feature = "arrow")] +use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; use bumpalo::Bump; use core::{ fmt, @@ -410,6 +412,70 @@ impl ColumnType { } } +/// Convert [`ColumnType`] values to some arrow [`DataType`] +#[cfg(feature = "arrow")] +impl From<&ColumnType> for DataType { + fn from(column_type: &ColumnType) -> Self { + match column_type { + ColumnType::Boolean => DataType::Boolean, + ColumnType::TinyInt => DataType::Int8, + ColumnType::SmallInt => DataType::Int16, + ColumnType::Int => DataType::Int32, + ColumnType::BigInt => DataType::Int64, + ColumnType::Int128 => DataType::Decimal128(38, 0), + ColumnType::Decimal75(precision, scale) => { + DataType::Decimal256(precision.value(), *scale) + } + ColumnType::VarChar => DataType::Utf8, + ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), + ColumnType::TimestampTZ(timeunit, timezone) => { + let arrow_timezone = Some(Arc::from(timezone.to_string())); + let arrow_timeunit = match timeunit { + PoSQLTimeUnit::Second => ArrowTimeUnit::Second, + PoSQLTimeUnit::Millisecond => 
ArrowTimeUnit::Millisecond, + PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, + PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, + }; + DataType::Timestamp(arrow_timeunit, arrow_timezone) + } + } + } +} + +/// Convert arrow [`DataType`] values to some [`ColumnType`] +#[cfg(feature = "arrow")] +impl TryFrom for ColumnType { + type Error = String; + + fn try_from(data_type: DataType) -> Result { + match data_type { + DataType::Boolean => Ok(ColumnType::Boolean), + DataType::Int8 => Ok(ColumnType::TinyInt), + DataType::Int16 => Ok(ColumnType::SmallInt), + DataType::Int32 => Ok(ColumnType::Int), + DataType::Int64 => Ok(ColumnType::BigInt), + DataType::Decimal128(38, 0) => Ok(ColumnType::Int128), + DataType::Decimal256(precision, scale) if precision <= 75 => { + Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) + } + DataType::Timestamp(time_unit, timezone_option) => { + let posql_time_unit = match time_unit { + ArrowTimeUnit::Second => PoSQLTimeUnit::Second, + ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, + ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, + ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, + }; + Ok(ColumnType::TimestampTZ( + posql_time_unit, + PoSQLTimeZone::try_from(&timezone_option)?, + )) + } + DataType::Utf8 => Ok(ColumnType::VarChar), + _ => Err(format!("Unsupported arrow data type {data_type:?}")), + } + } +} + /// Display the column type as a str name (in all caps) impl Display for ColumnType { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { @@ -504,6 +570,18 @@ impl ColumnField { } } +/// Convert [`ColumnField`] values to arrow Field +#[cfg(feature = "arrow")] +impl From<&ColumnField> for Field { + fn from(column_field: &ColumnField) -> Self { + Field::new( + column_field.name().name(), + (&column_field.data_type()).into(), + false, + ) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index b40ba10eb..e65b7efb5 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -19,18 +19,26 @@ mod literal_value; pub use literal_value::LiteralValue; mod table_ref; -#[cfg(feature = "arrow")] -pub use crate::base::arrow::{ - arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError}, - owned_and_arrow_conversions::OwnedArrowConversionError, - record_batch_utility::ToArrow, - scalar_and_i256_conversions, -}; pub use table_ref::TableRef; +#[cfg(feature = "arrow")] +mod arrow_array_to_column_conversion; +#[cfg(feature = "arrow")] +pub use arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError}; + +#[cfg(feature = "arrow")] +mod record_batch_utility; +#[cfg(feature = "arrow")] +pub use record_batch_utility::ToArrow; + #[cfg(feature = "arrow")] pub mod arrow_schema_utility; +#[cfg(all(test, feature = "arrow", feature = "test"))] +mod test_accessor_utility; +#[cfg(all(test, feature = "arrow", feature = "test"))] +pub use test_accessor_utility::{make_random_test_accessor_data, RandomTestAccessorDescriptor}; + mod owned_column; pub(crate) use owned_column::compare_indexes_by_owned_columns_with_direction; pub use owned_column::OwnedColumn; @@ -55,6 +63,13 @@ mod expression_evaluation_error; mod expression_evaluation_test; pub use expression_evaluation_error::{ExpressionEvaluationError, ExpressionEvaluationResult}; +#[cfg(feature = "arrow")] +mod owned_and_arrow_conversions; +#[cfg(feature = "arrow")] +pub use 
owned_and_arrow_conversions::OwnedArrowConversionError; +#[cfg(all(test, feature = "arrow"))] +mod owned_and_arrow_conversions_test; + mod test_accessor; pub use test_accessor::TestAccessor; #[cfg(test)] @@ -69,6 +84,9 @@ mod owned_table_test_accessor; pub use owned_table_test_accessor::OwnedTableTestAccessor; #[cfg(all(test, feature = "blitzar"))] mod owned_table_test_accessor_test; +/// Contains traits for scalar <-> i256 conversions +#[cfg(feature = "arrow")] +pub mod scalar_and_i256_conversions; /// TODO: add docs pub(crate) mod filter_util; diff --git a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs similarity index 98% rename from crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs rename to crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs index 74ad96839..adf4f94af 100644 --- a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs @@ -12,9 +12,12 @@ //! This is because there is no `Int128` type in Arrow. //! This does not check that the values are less than 39 digits. //! However, the actual arrow backing `i128` is the correct value. -use super::scalar_and_i256_conversions::{convert_i256_to_scalar, convert_scalar_to_i256}; +use super::scalar_and_i256_conversions::convert_scalar_to_i256; use crate::base::{ - database::{OwnedColumn, OwnedTable, OwnedTableError}, + database::{ + scalar_and_i256_conversions::convert_i256_to_scalar, OwnedColumn, OwnedTable, + OwnedTableError, + }, map::IndexMap, math::decimal::Precision, scalar::Scalar, diff --git a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs similarity index 97% rename from crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs rename to crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs index 539d94eaa..970df4bad 100644 --- a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs +++ b/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs @@ -1,7 +1,7 @@ -use super::owned_and_arrow_conversions::OwnedArrowConversionError; +use super::{OwnedColumn, OwnedTable}; use crate::{ base::{ - database::{owned_table_utility::*, OwnedColumn, OwnedTable}, + database::{owned_table_utility::*, OwnedArrowConversionError}, map::IndexMap, scalar::Curve25519Scalar, }, diff --git a/crates/proof-of-sql/src/base/arrow/record_batch_utility.rs b/crates/proof-of-sql/src/base/database/record_batch_utility.rs similarity index 99% rename from crates/proof-of-sql/src/base/arrow/record_batch_utility.rs rename to crates/proof-of-sql/src/base/database/record_batch_utility.rs index 3ede592bd..d1180005b 100644 --- a/crates/proof-of-sql/src/base/arrow/record_batch_utility.rs +++ b/crates/proof-of-sql/src/base/database/record_batch_utility.rs @@ -169,7 +169,7 @@ macro_rules! 
record_batch { use arrow::datatypes::Field; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; - use $crate::base::arrow::record_batch_utility::ToArrow; + use $crate::base::database::ToArrow; let schema = Arc::new(Schema::new( vec![$( diff --git a/crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs b/crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs similarity index 96% rename from crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs rename to crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs index f606c03cb..9a44c3766 100644 --- a/crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs +++ b/crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs @@ -54,10 +54,12 @@ pub fn convert_i256_to_scalar(value: &i256) -> Option { #[cfg(test)] mod tests { - use super::{ - convert_i256_to_scalar, convert_scalar_to_i256, MAX_SUPPORTED_I256, MIN_SUPPORTED_I256, + + use super::{convert_i256_to_scalar, convert_scalar_to_i256}; + use crate::base::{ + database::scalar_and_i256_conversions::{MAX_SUPPORTED_I256, MIN_SUPPORTED_I256}, + scalar::{Curve25519Scalar, Scalar}, }; - use crate::base::scalar::{Curve25519Scalar, Scalar}; use arrow::datatypes::i256; use num_traits::Zero; use rand::RngCore; diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs new file mode 100644 index 000000000..2b06081dd --- /dev/null +++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs @@ -0,0 +1,218 @@ +use crate::base::database::ColumnType; +use arrow::{ + array::{ + Array, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, Int64Array, + Int8Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, + TimestampNanosecondArray, TimestampSecondArray, + }, + datatypes::{i256, DataType, Field, Schema, TimeUnit}, + record_batch::RecordBatch, +}; +use proof_of_sql_parser::posql_time::PoSQLTimeUnit; +use rand::{ + distributions::{Distribution, Uniform}, + rngs::StdRng, +}; +use std::sync::Arc; + +/// Specify what form a randomly generated `TestAccessor` can take +pub struct RandomTestAccessorDescriptor { + /// The minimum number of rows in the generated `RecordBatch` + pub min_rows: usize, + /// The maximum number of rows in the generated `RecordBatch` + pub max_rows: usize, + /// The minimum value of the generated data + pub min_value: i64, + /// The maximum value of the generated data + pub max_value: i64, +} + +impl Default for RandomTestAccessorDescriptor { + fn default() -> Self { + Self { + min_rows: 0, + max_rows: 100, + min_value: -5, + max_value: 5, + } + } +} + +/// Generate a `DataFrame` with random data +/// +/// # Panics +/// +/// This function may panic in the following cases: +/// - If `Precision::new(7)` fails when creating a `Decimal75` column type, which would occur +/// if the precision is invalid. +/// - When calling `.unwrap()` on the result of `RecordBatch::try_new(schema, columns)`, which +/// will panic if the schema and columns do not align correctly or if there are any other +/// underlying errors. 
+#[allow(dead_code, clippy::too_many_lines)] +pub fn make_random_test_accessor_data( + rng: &mut StdRng, + cols: &[(&str, ColumnType)], + descriptor: &RandomTestAccessorDescriptor, +) -> RecordBatch { + let n = Uniform::new(descriptor.min_rows, descriptor.max_rows + 1).sample(rng); + let dist = Uniform::new(descriptor.min_value, descriptor.max_value + 1); + + let mut columns: Vec> = Vec::with_capacity(n); + let mut column_fields: Vec<_> = Vec::with_capacity(n); + + for (col_name, col_type) in cols { + let values: Vec = dist.sample_iter(&mut *rng).take(n).collect(); + + match col_type { + ColumnType::Boolean => { + column_fields.push(Field::new(*col_name, DataType::Boolean, false)); + let boolean_values: Vec = values.iter().map(|x| x % 2 != 0).collect(); + columns.push(Arc::new(BooleanArray::from(boolean_values))); + } + ColumnType::TinyInt => { + column_fields.push(Field::new(*col_name, DataType::Int8, false)); + let values: Vec = values + .iter() + .map(|x| ((*x >> 56) as i8)) // Shift right to align the lower 8 bits + .collect(); + columns.push(Arc::new(Int8Array::from(values))); + } + ColumnType::SmallInt => { + column_fields.push(Field::new(*col_name, DataType::Int16, false)); + let values: Vec = values + .iter() + .map(|x| ((*x >> 48) as i16)) // Shift right to align the lower 16 bits + .collect(); + columns.push(Arc::new(Int16Array::from(values))); + } + ColumnType::Int => { + column_fields.push(Field::new(*col_name, DataType::Int32, false)); + let values: Vec = values + .iter() + .map(|x| ((*x >> 32) as i32)) // Shift right to align the lower 32 bits + .collect(); + columns.push(Arc::new(Int32Array::from(values))); + } + ColumnType::BigInt => { + column_fields.push(Field::new(*col_name, DataType::Int64, false)); + let values: Vec = values.clone(); + columns.push(Arc::new(Int64Array::from(values))); + } + ColumnType::Int128 => { + column_fields.push(Field::new(*col_name, DataType::Decimal128(38, 0), false)); + + let values: Vec = values.iter().map(|x| i128::from(*x)).collect(); + columns.push(Arc::new( + Decimal128Array::from(values.clone()) + .with_precision_and_scale(38, 0) + .unwrap(), + )); + } + ColumnType::Decimal75(precision, scale) => { + column_fields.push(Field::new( + *col_name, + DataType::Decimal256(precision.value(), *scale), + false, + )); + + let values: Vec = values.iter().map(|x| i256::from(*x)).collect(); + columns.push(Arc::new( + Decimal256Array::from(values.clone()) + .with_precision_and_scale(precision.value(), *scale) + .unwrap(), + )); + } + ColumnType::VarChar => { + let col = &values + .iter() + .map(|v| "s".to_owned() + &v.to_string()[..]) + .collect::>()[..]; + let col: Vec<_> = col.iter().map(String::as_str).collect(); + + column_fields.push(Field::new(*col_name, DataType::Utf8, false)); + + columns.push(Arc::new(StringArray::from(col))); + } + ColumnType::Scalar => unimplemented!("Scalar columns are not supported by arrow"), + ColumnType::TimestampTZ(tu, tz) => { + column_fields.push(Field::new( + *col_name, + DataType::Timestamp( + match tu { + PoSQLTimeUnit::Second => TimeUnit::Second, + PoSQLTimeUnit::Millisecond => TimeUnit::Millisecond, + PoSQLTimeUnit::Microsecond => TimeUnit::Microsecond, + PoSQLTimeUnit::Nanosecond => TimeUnit::Nanosecond, + }, + Some(Arc::from(tz.to_string())), + ), + false, + )); + // Create the correct timestamp array based on the time unit + let timestamp_array: Arc = match tu { + PoSQLTimeUnit::Second => Arc::new(TimestampSecondArray::from(values.clone())), + PoSQLTimeUnit::Millisecond => { + 
Arc::new(TimestampMillisecondArray::from(values.clone())) + } + PoSQLTimeUnit::Microsecond => { + Arc::new(TimestampMicrosecondArray::from(values.clone())) + } + PoSQLTimeUnit::Nanosecond => { + Arc::new(TimestampNanosecondArray::from(values.clone())) + } + }; + columns.push(timestamp_array); + } + } + } + + let schema = Arc::new(Schema::new(column_fields)); + RecordBatch::try_new(schema, columns).unwrap() +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::record_batch; + use rand_core::SeedableRng; + + #[test] + fn we_can_construct_a_random_test_data() { + let descriptor = RandomTestAccessorDescriptor::default(); + let mut rng = StdRng::from_seed([0u8; 32]); + let cols = [ + ("a", ColumnType::BigInt), + ("b", ColumnType::VarChar), + ("c", ColumnType::Int128), + ("d", ColumnType::SmallInt), + ("e", ColumnType::Int), + ("f", ColumnType::TinyInt), + ]; + + let data1 = make_random_test_accessor_data(&mut rng, &cols, &descriptor); + let data2 = make_random_test_accessor_data(&mut rng, &cols, &descriptor); + assert_ne!(data1.num_rows(), data2.num_rows()); + } + + #[test] + fn we_can_construct_a_random_test_data_with_the_correct_data() { + let descriptor = RandomTestAccessorDescriptor { + min_rows: 1, + max_rows: 1, + min_value: -2, + max_value: -2, + }; + let mut rng = StdRng::from_seed([0u8; 32]); + let cols = [ + ("b", ColumnType::BigInt), + ("a", ColumnType::VarChar), + ("c", ColumnType::Int128), + ]; + let data = make_random_test_accessor_data(&mut rng, &cols, &descriptor); + + assert_eq!( + data, + record_batch!("b" => [-2_i64], "a" => ["s-2"], "c" => [-2_i128]) + ); + } +} diff --git a/crates/proof-of-sql/src/base/mod.rs b/crates/proof-of-sql/src/base/mod.rs index 657b855d1..ad5573639 100644 --- a/crates/proof-of-sql/src/base/mod.rs +++ b/crates/proof-of-sql/src/base/mod.rs @@ -1,8 +1,5 @@ //! This module contains basic shared functionalities of the library. /// TODO: add docs -#[cfg(feature = "arrow")] -pub mod arrow; - pub(crate) mod bit; pub mod commitment; pub mod database; diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs index b36e1177d..8ed2ddbb5 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs @@ -1,14 +1,10 @@ use super::{ - dynamic_dory_structure::{full_width_of_row, row_and_column_from_index, row_start_index}, - pairings, DoryScalar, DynamicDoryCommitment, G1Projective, ProverSetup, GT, + dynamic_dory_structure::row_and_column_from_index, pairings, DoryScalar, DynamicDoryCommitment, + G1Affine, G1Projective, ProverSetup, GT, }; -use crate::base::{commitment::CommittableColumn, if_rayon, slice_ops::slice_cast}; -use alloc::vec::Vec; -use ark_ec::VariableBaseMSM; -use bytemuck::TransparentWrapper; +use crate::base::commitment::CommittableColumn; +use alloc::{vec, vec::Vec}; use num_traits::Zero; -#[cfg(feature = "rayon")] -use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; #[tracing::instrument(name = "compute_dory_commitment_impl (cpu)", level = "debug", skip_all)] /// # Panics @@ -17,7 +13,6 @@ use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterato /// - `setup.Gamma_1.last()` returns `None`, indicating that `Gamma_1` is empty. /// - `setup.Gamma_2.last()` returns `None`, indicating that `Gamma_2` is empty. 
/// - The indexing for `Gamma_2` with `first_row..=last_row` goes out of bounds. -#[allow(clippy::range_plus_one)] fn compute_dory_commitment_impl<'a, T>( column: &'a [T], offset: usize, @@ -27,39 +22,18 @@ where &'a T: Into, T: Sync, { - if column.is_empty() { - return DynamicDoryCommitment::default(); - } let Gamma_1 = setup.Gamma_1.last().unwrap(); let Gamma_2 = setup.Gamma_2.last().unwrap(); - let (first_row, first_col) = row_and_column_from_index(offset); - let (last_row, last_col) = row_and_column_from_index(offset + column.len() - 1); - - let row_commits: Vec<_> = if_rayon!( - (first_row..=last_row).into_par_iter(), - (first_row..=last_row) - ) - .map(|row| { - let width = full_width_of_row(row); - let row_start = row_start_index(row); - let (gamma_range, column_range) = if first_row == last_row { - (first_col..last_col + 1, 0..column.len()) - } else if row == 1 { - (1..2, (1 - offset)..(2 - offset)) - } else if row == first_row { - (first_col..width, 0..width - first_col) - } else if row == last_row { - (0..last_col + 1, column.len() - last_col - 1..column.len()) - } else { - (0..width, row_start - offset..width + row_start - offset) - }; - G1Projective::msm_unchecked( - &Gamma_1[gamma_range], - TransparentWrapper::peel_slice(&slice_cast::<_, DoryScalar>(&column[column_range])), - ) - }) - .collect(); - + let (first_row, _) = row_and_column_from_index(offset); + let (last_row, _) = row_and_column_from_index(offset + column.len() - 1); + let row_commits = column.iter().enumerate().fold( + vec![G1Projective::from(G1Affine::identity()); last_row - first_row + 1], + |mut row_commits, (i, v)| { + let (row, col) = row_and_column_from_index(i + offset); + row_commits[row - first_row] += Gamma_1[col] * v.into().0; + row_commits + }, + ); DynamicDoryCommitment(pairings::multi_pairing( row_commits, &Gamma_2[first_row..=last_row], @@ -96,7 +70,8 @@ pub(super) fn compute_dynamic_dory_commitments( offset: usize, setup: &ProverSetup, ) -> Vec { - if_rayon!(committable_columns.par_iter(), committable_columns.iter()) + committable_columns + .iter() .map(|column| { column .is_empty() diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_structure.rs b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_structure.rs index 03f6ffaa3..2598db988 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_structure.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_structure.rs @@ -40,7 +40,7 @@ pub(crate) const fn full_width_of_row(row: usize) -> usize { /// Returns the index that belongs in the first column in a particular row. /// /// Note: when row = 1, this correctly returns 0, even though no data belongs there. -#[cfg(any(test, not(feature = "blitzar")))] +#[cfg(test)] pub(crate) const fn row_start_index(row: usize) -> usize { let width_of_row = full_width_of_row(row); width_of_row * (row - width_of_row / 2) diff --git a/crates/proof-of-sql/src/sql/parse/query_expr_tests.rs b/crates/proof-of-sql/src/sql/parse/query_expr_tests.rs index 0c1cfd965..4f68869d9 100644 --- a/crates/proof-of-sql/src/sql/parse/query_expr_tests.rs +++ b/crates/proof-of-sql/src/sql/parse/query_expr_tests.rs @@ -54,6 +54,25 @@ pub fn schema_accessor_from_table_ref_with_schema( TestSchemaAccessor::new(indexmap! {table => schema}) } +fn get_test_accessor() -> (TableRef, TestSchemaAccessor) { + let table = "sxt.t".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + table, + indexmap! 
{ + "s".parse().unwrap() => ColumnType::VarChar, + "i".parse().unwrap() => ColumnType::BigInt, + "d".parse().unwrap() => ColumnType::Int128, + "s0".parse().unwrap() => ColumnType::VarChar, + "i0".parse().unwrap() => ColumnType::BigInt, + "d0".parse().unwrap() => ColumnType::Int128, + "s1".parse().unwrap() => ColumnType::VarChar, + "i1".parse().unwrap() => ColumnType::BigInt, + "d1".parse().unwrap() => ColumnType::Int128, + }, + ); + (table, accessor) +} + #[test] fn we_can_convert_an_ast_with_one_column() { let t = "sxt.sxt_tab".parse().unwrap(); @@ -1109,17 +1128,8 @@ fn we_can_group_by_without_using_aggregate_functions() { #[test] fn group_by_expressions_are_parsed_before_an_order_by_referencing_an_aggregate_alias_result() { let query_text = - "select max(salary) max_sal, department_budget d, count(department_budget) from sxt.employees group by department_budget, tax order by max_sal"; - - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "department_budget".parse().unwrap() => ColumnType::BigInt, - "salary".parse().unwrap() => ColumnType::BigInt, - "tax".parse().unwrap() => ColumnType::BigInt, - }, - ); + "select max(i) max_sal, i0 d, count(i0) from sxt.t group by i0, i1 order by max_sal"; + let (t, accessor) = get_test_accessor(); let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1128,20 +1138,20 @@ fn group_by_expressions_are_parsed_before_an_order_by_referencing_an_aggregate_a let expected_query = QueryExpr::new( filter( vec![ - col_expr_plan(t, "department_budget", &accessor), - col_expr_plan(t, "salary", &accessor), - col_expr_plan(t, "tax", &accessor), + col_expr_plan(t, "i", &accessor), + col_expr_plan(t, "i0", &accessor), + col_expr_plan(t, "i1", &accessor), ], tab(t), const_bool(true), ), vec![ group_by_postprocessing( - &["department_budget", "tax"], + &["i0", "i1"], &[ - aliased_expr(max(col("salary")), "max_sal"), - aliased_expr(col("department_budget"), "d"), - aliased_expr(count(col("department_budget")), "__count__"), + aliased_expr(max(col("i")), "max_sal"), + aliased_expr(col("i0"), "d"), + aliased_expr(count(col("i0")), "__count__"), ], ), orders(&["max_sal"], &[Asc]), @@ -1230,14 +1240,8 @@ fn group_by_column_cannot_be_a_column_result_alias() { #[test] fn we_can_have_aggregate_functions_without_a_group_by_clause() { - let query_text = "select count(name) from sxt.employees"; - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "name".parse().unwrap() => ColumnType::VarChar, - }, - ); + let query_text = "select count(s) from sxt.t"; + let (t, accessor) = get_test_accessor(); let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let ast = @@ -1397,17 +1401,8 @@ fn we_can_use_the_same_result_columns_with_different_aliases_and_associate_it_wi #[test] fn we_can_use_multiple_group_by_clauses_with_multiple_agg_and_non_agg_exprs() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! 
{ - "bonus".parse().unwrap() => ColumnType::BigInt, - "name".parse().unwrap() => ColumnType::VarChar, - "salary".parse().unwrap() => ColumnType::BigInt, - "tax".parse().unwrap() => ColumnType::BigInt, - }, - ); - let query_text = "select salary d1, max(tax), salary d2, sum(bonus) sum_bonus, count(name) count_s from sxt.employees group by salary, bonus, salary"; + let (t, accessor) = get_test_accessor(); + let query_text = "select i d1, max(i1), i d2, sum(i0) sum_bonus, count(s) count_s from sxt.t group by i, i0, i"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let ast = @@ -1415,18 +1410,18 @@ fn we_can_use_multiple_group_by_clauses_with_multiple_agg_and_non_agg_exprs() { let expected_ast = QueryExpr::new( filter( - cols_expr_plan(t, &["bonus", "name", "salary", "tax"], &accessor), + cols_expr_plan(t, &["i", "i0", "i1", "s"], &accessor), tab(t), const_bool(true), ), vec![group_by_postprocessing( - &["salary", "bonus", "salary"], + &["i", "i0", "i"], &[ - aliased_expr(col("salary"), "d1"), - aliased_expr(max(col("tax")), "__max__"), - aliased_expr(col("salary"), "d2"), - aliased_expr(sum(col("bonus")), "sum_bonus"), - aliased_expr(count(col("name")), "count_s"), + aliased_expr(col("i"), "d1"), + aliased_expr(max(col("i1")), "__max__"), + aliased_expr(col("i"), "d2"), + aliased_expr(sum(col("i0")), "sum_bonus"), + aliased_expr(count(col("s")), "count_s"), ], )], ); @@ -1572,19 +1567,12 @@ fn we_can_parse_arithmetic_expression_within_aggregations_in_the_result_expr() { #[test] fn we_cannot_use_non_grouped_columns_outside_agg() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "salary".parse().unwrap() => ColumnType::BigInt, - "name".parse().unwrap() => ColumnType::VarChar, - }, - ); + let (t, accessor) = get_test_accessor(); let identifier_not_in_agg_queries = vec![ - "select salary from sxt.employees group by name", - "select sum(salary), salary from sxt.employees group by name", - "select min(salary) + salary from sxt.employees group by name", - "select 2 * salary, min(salary) from sxt.employees group by name", + "select i from sxt.t group by s", + "select sum(i), i from sxt.t group by s", + "select min(i) + i from sxt.t group by s", + "select 2 * i, min(i) from sxt.t group by s", ]; for query_text in &identifier_not_in_agg_queries { @@ -1601,9 +1589,9 @@ fn we_cannot_use_non_grouped_columns_outside_agg() { } let invalid_group_by_queries = vec![ - "select 2 * salary, min(salary) from sxt.employees", - "select sum(salary), salary from sxt.employees", - "select max(salary) + 2 * salary from sxt.employees", + "select 2 * i, min(i) from sxt.t", + "select sum(i), i from sxt.t", + "select max(i) + 2 * i from sxt.t", ]; for query_text in &invalid_group_by_queries { @@ -1620,23 +1608,11 @@ fn we_cannot_use_non_grouped_columns_outside_agg() { #[test] fn varchar_column_is_not_compatible_with_integer_column() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! 
{ - "salary".parse().unwrap() => ColumnType::BigInt, - "name".parse().unwrap() => ColumnType::VarChar, - }, - ); - - let bigint_to_varchar_queries = vec![ - "select -123 * name from sxt.employees", - "select salary - name from sxt.employees", - ]; - + let bigint_to_varchar_queries = vec!["select -123 * s from sxt.t", "select i - s from sxt.t"]; + let (t, accessor) = get_test_accessor(); let varchar_to_bigint_queries = vec![ - "select name from sxt.employees where 'abc' = salary", - "select name from sxt.employees where 'abc' != salary", + "select s from sxt.t where 'abc' = i", + "select s from sxt.t where 'abc' != i", ]; for query_text in &bigint_to_varchar_queries { @@ -1670,16 +1646,8 @@ fn varchar_column_is_not_compatible_with_integer_column() { #[test] fn arithmetic_operations_are_not_allowed_with_varchar_column() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "name".parse().unwrap() => ColumnType::VarChar, - "position".parse().unwrap() => ColumnType::VarChar, - }, - ); - - let query_text = "select name - position from sxt.employees"; + let (t, accessor) = get_test_accessor(); + let query_text = "select s - s1 from sxt.t"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let result = QueryExpr::::try_new(intermediate_ast, t.schema_id(), &accessor); @@ -1694,14 +1662,8 @@ fn arithmetic_operations_are_not_allowed_with_varchar_column() { #[test] fn varchar_column_is_not_allowed_within_numeric_aggregations() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "name".parse().unwrap() => ColumnType::VarChar, - }, - ); - let sum_query = "select sum(name) from sxt.employees"; + let (t, accessor) = get_test_accessor(); + let sum_query = "select sum(s) from sxt.t"; let intermediate_ast = SelectStatementParser::new().parse(sum_query).unwrap(); let result = QueryExpr::::try_new(intermediate_ast, t.schema_id(), &accessor); @@ -1711,7 +1673,7 @@ fn varchar_column_is_not_allowed_within_numeric_aggregations() { if expression == "cannot use expression of type 'varchar' with numeric aggregation function 'sum'" )); - let max_query = "select max(name) from sxt.employees"; + let max_query = "select max(s) from sxt.t"; let intermediate_ast = SelectStatementParser::new().parse(max_query).unwrap(); let result = QueryExpr::::try_new(intermediate_ast, t.schema_id(), &accessor); @@ -1721,7 +1683,7 @@ fn varchar_column_is_not_allowed_within_numeric_aggregations() { if expression == "cannot use expression of type 'varchar' with numeric aggregation function 'max'" )); - let min_query = "select min(name) from sxt.employees"; + let min_query = "select min(s) from sxt.t"; let intermediate_ast = SelectStatementParser::new().parse(min_query).unwrap(); let result = QueryExpr::::try_new(intermediate_ast, t.schema_id(), &accessor); @@ -1734,14 +1696,8 @@ fn varchar_column_is_not_allowed_within_numeric_aggregations() { #[test] fn group_by_with_bigint_column_is_valid() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! 
{ - "salary".parse().unwrap() => ColumnType::BigInt, - }, - ); - let query_text = "select salary from sxt.employees group by salary"; + let (t, accessor) = get_test_accessor(); + let query_text = "select i from sxt.t group by i"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1749,13 +1705,13 @@ fn group_by_with_bigint_column_is_valid() { let expected_query = QueryExpr::new( filter( - cols_expr_plan(t, &["salary"], &accessor), + cols_expr_plan(t, &["i"], &accessor), tab(t), const_bool(true), ), vec![group_by_postprocessing( - &["salary"], - &[aliased_expr(col("salary"), "salary")], + &["i"], + &[aliased_expr(col("i"), "i")], )], ); assert_eq!(query, expected_query); @@ -1763,14 +1719,8 @@ fn group_by_with_bigint_column_is_valid() { #[test] fn group_by_with_decimal_column_is_valid() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "salary".parse().unwrap() => ColumnType::Int128, - }, - ); - let query_text = "select salary from sxt.employees group by salary"; + let (t, accessor) = get_test_accessor(); + let query_text = "select d from sxt.t group by d"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1778,13 +1728,13 @@ fn group_by_with_decimal_column_is_valid() { let expected_query = QueryExpr::new( filter( - cols_expr_plan(t, &["salary"], &accessor), + cols_expr_plan(t, &["d"], &accessor), tab(t), const_bool(true), ), vec![group_by_postprocessing( - &["salary"], - &[aliased_expr(col("salary"), "salary")], + &["d"], + &[aliased_expr(col("d"), "d")], )], ); assert_eq!(query, expected_query); @@ -1792,14 +1742,8 @@ fn group_by_with_decimal_column_is_valid() { #[test] fn group_by_with_varchar_column_is_valid() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "name".parse().unwrap() => ColumnType::VarChar, - }, - ); - let query_text = "select name from sxt.employees group by name"; + let (t, accessor) = get_test_accessor(); + let query_text = "select s from sxt.t group by s"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1807,13 +1751,13 @@ fn group_by_with_varchar_column_is_valid() { let expected_query = QueryExpr::new( filter( - cols_expr_plan(t, &["name"], &accessor), + cols_expr_plan(t, &["s"], &accessor), tab(t), const_bool(true), ), vec![group_by_postprocessing( - &["name"], - &[aliased_expr(col("name"), "name")], + &["s"], + &[aliased_expr(col("s"), "s")], )], ); assert_eq!(query, expected_query); @@ -1821,16 +1765,8 @@ fn group_by_with_varchar_column_is_valid() { #[test] fn we_can_use_arithmetic_outside_agg_expressions_while_using_group_by() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! 
{ - "salary".parse().unwrap() => ColumnType::BigInt, - "tax".parse().unwrap() => ColumnType::BigInt, - }, - ); - let query_text = - "select 2 * salary + sum(salary) - tax from sxt.employees group by salary, tax"; + let (t, accessor) = get_test_accessor(); + let query_text = "select 2 * i + sum(i) - i1 from sxt.t group by i, i1"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1838,26 +1774,20 @@ fn we_can_use_arithmetic_outside_agg_expressions_while_using_group_by() { let expected_query = QueryExpr::new( filter( - cols_expr_plan(t, &["salary", "tax"], &accessor), + cols_expr_plan(t, &["i", "i1"], &accessor), tab(t), const_bool(true), ), vec![ group_by_postprocessing( - &["salary", "tax"], + &["i", "i1"], &[aliased_expr( - psub( - padd(pmul(lit(2), col("salary")), sum(col("salary"))), - col("tax"), - ), + psub(padd(pmul(lit(2), col("i")), sum(col("i"))), col("i1")), "__expr__", )], ), select_expr(&[aliased_expr( - psub( - padd(pmul(lit(2), col("salary")), col("__col_agg_0")), - col("tax"), - ), + psub(padd(pmul(lit(2), col("i")), col("__col_agg_0")), col("i1")), "__expr__", )]), ], @@ -1867,15 +1797,8 @@ fn we_can_use_arithmetic_outside_agg_expressions_while_using_group_by() { #[test] fn we_can_use_arithmetic_outside_agg_expressions_without_using_group_by() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "salary".parse().unwrap() => ColumnType::BigInt, - "bonus".parse().unwrap() => ColumnType::Int128, - }, - ); - let query_text = "select 7 + max(salary) as max_i, min(salary + 777 * bonus) * -5 as min_d from sxt.employees"; + let (t, accessor) = get_test_accessor(); + let query_text = "select 7 + max(i) as max_i, min(i + 777 * d) * -5 as min_d from t"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let ast = @@ -1883,7 +1806,7 @@ fn we_can_use_arithmetic_outside_agg_expressions_without_using_group_by() { let expected_ast = QueryExpr::new( filter( - cols_expr_plan(t, &["bonus", "salary"], &accessor), + cols_expr_plan(t, &["d", "i"], &accessor), tab(t), const_bool(true), ), @@ -1891,12 +1814,9 @@ fn we_can_use_arithmetic_outside_agg_expressions_without_using_group_by() { group_by_postprocessing( &[], &[ - aliased_expr(padd(lit(7), max(col("salary"))), "max_i"), + aliased_expr(padd(lit(7), max(col("i"))), "max_i"), aliased_expr( - pmul( - min(padd(col("salary"), pmul(lit(777), col("bonus")))), - lit(-5), - ), + pmul(min(padd(col("i"), pmul(lit(777), col("d")))), lit(-5)), "min_d", ), ], @@ -1912,17 +1832,8 @@ fn we_can_use_arithmetic_outside_agg_expressions_without_using_group_by() { #[test] fn count_aggregation_always_have_integer_type() { - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! 
{ - "name".parse().unwrap() => ColumnType::VarChar, - "salary".parse().unwrap() => ColumnType::BigInt, - "tax".parse().unwrap() => ColumnType::Int128, - }, - ); - let query_text = - "select 7 + count(name) as cs, count(salary) * -5 as ci, count(tax) from sxt.employees"; + let (t, accessor) = get_test_accessor(); + let query_text = "select 7 + count(s) as cs, count(i) * -5 as ci, count(d) from t"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let ast = @@ -1930,7 +1841,7 @@ fn count_aggregation_always_have_integer_type() { let expected_ast = QueryExpr::new( filter( - cols_expr_plan(t, &["name", "salary", "tax"], &accessor), + cols_expr_plan(t, &["d", "i", "s"], &accessor), tab(t), const_bool(true), ), @@ -1938,9 +1849,9 @@ fn count_aggregation_always_have_integer_type() { group_by_postprocessing( &[], &[ - aliased_expr(padd(lit(7), count(col("name"))), "cs"), - aliased_expr(pmul(count(col("salary")), lit(-5)), "ci"), - aliased_expr(count(col("tax")), "__count__"), + aliased_expr(padd(lit(7), count(col("s"))), "cs"), + aliased_expr(pmul(count(col("i")), lit(-5)), "ci"), + aliased_expr(count(col("d")), "__count__"), ], ), select_expr(&[ @@ -1955,41 +1866,17 @@ fn count_aggregation_always_have_integer_type() { #[test] fn select_wildcard_is_valid_with_group_by_exprs() { - let columns = [ - "employee_name", - "base_salary", - "annual_bonus", - "manager_name", - "manager_salary", - "manager_bonus", - "department_name", - "department_budget", - "department_headcount", - ]; + let columns = ["s", "i", "d", "s0", "i0", "d0", "s1", "i1", "d1"]; let sorted_columns = columns.iter().sorted().collect::>(); let aliased_exprs = columns .iter() .map(|c| aliased_expr(col(c), c)) .collect::>(); - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "employee_name".parse().unwrap() => ColumnType::VarChar, - "base_salary".parse().unwrap() => ColumnType::BigInt, - "annual_bonus".parse().unwrap() => ColumnType::Int128, - "manager_name".parse().unwrap() => ColumnType::VarChar, - "manager_salary".parse().unwrap() => ColumnType::BigInt, - "manager_bonus".parse().unwrap() => ColumnType::Int128, - "department_name".parse().unwrap() => ColumnType::VarChar, - "department_budget".parse().unwrap() => ColumnType::BigInt, - "department_headcount".parse().unwrap() => ColumnType::Int128, - }, - ); - + let (t, accessor) = get_test_accessor(); + let table_name = "sxt.t"; let query_text = format!( "SELECT * FROM {} GROUP BY {}", - "sxt.employees", + table_name, columns.join(", ") ); @@ -2014,19 +1901,10 @@ fn select_wildcard_is_valid_with_group_by_exprs() { #[test] fn nested_aggregations_are_not_supported() { let supported_agg = ["max", "min", "sum", "count"]; - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! 
{ - "salary".parse().unwrap() => ColumnType::BigInt, - }, - ); + let (t, accessor) = get_test_accessor(); for perm_aggs in supported_agg.iter().permutations(2) { - let query_text = format!( - "SELECT {}({}(salary)) FROM sxt.employees", - perm_aggs[0], perm_aggs[1] - ); + let query_text = format!("SELECT {}({}(i)) FROM t", perm_aggs[0], perm_aggs[1]); let intermediate_ast = SelectStatementParser::new().parse(&query_text).unwrap(); let result = @@ -2044,17 +1922,8 @@ fn nested_aggregations_are_not_supported() { #[test] fn select_group_and_order_by_preserve_the_column_order_reference() { const N: usize = 4; - let t = "sxt.employees".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - t, - indexmap! { - "salary".parse().unwrap() => ColumnType::BigInt, - "department".parse().unwrap() => ColumnType::BigInt, - "tax".parse().unwrap() => ColumnType::BigInt, - "name".parse().unwrap() => ColumnType::VarChar, - }, - ); - let base_cols: [&str; N] = ["salary", "department", "tax", "name"]; // sorted because of `select: [cols = ... ]` + let (t, accessor) = get_test_accessor(); + let base_cols: [&str; N] = ["i", "i0", "i1", "s"]; // sorted because of `select: [cols = ... ]` let base_ordering = [Asc, Desc, Asc, Desc]; for (idx, perm_cols) in base_cols .into_iter() diff --git a/crates/proof-of-sql/src/sql/proof/mod.rs b/crates/proof-of-sql/src/sql/proof/mod.rs index b33be315c..48139dc22 100644 --- a/crates/proof-of-sql/src/sql/proof/mod.rs +++ b/crates/proof-of-sql/src/sql/proof/mod.rs @@ -25,6 +25,8 @@ pub(crate) use provable_result_column::ProvableResultColumn; mod provable_query_result; pub use provable_query_result::ProvableQueryResult; +#[cfg(all(test, feature = "arrow"))] +mod provable_query_result_test; mod sumcheck_mle_evaluations; pub(crate) use sumcheck_mle_evaluations::SumcheckMleEvaluations; @@ -68,6 +70,3 @@ pub(crate) use result_element_serialization::{ mod first_round_builder; pub(crate) use first_round_builder::FirstRoundBuilder; - -#[cfg(all(test, feature = "arrow"))] -mod provable_query_result_test; diff --git a/crates/proof-of-sql/src/sql/proof/proof_plan.rs b/crates/proof-of-sql/src/sql/proof/proof_plan.rs index 42ceceab1..430485308 100644 --- a/crates/proof-of-sql/src/sql/proof/proof_plan.rs +++ b/crates/proof-of-sql/src/sql/proof/proof_plan.rs @@ -3,7 +3,7 @@ use crate::base::{ commitment::Commitment, database::{ Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor, - OwnedTable, TableRef, + OwnedTable, }, map::IndexSet, proof::ProofError, @@ -46,9 +46,6 @@ pub trait ProofPlan: Debug + Send + Sync + ProverEvaluate IndexSet; - - /// Return all the tables referenced in the Query - fn get_table_references(&self) -> IndexSet; } pub trait ProverEvaluate { diff --git a/crates/proof-of-sql/src/sql/proof/query_proof_test.rs b/crates/proof-of-sql/src/sql/proof/query_proof_test.rs index e6e685673..a4fa8a65a 100644 --- a/crates/proof-of-sql/src/sql/proof/query_proof_test.rs +++ b/crates/proof-of-sql/src/sql/proof/query_proof_test.rs @@ -7,7 +7,7 @@ use crate::{ database::{ owned_table_utility::{bigint, owned_table}, Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, - MetadataAccessor, OwnedTable, OwnedTableTestAccessor, TableRef, TestAccessor, + MetadataAccessor, OwnedTable, OwnedTableTestAccessor, TestAccessor, UnimplementedTestAccessor, }, map::IndexSet, @@ -109,9 +109,6 @@ impl ProofPlan for TrivialTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function 
yet") } - fn get_table_references(&self) -> IndexSet { - unimplemented!("no real usage for this function yet") - } } fn verify_a_trivial_query_proof_with_given_offset(n: usize, offset_generators: usize) { @@ -281,9 +278,6 @@ impl ProofPlan for SquareTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } - fn get_table_references(&self) -> IndexSet { - unimplemented!("no real usage for this function yet") - } } fn verify_a_proof_with_an_anchored_commitment_and_given_offset(offset_generators: usize) { @@ -487,9 +481,6 @@ impl ProofPlan for DoubleSquareTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } - fn get_table_references(&self) -> IndexSet { - unimplemented!("no real usage for this function yet") - } } fn verify_a_proof_with_an_intermediate_commitment_and_given_offset(offset_generators: usize) { @@ -686,9 +677,6 @@ impl ProofPlan for ChallengeTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } - fn get_table_references(&self) -> IndexSet { - unimplemented!("no real usage for this function yet") - } } fn verify_a_proof_with_a_post_result_challenge_and_given_offset(offset_generators: usize) { diff --git a/crates/proof-of-sql/src/sql/proof/query_result.rs b/crates/proof-of-sql/src/sql/proof/query_result.rs index 647e4ad0b..31b9ad994 100644 --- a/crates/proof-of-sql/src/sql/proof/query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/query_result.rs @@ -3,6 +3,8 @@ use crate::base::{ proof::ProofError, scalar::Scalar, }; +#[cfg(feature = "arrow")] +use arrow::{error::ArrowError, record_batch::RecordBatch}; use snafu::Snafu; /// Verifiable query errors @@ -52,5 +54,22 @@ pub struct QueryData { pub verification_hash: [u8; 32], } +impl QueryData { + #[cfg(all(test, feature = "arrow"))] + #[must_use] + pub fn into_record_batch(self) -> RecordBatch { + self.try_into().unwrap() + } +} + +#[cfg(feature = "arrow")] +impl TryFrom> for RecordBatch { + type Error = ArrowError; + + fn try_from(value: QueryData) -> Result { + Self::try_from(value.table) + } +} + /// The result of a query -- either an error or a table. 
 pub type QueryResult<S> = Result<QueryData<S>, QueryError>;
diff --git a/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs b/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs
index d2db5df0e..5d299e408 100644
--- a/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs
+++ b/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs
@@ -8,7 +8,7 @@ use crate::{
         database::{
             owned_table_utility::{bigint, owned_table},
             Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor,
-            MetadataAccessor, OwnedTable, TableRef, TestAccessor, UnimplementedTestAccessor,
+            MetadataAccessor, OwnedTable, TestAccessor, UnimplementedTestAccessor,
         },
         map::IndexSet,
         proof::ProofError,
@@ -88,10 +88,6 @@ impl ProofPlan for EmptyTestQueryExpr {
     fn get_column_references(&self) -> IndexSet<ColumnRef> {
         unimplemented!("no real usage for this function yet")
     }
-
-    fn get_table_references(&self) -> IndexSet<TableRef> {
-        unimplemented!("no real usage for this function yet")
-    }
 }
 
 #[test]
diff --git a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs
index b7edcc70a..c524a2c76 100644
--- a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs
+++ b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs
@@ -1,21 +1,9 @@
 use super::{FilterExec, GroupByExec, ProjectionExec};
 use crate::{
-    base::{
-        commitment::Commitment,
-        database::{
-            Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor,
-            OwnedTable, TableRef,
-        },
-        map::IndexSet,
-        proof::ProofError,
-    },
-    sql::proof::{
-        CountBuilder, FinalRoundBuilder, FirstRoundBuilder, ProofPlan, ProverEvaluate,
-        VerificationBuilder,
-    },
+    base::{commitment::Commitment, database::Column, map::IndexSet},
+    sql::proof::{ProofPlan, ProverEvaluate},
 };
 use alloc::vec::Vec;
-use bumpalo::Bump;
 use serde::{Deserialize, Serialize};
 
 /// The query plan for proving a query
@@ -46,9 +34,9 @@ pub enum DynProofPlan {
 impl<C: Commitment> ProofPlan<C> for DynProofPlan<C> {
     fn count(
         &self,
-        builder: &mut CountBuilder,
-        accessor: &dyn MetadataAccessor,
-    ) -> Result<(), ProofError> {
+        builder: &mut crate::sql::proof::CountBuilder,
+        accessor: &dyn crate::base::database::MetadataAccessor,
+    ) -> Result<(), crate::base::proof::ProofError> {
         match self {
             DynProofPlan::Projection(expr) => expr.count(builder, accessor),
             DynProofPlan::GroupBy(expr) => expr.count(builder, accessor),
@@ -56,7 +44,7 @@ impl ProofPlan for DynProofPlan {
         }
     }
 
-    fn get_length(&self, accessor: &dyn MetadataAccessor) -> usize {
+    fn get_length(&self, accessor: &dyn crate::base::database::MetadataAccessor) -> usize {
         match self {
             DynProofPlan::Projection(expr) => expr.get_length(accessor),
             DynProofPlan::GroupBy(expr) => expr.get_length(accessor),
@@ -64,7 +52,7 @@ impl ProofPlan for DynProofPlan {
         }
     }
 
-    fn get_offset(&self, accessor: &dyn MetadataAccessor) -> usize {
+    fn get_offset(&self, accessor: &dyn crate::base::database::MetadataAccessor) -> usize {
         match self {
             DynProofPlan::Projection(expr) => expr.get_offset(accessor),
             DynProofPlan::GroupBy(expr) => expr.get_offset(accessor),
@@ -75,10 +63,10 @@ impl ProofPlan for DynProofPlan {
     #[tracing::instrument(name = "DynProofPlan::verifier_evaluate", level = "debug", skip_all)]
     fn verifier_evaluate(
         &self,
-        builder: &mut VerificationBuilder<C>,
-        accessor: &dyn CommitmentAccessor<C>,
-        result: Option<&OwnedTable<C::Scalar>>,
-    ) -> Result<Vec<C::Scalar>, ProofError> {
+        builder: &mut crate::sql::proof::VerificationBuilder<C>,
+        accessor: &dyn crate::base::database::CommitmentAccessor<C>,
+        result: Option<&crate::base::database::OwnedTable<C::Scalar>>,
+    ) -> Result<Vec<C::Scalar>, crate::base::proof::ProofError> {
         match self {
             DynProofPlan::Projection(expr) => expr.verifier_evaluate(builder, accessor, result),
             DynProofPlan::GroupBy(expr) => expr.verifier_evaluate(builder, accessor, result),
@@ -86,7 +74,7 @@ impl ProofPlan for DynProofPlan {
         }
     }
 
-    fn get_column_result_fields(&self) -> Vec<ColumnField> {
+    fn get_column_result_fields(&self) -> Vec<crate::base::database::ColumnField> {
         match self {
             DynProofPlan::Projection(expr) => expr.get_column_result_fields(),
             DynProofPlan::GroupBy(expr) => expr.get_column_result_fields(),
@@ -94,21 +82,13 @@ impl ProofPlan for DynProofPlan {
         }
     }
 
-    fn get_column_references(&self) -> IndexSet<ColumnRef> {
+    fn get_column_references(&self) -> IndexSet<crate::base::database::ColumnRef> {
         match self {
             DynProofPlan::Projection(expr) => expr.get_column_references(),
             DynProofPlan::GroupBy(expr) => expr.get_column_references(),
             DynProofPlan::Filter(expr) => expr.get_column_references(),
         }
     }
-
-    fn get_table_references(&self) -> IndexSet<TableRef> {
-        match self {
-            DynProofPlan::Projection(expr) => expr.get_table_references(),
-            DynProofPlan::GroupBy(expr) => expr.get_table_references(),
-            DynProofPlan::Filter(expr) => expr.get_table_references(),
-        }
-    }
 }
 
 impl<C: Commitment> ProverEvaluate<C::Scalar> for DynProofPlan<C> {
@@ -116,8 +96,8 @@ impl ProverEvaluate for DynProofPlan {
     fn result_evaluate<'a>(
         &self,
         input_length: usize,
-        alloc: &'a Bump,
-        accessor: &'a dyn DataAccessor<C::Scalar>,
+        alloc: &'a bumpalo::Bump,
+        accessor: &'a dyn crate::base::database::DataAccessor<C::Scalar>,
     ) -> Vec<Column<'a, C::Scalar>> {
         match self {
             DynProofPlan::Projection(expr) => expr.result_evaluate(input_length, alloc, accessor),
@@ -126,7 +106,7 @@ impl ProverEvaluate for DynProofPlan {
         }
     }
 
-    fn first_round_evaluate(&self, builder: &mut FirstRoundBuilder) {
+    fn first_round_evaluate(&self, builder: &mut crate::sql::proof::FirstRoundBuilder) {
         match self {
             DynProofPlan::Projection(expr) => expr.first_round_evaluate(builder),
             DynProofPlan::GroupBy(expr) => expr.first_round_evaluate(builder),
@@ -137,9 +117,9 @@ impl ProverEvaluate for DynProofPlan {
     #[tracing::instrument(name = "DynProofPlan::final_round_evaluate", level = "debug", skip_all)]
     fn final_round_evaluate<'a>(
        &self,
-        builder: &mut FinalRoundBuilder<'a, C::Scalar>,
-        alloc: &'a Bump,
-        accessor: &'a dyn DataAccessor<C::Scalar>,
+        builder: &mut crate::sql::proof::FinalRoundBuilder<'a, C::Scalar>,
+        alloc: &'a bumpalo::Bump,
+        accessor: &'a dyn crate::base::database::DataAccessor<C::Scalar>,
     ) -> Vec<Column<'a, C::Scalar>> {
         match self {
             DynProofPlan::Projection(expr) => expr.final_round_evaluate(builder, alloc, accessor),
diff --git a/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs
index 5a1b6106b..4259d3d88 100644
--- a/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs
+++ b/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs
@@ -4,7 +4,7 @@ use crate::{
         commitment::Commitment,
         database::{
             filter_util::filter_columns, Column, ColumnField, ColumnRef, CommitmentAccessor,
-            DataAccessor, MetadataAccessor, OwnedTable, TableRef,
+            DataAccessor, MetadataAccessor, OwnedTable,
         },
         map::IndexSet,
         proof::ProofError,
@@ -139,10 +139,6 @@ where
         columns
     }
-
-    fn get_table_references(&self) -> IndexSet<TableRef> {
-        IndexSet::from_iter([self.table.table_ref])
-    }
 }
 
 /// Alias for a filter expression with a honest prover.
diff --git a/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs b/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs
index 062781985..c6252d133 100644
--- a/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs
+++ b/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs
@@ -153,10 +153,6 @@ fn we_can_correctly_fetch_all_the_referenced_columns() {
             )
         ])
     );
-
-    let ref_tables = provable_ast.get_table_references();
-
-    assert_eq!(ref_tables, IndexSet::from_iter([table_ref]));
 }
 
 #[test]
diff --git a/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs
index 385b8a2e7..0a43da82f 100644
--- a/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs
+++ b/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs
@@ -7,7 +7,7 @@ use crate::{
                 aggregate_columns, compare_indexes_by_owned_columns, AggregatedColumns,
             },
             Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor,
-            MetadataAccessor, OwnedTable, TableRef,
+            MetadataAccessor, OwnedTable,
         },
         map::IndexSet,
         proof::ProofError,
@@ -202,10 +202,6 @@ impl ProofPlan for GroupByExec {
         columns
     }
-
-    fn get_table_references(&self) -> IndexSet<TableRef> {
-        IndexSet::from_iter([self.table.table_ref])
-    }
 }
 
 impl ProverEvaluate for GroupByExec {
diff --git a/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs
index f3038b310..fb66bff00 100644
--- a/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs
+++ b/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs
@@ -3,7 +3,7 @@ use crate::{
         commitment::Commitment,
         database::{
             Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor,
-            OwnedTable, TableRef,
+            OwnedTable,
         },
         map::IndexSet,
         proof::ProofError,
@@ -92,10 +92,6 @@ impl ProofPlan for ProjectionExec {
         });
         columns
     }
-
-    fn get_table_references(&self) -> IndexSet<TableRef> {
-        IndexSet::from_iter([self.table.table_ref])
-    }
 }
 
 impl ProverEvaluate for ProjectionExec {
diff --git a/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs b/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs
index c97ecf471..3addcfb17 100644
--- a/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs
+++ b/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs
@@ -102,10 +102,6 @@ fn we_can_correctly_fetch_all_the_referenced_columns() {
             ),
         ])
     );
-
-    let ref_tables = provable_ast.get_table_references();
-
-    assert_eq!(ref_tables, IndexSet::from_iter([table_ref]));
 }
 
 #[test]
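
Reviewer note (not part of the patch): the query_result.rs hunk above adds an arrow-gated conversion from QueryData into an Arrow RecordBatch. A minimal sketch of how downstream code might call it is below; the helper name and the re-export paths (proof_of_sql::sql::proof::QueryData, proof_of_sql::base::scalar::Scalar) are assumptions for illustration, not something this diff introduces.

    #[cfg(feature = "arrow")]
    fn query_data_to_record_batch<S: proof_of_sql::base::scalar::Scalar>(
        data: proof_of_sql::sql::proof::QueryData<S>,
    ) -> Result<arrow::record_batch::RecordBatch, arrow::error::ArrowError> {
        // Relies on the `TryFrom<QueryData<S>> for RecordBatch` impl added in this
        // patch, which in turn converts the inner `OwnedTable` into a `RecordBatch`.
        data.try_into()
    }

Outside of test builds, this fallible try_into/TryFrom path is the only one available; the infallible into_record_batch helper added above is gated on `all(test, feature = "arrow")`.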