From 6d435e4add6005712fd18c982234ea4c7581e698 Mon Sep 17 00:00:00 2001 From: Henry Mai Date: Tue, 22 Oct 2024 22:19:08 -0400 Subject: [PATCH 01/74] feat: dinosaurs-example --- .github/workflows/lint-and-test.yml | 2 + crates/proof-of-sql/Cargo.toml | 4 + .../examples/dinosaurs/dinosaurs.csv | 11 ++ .../proof-of-sql/examples/dinosaurs/main.rs | 125 ++++++++++++++++++ 4 files changed, 142 insertions(+) create mode 100644 crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv create mode 100644 crates/proof-of-sql/examples/dinosaurs/main.rs diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index be1f56d12..18c6c4d2e 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -118,6 +118,8 @@ jobs: run: cargo run --example dog_breeds - name: Run wood types example run: cargo run --example wood_types + - name: Run dinosaurs example + run: cargo run --example dinosaurs - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index 8b408d840..320af0082 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -103,6 +103,10 @@ required-features = [ "arrow" ] name = "wood_types" required-features = [ "arrow" ] +[[example]] +name = "dinosaurs" +required-features = [ "arrow" ] + [[bench]] name = "posql_benches" harness = false diff --git a/crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv b/crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv new file mode 100644 index 000000000..5e76a81b6 --- /dev/null +++ b/crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv @@ -0,0 +1,11 @@ +id,name,period,diet,length_meters,weight_tons +1,Tyrannosaurus Rex,Cretaceous,Carnivore,12.3,7.0 +2,Stegosaurus,Jurassic,Herbivore,9.0,5.5 +3,Triceratops,Cretaceous,Herbivore,8.5,10.0 +4,Velociraptor,Cretaceous,Carnivore,1.8,0.015 
+5,Brachiosaurus,Jurassic,Herbivore,26.0,50.0 +6,Ankylosaurus,Cretaceous,Herbivore,6.5,6.0 +7,Spinosaurus,Cretaceous,Carnivore,15.0,7.5 +8,Diplodocus,Jurassic,Herbivore,27.0,25.0 +9,Allosaurus,Jurassic,Carnivore,9.7,2.3 +10,Parasaurolophus,Cretaceous,Herbivore,10.0,3.5 \ No newline at end of file diff --git a/crates/proof-of-sql/examples/dinosaurs/main.rs b/crates/proof-of-sql/examples/dinosaurs/main.rs new file mode 100644 index 000000000..154bb1683 --- /dev/null +++ b/crates/proof-of-sql/examples/dinosaurs/main.rs @@ -0,0 +1,125 @@ +//! This is a non-interactive example of using Proof of SQL with a dinosaur dataset. +//! To run this, use `cargo run --release --example dinosaurs`. +//! +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --release --example dinosaurs --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. + +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{ + arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, + TestAccessor, + }, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; + +// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. +// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. +const DORY_SETUP_MAX_NU: usize = 8; +// This should be a "nothing-up-my-sleeve" phrase or number. +const DORY_SEED: [u8; 32] = *b"262a6aa18b5c43d589677c13dd33e6dc"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. 
+fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "dinosaurs".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{result:?}"); +} + +fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + + let filename = "./crates/proof-of-sql/examples/dinosaurs/dinosaurs.csv"; + let inferred_schema = + SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); + let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); + + let dinosaurs_batch = ReaderBuilder::new(posql_compatible_schema) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + + // Load the table into an "Accessor" so that the prover and verifier can access the 
data/commitments. + let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "dinosaurs.dinosaurs".parse().unwrap(), + OwnedTable::try_from(dinosaurs_batch).unwrap(), + 0, + ); + + prove_and_verify_query( + "SELECT COUNT(*) AS total_dinosaurs FROM dinosaurs", + &accessor, + &prover_setup, + &verifier_setup, + ); + + prove_and_verify_query( + "SELECT name, weight_tons FROM dinosaurs WHERE diet = 'Carnivore' ORDER BY weight_tons DESC LIMIT 1", + &accessor, + &prover_setup, + &verifier_setup, + ); + + prove_and_verify_query( + "SELECT name, length_meters FROM dinosaurs ORDER BY length_meters DESC LIMIT 3", + &accessor, + &prover_setup, + &verifier_setup, + ); +} From d13f6546ef0e3d9499bb25ae309840f9bbad6f7f Mon Sep 17 00:00:00 2001 From: Henry Mai Date: Wed, 23 Oct 2024 01:53:36 -0400 Subject: [PATCH 02/74] feat: books example --- .github/workflows/lint-and-test.yml | 2 + crates/proof-of-sql/Cargo.toml | 4 + crates/proof-of-sql/examples/books/books.csv | 21 +++ crates/proof-of-sql/examples/books/main.rs | 136 +++++++++++++++++++ 4 files changed, 163 insertions(+) create mode 100644 crates/proof-of-sql/examples/books/books.csv create mode 100644 crates/proof-of-sql/examples/books/main.rs diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 18c6c4d2e..7ee3505a5 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -120,6 +120,8 @@ jobs: run: cargo run --example wood_types - name: Run dinosaurs example run: cargo run --example dinosaurs + - name: Run books example + run: cargo run --example books - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index 320af0082..d30d3b803 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -107,6 
+107,10 @@ required-features = [ "arrow" ] name = "dinosaurs" required-features = [ "arrow" ] +[[example]] +name = "books" +required-features = [ "arrow" ] + [[bench]] name = "posql_benches" harness = false diff --git a/crates/proof-of-sql/examples/books/books.csv b/crates/proof-of-sql/examples/books/books.csv new file mode 100644 index 000000000..e5e7e841b --- /dev/null +++ b/crates/proof-of-sql/examples/books/books.csv @@ -0,0 +1,21 @@ +id,title,author,publication_year,genre,rating +1,To Kill a Mockingbird,Harper Lee,1960,Fiction,4.5 +2,1984,George Orwell,1949,Science Fiction,4.7 +3,Pride and Prejudice,Jane Austen,1813,Romance,4.3 +4,The Great Gatsby,F. Scott Fitzgerald,1925,Fiction,4.2 +5,The Catcher in the Rye,J.D. Salinger,1951,Fiction,4.0 +6,Moby-Dick,Herman Melville,1851,Adventure,4.1 +7,The Lord of the Rings,J.R.R. Tolkien,1954,Fantasy,4.9 +8,The Hobbit,J.R.R. Tolkien,1937,Fantasy,4.6 +9,Brave New World,Aldous Huxley,1932,Science Fiction,4.4 +10,The Hunger Games,Suzanne Collins,2008,Young Adult,4.3 +11,Harry Potter and the Philosopher's Stone,J.K. Rowling,1997,Fantasy,4.8 +12,The Da Vinci Code,Dan Brown,2003,Thriller,3.9 +13,The Alchemist,Paulo Coelho,1988,Fiction,4.2 +14,The Girl with the Dragon Tattoo,Stieg Larsson,2005,Mystery,4.1 +15,The Hitchhiker's Guide to the Galaxy,Douglas Adams,1979,Science Fiction,4.5 +16,The Shining,Stephen King,1977,Horror,4.3 +17,The Catch-22,Joseph Heller,1961,Satire,4.0 +18,The Chronicles of Narnia,C.S. Lewis,1950,Fantasy,4.7 +19,The Fault in Our Stars,John Green,2012,Young Adult,4.2 +20,The Old Man and the Sea,Ernest Hemingway,1952,Fiction,4.1 \ No newline at end of file diff --git a/crates/proof-of-sql/examples/books/main.rs b/crates/proof-of-sql/examples/books/main.rs new file mode 100644 index 000000000..973e40b15 --- /dev/null +++ b/crates/proof-of-sql/examples/books/main.rs @@ -0,0 +1,136 @@ +//! This is a non-interactive example of using Proof of SQL with a books dataset. +//! 
To run this, use `cargo run --release --example books`. +//! +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --release --example books --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. + +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{ + arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, + TestAccessor, + }, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; + +// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. +// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. +const DORY_SETUP_MAX_NU: usize = 8; +// This should be a "nothing-up-my-sleeve" phrase or number. +const DORY_SEED: [u8; 32] = *b"ebab60d58dee4cc69658939b7c2a582d"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. 
+fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "books".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{result:?}"); +} + +fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + + let filename = "./crates/proof-of-sql/examples/books/books.csv"; + let inferred_schema = + SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); + let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); + + let books_batch = ReaderBuilder::new(posql_compatible_schema) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + + // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
+ let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "books.books".parse().unwrap(), + OwnedTable::try_from(books_batch).unwrap(), + 0, + ); + + // Query 1: Count the total number of books + prove_and_verify_query( + "SELECT COUNT(*) AS total_books FROM books", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 2: Find the top 5 highest-rated books + prove_and_verify_query( + "SELECT title, author, rating FROM books ORDER BY rating DESC LIMIT 5", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 3: Count the number of books in each genre + prove_and_verify_query( + "SELECT genre, COUNT(*) AS book_count FROM books GROUP BY genre ORDER BY book_count DESC", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 4: Find books published after 2000 with a rating higher than 4.5 + prove_and_verify_query( + "SELECT title, author, publication_year, rating FROM books WHERE publication_year > 2000 AND rating > 4.5", + &accessor, + &prover_setup, + &verifier_setup, + ); +} From 41b09dce0b7b0b2ef091f4743d64e1bcb33140b0 Mon Sep 17 00:00:00 2001 From: Henry Mai Date: Wed, 23 Oct 2024 02:45:34 -0400 Subject: [PATCH 03/74] feat: brands example --- .github/workflows/lint-and-test.yml | 2 + crates/proof-of-sql/Cargo.toml | 4 + .../proof-of-sql/examples/brands/brands.csv | 26 ++++ crates/proof-of-sql/examples/brands/main.rs | 128 ++++++++++++++++++ 4 files changed, 160 insertions(+) create mode 100644 crates/proof-of-sql/examples/brands/brands.csv create mode 100644 crates/proof-of-sql/examples/brands/main.rs diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 7ee3505a5..dfebc0ea6 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -122,6 +122,8 @@ jobs: run: cargo run --example dinosaurs - name: Run books example run: cargo run --example books + - name: Run brands example + run: cargo run --example 
brands - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index d30d3b803..d4fe9f69d 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -111,6 +111,10 @@ required-features = [ "arrow" ] name = "books" required-features = [ "arrow" ] +[[example]] +name = "brands" +required-features = [ "arrow" ] + [[bench]] name = "posql_benches" harness = false diff --git a/crates/proof-of-sql/examples/brands/brands.csv b/crates/proof-of-sql/examples/brands/brands.csv new file mode 100644 index 000000000..f75cdf6a9 --- /dev/null +++ b/crates/proof-of-sql/examples/brands/brands.csv @@ -0,0 +1,26 @@ +Name,Country,Founded,Revenue +Apple,United States,1976,365.82 +Samsung,South Korea,1938,200.73 +Microsoft,United States,1975,198.27 +Amazon,United States,1994,513.98 +Google,United States,1998,282.84 +Toyota,Japan,1937,278.52 +Coca-Cola,United States,1886,38.66 +Mercedes-Benz,Germany,1926,154.31 +McDonald's,United States,1955,19.2 +Nike,United States,1964,44.54 +Louis Vuitton,France,1854,75.98 +BMW,Germany,1916,121.87 +Disney,United States,1923,67.42 +Honda,Japan,1948,129.21 +Pepsi,United States,1893,79.47 +Adidas,Germany,1949,21.23 +Nestle,Switzerland,1866,94.42 +Unilever,Netherlands,1929,58.26 +Sony,Japan,1946,84.89 +Volkswagen,Germany,1937,250.2 +IKEA,Sweden,1943,44.6 +Starbucks,United States,1971,23.52 +Zara,Spain,1974,27.72 +H&M,Sweden,1947,21.73 +Gucci,Italy,1921,10.34 \ No newline at end of file diff --git a/crates/proof-of-sql/examples/brands/main.rs b/crates/proof-of-sql/examples/brands/main.rs new file mode 100644 index 000000000..8e1db87a1 --- /dev/null +++ b/crates/proof-of-sql/examples/brands/main.rs @@ -0,0 +1,128 @@ +//! This is a non-interactive example of using Proof of SQL with a brands dataset. +//! To run this, use `cargo run --release --example brands`. +//! 
+//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --release --example brands --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. + +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{ + arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, + TestAccessor, + }, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; + +// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. +// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. +const DORY_SETUP_MAX_NU: usize = 8; +// This should be a "nothing-up-my-sleeve" phrase or number. +const DORY_SEED: [u8; 32] = *b"8f3a2e1c5b9d7f0a6e4d2c8b7a9f1e3d"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. 
+fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "brands".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{result:?}"); +} + +fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + + let filename = "./crates/proof-of-sql/examples/brands/brands.csv"; + let inferred_schema = + SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); + let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); + + let brands_batch = ReaderBuilder::new(posql_compatible_schema) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + + // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
+ let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "brands.global_brands".parse().unwrap(), + OwnedTable::try_from(brands_batch).unwrap(), + 0, + ); + + // Query 1: Count the total number of brands + prove_and_verify_query( + "SELECT COUNT(*) AS total_brands FROM global_brands", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 2: List the names of brands founded before 1950 + prove_and_verify_query( + "SELECT Name FROM global_brands WHERE Founded < 1950", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 3: List the top 5 countries with the highest total revenue, ordered by total revenue + prove_and_verify_query( + "SELECT Country, SUM(Revenue) AS total_revenue FROM global_brands GROUP BY Country ORDER BY total_revenue DESC LIMIT 5", + &accessor, + &prover_setup, + &verifier_setup, + ); +} From 43a174e44569bd3c150bf3edd2f6a22fee989d2e Mon Sep 17 00:00:00 2001 From: Henry Mai Date: Wed, 23 Oct 2024 11:24:53 -0400 Subject: [PATCH 04/74] feat: plastics example --- .github/workflows/lint-and-test.yml | 4 +- crates/proof-of-sql/Cargo.toml | 6 +- crates/proof-of-sql/examples/plastics/main.rs | 135 ++++++++++++++++++ .../examples/plastics/plastics.csv | 19 +++ 4 files changed, 162 insertions(+), 2 deletions(-) create mode 100644 crates/proof-of-sql/examples/plastics/main.rs create mode 100644 crates/proof-of-sql/examples/plastics/plastics.csv diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index dfebc0ea6..85014b80a 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -124,6 +124,8 @@ jobs: run: cargo run --example books - name: Run brands example run: cargo run --example brands + - name: Run plastics example + run: cargo run --example plastics - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) @@ 
-238,4 +240,4 @@ jobs: - name: Install solhint run: npm install -g solhint - name: Run tests - run: solhint -c 'crates/proof-of-sql/.solhint.json' 'crates/proof-of-sql/**/*.sol' -w 0 + run: solhint -c 'crates/proof-of-sql/.solhint.json' 'crates/proof-of-sql/**/*.sol' -w 0 \ No newline at end of file diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index d4fe9f69d..853e2e31f 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -115,6 +115,10 @@ required-features = [ "arrow" ] name = "brands" required-features = [ "arrow" ] +[[example]] +name = "plastics" +required-features = [ "arrow" ] + [[bench]] name = "posql_benches" harness = false @@ -128,4 +132,4 @@ required-features = [ "test" ] [[bench]] name = "jaeger_benches" harness = false -required-features = [ "blitzar" ] +required-features = [ "blitzar" ] \ No newline at end of file diff --git a/crates/proof-of-sql/examples/plastics/main.rs b/crates/proof-of-sql/examples/plastics/main.rs new file mode 100644 index 000000000..7263e7538 --- /dev/null +++ b/crates/proof-of-sql/examples/plastics/main.rs @@ -0,0 +1,135 @@ +//! This is a non-interactive example of using Proof of SQL with a plastics dataset. +//! To run this, use `cargo run --release --example plastics`. +//! +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --release --example plastics --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. 
+ +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{ + arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, + TestAccessor, + }, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; + +// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. +// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. +const DORY_SETUP_MAX_NU: usize = 8; +// This should be a "nothing-up-my-sleeve" phrase or number. +const DORY_SEED: [u8; 32] = *b"32f7f321c4ab1234d5e6f7a8b9c0d1e2"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. 
+fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "plastics".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{result:?}"); +} + +fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + + let filename = "./crates/proof-of-sql/examples/plastics/plastics.csv"; + let schema = get_posql_compatible_schema(&SchemaRef::new( + infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), + )); + let plastics_batch = ReaderBuilder::new(schema) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + + // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
+ let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "plastics.types".parse().unwrap(), + OwnedTable::try_from(plastics_batch).unwrap(), + 0, + ); + + // Query 1: Count total number of plastic types + prove_and_verify_query( + "SELECT COUNT(*) AS total_types FROM types", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 2: List names of biodegradable plastics + prove_and_verify_query( + "SELECT Name FROM types WHERE Biodegradable = TRUE ORDER BY Name", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 3: Show average density of plastics by recycling code + prove_and_verify_query( + "SELECT Code, SUM(Density)/COUNT(*) as avg_density FROM types GROUP BY Code ORDER BY Code", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 4: List plastics with density greater than 1.0 g/cm³ + prove_and_verify_query( + "SELECT Name, Density FROM types WHERE Density > 1.0 ORDER BY Density DESC", + &accessor, + &prover_setup, + &verifier_setup, + ); +} diff --git a/crates/proof-of-sql/examples/plastics/plastics.csv b/crates/proof-of-sql/examples/plastics/plastics.csv new file mode 100644 index 000000000..9b793da0a --- /dev/null +++ b/crates/proof-of-sql/examples/plastics/plastics.csv @@ -0,0 +1,19 @@ +Name,Code,Density,Biodegradable +Polyethylene Terephthalate (PET),1,1.38,FALSE +High-Density Polyethylene (HDPE),2,0.97,FALSE +Polyvinyl Chloride (PVC),3,1.40,FALSE +Low-Density Polyethylene (LDPE),4,0.92,FALSE +Polypropylene (PP),5,0.90,FALSE +Polystyrene (PS),6,1.05,FALSE +Polylactic Acid (PLA),7,1.25,TRUE +Polybutylene Adipate Terephthalate (PBAT),7,1.26,TRUE +Polyhydroxyalkanoates (PHA),7,1.24,TRUE +Polybutylene Succinate (PBS),7,1.26,TRUE +Acrylic (PMMA),7,1.18,FALSE +Polycarbonate (PC),7,1.20,FALSE +Polyurethane (PU),7,1.05,FALSE +Acrylonitrile Butadiene Styrene (ABS),7,1.04,FALSE +Polyamide (Nylon),7,1.15,FALSE +Polyethylene Furanoate (PEF),7,1.43,TRUE +Thermoplastic Starch 
(TPS),7,1.35,TRUE +Cellulose Acetate,7,1.30,TRUE \ No newline at end of file From 6253f63122067cde99ca7f40d147aeabbfa55cc8 Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:03:39 -0500 Subject: [PATCH 05/74] feat: adding stub for avocado prices example --- crates/proof-of-sql/Cargo.toml | 3 +++ crates/proof-of-sql/examples/avocado-prices/main.rs | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 crates/proof-of-sql/examples/avocado-prices/main.rs diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index 853e2e31f..f0973b312 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -119,6 +119,9 @@ required-features = [ "arrow" ] name = "plastics" required-features = [ "arrow" ] +[[example]] +name = "avocado-prices" + [[bench]] name = "posql_benches" harness = false diff --git a/crates/proof-of-sql/examples/avocado-prices/main.rs b/crates/proof-of-sql/examples/avocado-prices/main.rs new file mode 100644 index 000000000..44c925049 --- /dev/null +++ b/crates/proof-of-sql/examples/avocado-prices/main.rs @@ -0,0 +1,3 @@ +//! Example to use Proof of SQL with datasets +//! To run, use `cargo run --example avocado-prices`. 
+fn main() {} From aef29091af37fe6f6988d67e9560934936f089f5 Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:05 -0500 Subject: [PATCH 06/74] feat: add dataset for example --- .../avocado-prices/avocado-prices.csv | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv diff --git a/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv b/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv new file mode 100644 index 000000000..a5b94b592 --- /dev/null +++ b/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv @@ -0,0 +1,36 @@ +Year,Price +1990,0.96 +1991,1.0 +1992,2.69 +1993,1.49 +1994,1.27 +1995,1.53 +1996,2.32 +1997,1.27 +1998,2.49 +1999,2.4 +2000,2.41 +2001,0.9 +2002,0.91 +2003,1.69 +2004,1.67 +2005,0.56 +2006,2.3 +2007,1.74 +2008,1.24 +2009,0.92 +2010,2.01 +2011,1.67 +2012,1.25 +2013,1.47 +2014,2.85 +2015,1.54 +2016,1.06 +2017,2.23 +2018,0.85 +2019,1.45 +2020,1.47 +2021,0.68 +2022,1.42 +2023,2.81 +2024,1.64 From 5660de25af88b0a33493515a18988bf3b143655c Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:33 -0500 Subject: [PATCH 07/74] fix: multiply price by 100 to eliminate float --- .../avocado-prices/avocado-prices.csv | 71 ++++++++++--------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv b/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv index a5b94b592..7750f7a46 100644 --- a/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv +++ b/crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv @@ -1,36 +1,37 @@ Year,Price -1990,0.96 -1991,1.0 -1992,2.69 -1993,1.49 -1994,1.27 -1995,1.53 -1996,2.32 -1997,1.27 -1998,2.49 -1999,2.4 -2000,2.41 -2001,0.9 -2002,0.91 -2003,1.69 -2004,1.67 -2005,0.56 -2006,2.3 -2007,1.74 -2008,1.24 
-2009,0.92 -2010,2.01 -2011,1.67 -2012,1.25 -2013,1.47 -2014,2.85 -2015,1.54 -2016,1.06 -2017,2.23 -2018,0.85 -2019,1.45 -2020,1.47 -2021,0.68 -2022,1.42 -2023,2.81 -2024,1.64 +1990,96 +1991,100 +1992,269 +1993,149 +1994,127 +1995,153 +1996,232 +1997,127 +1998,249 +1999,240 +2000,241 +2001,90 +2002,91 +2003,169 +2004,167 +2005,56 +2006,230 +2007,174 +2008,124 +2009,92 +2010,201 +2011,167 +2012,125 +2013,147 +2014,285 +2015,154 +2016,106 +2017,223 +2018,85 +2019,145 +2020,147 +2021,68 +2022,142 +2023,281 +2024,164 + From c9956ee19dfcee5910f7653c6412e9bee8636018 Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:26 -0500 Subject: [PATCH 08/74] feat: load csv file in example --- crates/proof-of-sql/Cargo.toml | 1 + .../examples/avocado-prices/main.rs | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index f0973b312..f11121a68 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -121,6 +121,7 @@ required-features = [ "arrow" ] [[example]] name = "avocado-prices" +required-features = [ "arrow" ] [[bench]] name = "posql_benches" diff --git a/crates/proof-of-sql/examples/avocado-prices/main.rs b/crates/proof-of-sql/examples/avocado-prices/main.rs index 44c925049..ea7f3a5ba 100644 --- a/crates/proof-of-sql/examples/avocado-prices/main.rs +++ b/crates/proof-of-sql/examples/avocado-prices/main.rs @@ -1,3 +1,19 @@ //! Example to use Proof of SQL with datasets //! To run, use `cargo run --example avocado-prices`. 
-fn main() {} +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use std::fs::File; + +fn main() { + let filename = "./crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv"; + let data_batch = ReaderBuilder::new(SchemaRef::new( + infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), + )) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + println!("{data_batch:?}"); +} From 5199b76348ff8a33b2717edb11f1f5d472b97a6d Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:27 -0500 Subject: [PATCH 09/74] feat: run SELECT * query in example --- .../examples/avocado-prices/main.rs | 73 ++++++++++++++++++- 1 file changed, 72 insertions(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/avocado-prices/main.rs b/crates/proof-of-sql/examples/avocado-prices/main.rs index ea7f3a5ba..60043d3e0 100644 --- a/crates/proof-of-sql/examples/avocado-prices/main.rs +++ b/crates/proof-of-sql/examples/avocado-prices/main.rs @@ -1,10 +1,41 @@ //! Example to use Proof of SQL with datasets //! To run, use `cargo run --example avocado-prices`. +//! +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --release --example avocado-prices --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. 
use arrow::datatypes::SchemaRef; use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{OwnedTable, OwnedTableTestAccessor}, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; use std::fs::File; +// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. +// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. +// For a sampling: +// max_nu = 3 => max table size is 32 rows +// max_nu = 4 => max table size is 128 rows +// max_nu = 8 => max table size is 32768 rows +// max_nu = 10 => max table size is 0.5 million rows +// max_nu = 15 => max table size is 0.5 billion rows +// max_nu = 20 => max table size is 0.5 trillion rows +// Note: we will eventually load these from a file. +const DORY_SETUP_MAX_NU: usize = 8; +// This should be a "nothing-up-my-sleeve" phrase or number. +const DORY_SEED: [u8; 32] = *b"len 32 rng seed - Space and Time"; + fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + let filename = "./crates/proof-of-sql/examples/avocado-prices/avocado-prices.csv"; let data_batch = ReaderBuilder::new(SchemaRef::new( infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), @@ -15,5 +46,45 @@ fn main() { .next() .unwrap() .unwrap(); - println!("{data_batch:?}"); + + // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
+ let accessor = OwnedTableTestAccessor::::new_from_table( + "census.income".parse().unwrap(), + OwnedTable::try_from(data_batch).unwrap(), + 0, + &prover_setup, + ); + + // Parse the query: + let query_plan = QueryExpr::::try_new( + "SELECT * FROM income".parse().unwrap(), + "census".parse().unwrap(), + &accessor, + ) + .unwrap(); + + // Generate the proof and result: + print!("Generating proof..."); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + &accessor, + &&prover_setup, + ); + println!("Done."); + + // Verify the result with the proof: + print!("Verifying proof..."); + let result = proof + .verify( + query_plan.proof_expr(), + &accessor, + &provable_result, + &&verifier_setup, + ) + .unwrap(); + println!("Verified."); + + // Display the result + println!("Query Result:"); + println!("{:?}", result.table); } From b504e2d55b00a723a8b66214207889eb66872b4f Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:28 -0500 Subject: [PATCH 10/74] feat: add more queries to example --- .../examples/avocado-prices/main.rs | 55 +++++++++++++++++-- 1 file changed, 51 insertions(+), 4 deletions(-) diff --git a/crates/proof-of-sql/examples/avocado-prices/main.rs b/crates/proof-of-sql/examples/avocado-prices/main.rs index 60043d3e0..30cd03365 100644 --- a/crates/proof-of-sql/examples/avocado-prices/main.rs +++ b/crates/proof-of-sql/examples/avocado-prices/main.rs @@ -14,7 +14,7 @@ use proof_of_sql::{ sql::{parse::QueryExpr, proof::QueryProof}, }; use rand::{rngs::StdRng, SeedableRng}; -use std::fs::File; +use std::{fs::File, time::Instant}; // We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. // The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. @@ -30,6 +30,53 @@ const DORY_SETUP_MAX_NU: usize = 8; // This should be a "nothing-up-my-sleeve" phrase or number. 
const DORY_SEED: [u8; 32] = *b"len 32 rng seed - Space and Time"; +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. +fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "avocado".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{:?}", result.table); +} + fn main() { let mut rng = StdRng::from_seed(DORY_SEED); let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); @@ -49,7 +96,7 @@ fn main() { // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
let accessor = OwnedTableTestAccessor::::new_from_table( - "census.income".parse().unwrap(), + "avocado.prices".parse().unwrap(), OwnedTable::try_from(data_batch).unwrap(), 0, &prover_setup, @@ -57,8 +104,8 @@ fn main() { // Parse the query: let query_plan = QueryExpr::::try_new( - "SELECT * FROM income".parse().unwrap(), - "census".parse().unwrap(), + "SELECT * FROM prices".parse().unwrap(), + "avocado".parse().unwrap(), &accessor, ) .unwrap(); From 0410ecd59c4bf15bc7f01a85bd1c5eda65165143 Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:29 -0500 Subject: [PATCH 11/74] feat: add more queries to example --- .../examples/avocado-prices/main.rs | 38 +++++-------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/crates/proof-of-sql/examples/avocado-prices/main.rs b/crates/proof-of-sql/examples/avocado-prices/main.rs index 30cd03365..97bce93b6 100644 --- a/crates/proof-of-sql/examples/avocado-prices/main.rs +++ b/crates/proof-of-sql/examples/avocado-prices/main.rs @@ -102,36 +102,16 @@ fn main() { &prover_setup, ); - // Parse the query: - let query_plan = QueryExpr::::try_new( - "SELECT * FROM prices".parse().unwrap(), - "avocado".parse().unwrap(), + prove_and_verify_query( + "SELECT COUNT(*) AS total FROM prices", &accessor, - ) - .unwrap(); - - // Generate the proof and result: - print!("Generating proof..."); - let (proof, provable_result) = QueryProof::::new( - query_plan.proof_expr(), + &prover_setup, + &verifier_setup, + ); + prove_and_verify_query( + "SELECT Geography, COUNT(*) AS num_geographies FROM income GROUP BY Geography", &accessor, - &&prover_setup, + &prover_setup, + &verifier_setup, ); - println!("Done."); - - // Verify the result with the proof: - print!("Verifying proof..."); - let result = proof - .verify( - query_plan.proof_expr(), - &accessor, - &provable_result, - &&verifier_setup, - ) - .unwrap(); - println!("Verified."); - - // Display the result - println!("Query 
Result:"); - println!("{:?}", result.table); } From eca4b1a6aa425ee5764847a9f765f95cf7c4b5a9 Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:30 -0500 Subject: [PATCH 12/74] feat: add example using postprocessing --- crates/proof-of-sql/examples/avocado-prices/main.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/proof-of-sql/examples/avocado-prices/main.rs b/crates/proof-of-sql/examples/avocado-prices/main.rs index 97bce93b6..a06a332db 100644 --- a/crates/proof-of-sql/examples/avocado-prices/main.rs +++ b/crates/proof-of-sql/examples/avocado-prices/main.rs @@ -11,7 +11,7 @@ use proof_of_sql::{ DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, VerifierSetup, }, - sql::{parse::QueryExpr, proof::QueryProof}, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, }; use rand::{rngs::StdRng, SeedableRng}; use std::{fs::File, time::Instant}; @@ -70,11 +70,12 @@ fn prove_and_verify_query( &verifier_setup, ) .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); // Display the result println!("Query Result:"); - println!("{:?}", result.table); + println!("{result:?}"); } fn main() { @@ -109,7 +110,7 @@ fn main() { &verifier_setup, ); prove_and_verify_query( - "SELECT Geography, COUNT(*) AS num_geographies FROM income GROUP BY Geography", + "SELECT Price, COUNT(*) AS total FROM prices GROUP BY Price ORDER BY total", &accessor, &prover_setup, &verifier_setup, From 6274515d9f51e6dec2e4eb3c9bcb0a07176c3d20 Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:31 -0500 Subject: [PATCH 13/74] feat: add complex query to example --- crates/proof-of-sql/examples/avocado-prices/main.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git 
a/crates/proof-of-sql/examples/avocado-prices/main.rs b/crates/proof-of-sql/examples/avocado-prices/main.rs index a06a332db..85d5e50b8 100644 --- a/crates/proof-of-sql/examples/avocado-prices/main.rs +++ b/crates/proof-of-sql/examples/avocado-prices/main.rs @@ -115,4 +115,10 @@ fn main() { &prover_setup, &verifier_setup, ); + prove_and_verify_query( + "SELECT Year, COUNT(*) AS total FROM prices WHERE Price > 100 GROUP BY Year ORDER BY total DESC LIMIT 5", + &accessor, + &prover_setup, + &verifier_setup, + ); } From 97d112d36db8889926a408d622ce31039ad72752 Mon Sep 17 00:00:00 2001 From: Johnny <132952411+jd-sxt@users.noreply.github.com> Date: Wed, 23 Oct 2024 21:04:32 -0500 Subject: [PATCH 14/74] ci: run example in ci --- .github/workflows/lint-and-test.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 85014b80a..1c3ecadba 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -124,6 +124,8 @@ jobs: run: cargo run --example books - name: Run brands example run: cargo run --example brands + - name: Run avocado-prices example + run: cargo run --example avocado-prices - name: Run plastics example run: cargo run --example plastics - name: Run posql_db example (With Blitzar) @@ -240,4 +242,4 @@ jobs: - name: Install solhint run: npm install -g solhint - name: Run tests - run: solhint -c 'crates/proof-of-sql/.solhint.json' 'crates/proof-of-sql/**/*.sol' -w 0 \ No newline at end of file + run: solhint -c 'crates/proof-of-sql/.solhint.json' 'crates/proof-of-sql/**/*.sol' -w 0 From 69216e40c6663410392f601791a2fb627158a55c Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 19:59:32 -0500 Subject: [PATCH 15/74] feat: add sushi example stub --- crates/proof-of-sql/Cargo.toml | 4 ++++ crates/proof-of-sql/examples/sushi/main.rs | 6 ++++++ 2 files changed, 10 insertions(+) create mode 100644 
crates/proof-of-sql/examples/sushi/main.rs diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index f11121a68..cfa73da37 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -123,6 +123,10 @@ required-features = [ "arrow" ] name = "avocado-prices" required-features = [ "arrow" ] +[[example]] +name = "sushi" +required-features = [ "arrow" ] + [[bench]] name = "posql_benches" harness = false diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs new file mode 100644 index 000000000..c77c93f69 --- /dev/null +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -0,0 +1,6 @@ +//! This is an non-interactive example of using Proof of SQL with some sushi related datasets. +//! To run this, use `cargo run --example sushi`. + +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --release --example space --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. 
+fn main() {} \ No newline at end of file From 3fce6d3f436a3b957cadf64b2deba0ea78598348 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 18:29:19 -0500 Subject: [PATCH 16/74] feat: add dataset for sushi example --- crates/proof-of-sql/examples/sushi/fish.csv | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 crates/proof-of-sql/examples/sushi/fish.csv diff --git a/crates/proof-of-sql/examples/sushi/fish.csv b/crates/proof-of-sql/examples/sushi/fish.csv new file mode 100644 index 000000000..74eb0dbd5 --- /dev/null +++ b/crates/proof-of-sql/examples/sushi/fish.csv @@ -0,0 +1,13 @@ +nameEn,nameJa,kindEn,kindJa,pricePerPound +Tuna,Maguro,Lean Red Meat,Akami,25 +Tuna,Maguro,Medium Fat Read Meat,Toro,65 +Tuna,Maguro,Fatty Red Meat,Otoro,115 +Bonito,Katsuo,Red Meat,Akami,20 +Yellowtail,Hamachi,Red Meat,Akami,27 +Salmon,Salmon,White Fish,Shiromi,17 +Sea Bream,Tai,White Fish,Shiromi,32 +Sea Bass,Suzuki,White Fish,Shiromi,28 +Mackerel,Aji,Silver Skinned,Hikarimono,14 +Sardine,Iwashi,Silver Skinned,Hikarimono,11 +Scallops,Hotate,Shellfish,Kai,26 +Ark-shell clams,Akagai,Shellfish,Kai,29 From d68dce4de202c80689ba8f7c9dcfc48f0a49c995 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 18:36:32 -0500 Subject: [PATCH 17/74] feat: load dataset for sushi example --- crates/proof-of-sql/examples/sushi/main.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index c77c93f69..69096bba8 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -3,4 +3,20 @@ //! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, //! you can run `cargo run --release --example space --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. 
-fn main() {} \ No newline at end of file +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use std::fs::File; + +fn main() { + let filename = "./crates/proof-of-sql/examples/sushi/fish.csv"; + let fish_batch = ReaderBuilder::new(SchemaRef::new( + infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), + )) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + println!("{fish_batch:?}"); +} \ No newline at end of file From a98cc0469898dd364949febac23f9d601f8bbf76 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 18:42:40 -0500 Subject: [PATCH 18/74] feat: setup prover, verifier, & accessor; add function to prove and verify queries --- crates/proof-of-sql/examples/sushi/main.rs | 88 +++++++++++++++++++++- 1 file changed, 86 insertions(+), 2 deletions(-) diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index 69096bba8..f0318f719 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -4,10 +4,90 @@ //! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, //! you can run `cargo run --release --example space --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. 
use arrow::datatypes::SchemaRef; -use arrow_csv::{infer_schema_from_files, ReaderBuilder}; -use std::fs::File; +use arrow_csv::{ + infer_schema_from_files, + ReaderBuilder +}; +use std::{ + fs::File, + time::Instant +}; +use proof_of_sql::{ + base::database::{ + OwnedTable, + OwnedTableTestAccessor, + TestAccessor + }, + proof_primitive::dory::{ + DynamicDoryCommitment, + DynamicDoryEvaluationProof, + ProverSetup, + PublicParameters, + VerifierSetup + }, + sql::{ + parse::QueryExpr, + proof::QueryProof + } +}; +use rand::{ + rngs::StdRng, + SeedableRng +}; + +const DORY_SETUP_MAX_NU: usize = 8; +const DORY_SEED: [u8; 32] = *b"sushi-is-the-best-food-available"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. +fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "sushi".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + // Display the result + println!("Query Result:"); + println!("{:?}", result.table); +} fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let 
prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + let filename = "./crates/proof-of-sql/examples/sushi/fish.csv"; let fish_batch = ReaderBuilder::new(SchemaRef::new( infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), @@ -19,4 +99,8 @@ fn main() { .unwrap() .unwrap(); println!("{fish_batch:?}"); + + // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. + let mut accessor = OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table("sushi.fish".parse().unwrap(), OwnedTable::try_from(fish_batch).unwrap(), 0); } \ No newline at end of file From 2ab75b63974b7edf4f74d9c82f7b719be8a9b2f4 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 19:03:34 -0500 Subject: [PATCH 19/74] feat: add select * query for sushi example --- crates/proof-of-sql/examples/sushi/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index f0318f719..c57c6f958 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -103,4 +103,11 @@ fn main() { // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. 
let mut accessor = OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); accessor.add_table("sushi.fish".parse().unwrap(), OwnedTable::try_from(fish_batch).unwrap(), 0); + + prove_and_verify_query( + "SELECT * FROM fish", + &accessor, + &prover_setup, + &verifier_setup, + ); } \ No newline at end of file From 2dc3b84a6519b5f0cb5c6abea7dc3afc1908ace1 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 19:09:06 -0500 Subject: [PATCH 20/74] feat: add tuna queries for sushi example --- crates/proof-of-sql/examples/sushi/main.rs | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index c57c6f958..6ab1fb0fe 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -110,4 +110,27 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT COUNT(*) FROM fish WHERE nameEn = 'Tuna'", + &accessor, + &prover_setup, + &verifier_setup, + ); + + prove_and_verify_query( + "SELECT kindEn FROM fish WHERE kindJa = 'Otoro'", + &accessor, + &prover_setup, + &verifier_setup, + ); + + prove_and_verify_query( + "SELECT kindEn FROM fish WHERE kindJa = 'Otoro'", + &accessor, + &prover_setup, + &verifier_setup, + ); + + } \ No newline at end of file From 9998f3de9a4b62bc741854c3b060f6fdd251737c Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 19:37:06 -0500 Subject: [PATCH 21/74] feat: add price-based query for sushi example --- crates/proof-of-sql/examples/sushi/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index 6ab1fb0fe..086201e1a 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -131,6 +131,13 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT * FROM fish WHERE pricePerPound 
> 25 AND pricePerPound < 75", + &accessor, + &prover_setup, + &verifier_setup, + ); } \ No newline at end of file From ad153e318572b49e147dd6b662d611390c54e488 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 19:45:28 -0500 Subject: [PATCH 22/74] feat: add group by query for sushi example --- crates/proof-of-sql/examples/sushi/main.rs | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index 086201e1a..31dd8e8a3 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -138,6 +138,11 @@ fn main() { &prover_setup, &verifier_setup, ); - - + + prove_and_verify_query( + "SELECT kindJa, COUNT(*) FROM fish GROUP BY kindJa", + &accessor, + &prover_setup, + &verifier_setup, + ); } \ No newline at end of file From 3c77f19ca61031029d59b879b3a4718ae7f27fb7 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 19:49:20 -0500 Subject: [PATCH 23/74] feat: add order by query for sushi example --- crates/proof-of-sql/examples/sushi/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index 31dd8e8a3..29bc91b30 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -145,4 +145,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT kindJa, pricePerPound FROM fish WHERE nameEn = 'Tuna' ORDER BY pricePerPound ASC", + &accessor, + &prover_setup, + &verifier_setup, + ); } \ No newline at end of file From 6d43aaf37b1914c6fc5001e82ea17eeb9b5232a3 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 19:50:29 -0500 Subject: [PATCH 24/74] style: run cargo fmt for sushi example --- crates/proof-of-sql/examples/sushi/main.rs | 76 +++++++++------------- 1 file changed, 31 insertions(+), 45 deletions(-) diff --git 
a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index 29bc91b30..6fed16ce0 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -4,36 +4,17 @@ //! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, //! you can run `cargo run --release --example space --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. use arrow::datatypes::SchemaRef; -use arrow_csv::{ - infer_schema_from_files, - ReaderBuilder -}; -use std::{ - fs::File, - time::Instant -}; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; use proof_of_sql::{ - base::database::{ - OwnedTable, - OwnedTableTestAccessor, - TestAccessor - }, + base::database::{OwnedTable, OwnedTableTestAccessor, TestAccessor}, proof_primitive::dory::{ - DynamicDoryCommitment, - DynamicDoryEvaluationProof, - ProverSetup, - PublicParameters, - VerifierSetup + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, }, - sql::{ - parse::QueryExpr, - proof::QueryProof - } -}; -use rand::{ - rngs::StdRng, - SeedableRng + sql::{parse::QueryExpr, proof::QueryProof}, }; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; const DORY_SETUP_MAX_NU: usize = 8; const DORY_SEED: [u8; 32] = *b"sushi-is-the-best-food-available"; @@ -83,73 +64,78 @@ fn prove_and_verify_query( } fn main() { - let mut rng = StdRng::from_seed(DORY_SEED); + let mut rng = StdRng::from_seed(DORY_SEED); let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); let prover_setup = ProverSetup::from(&public_parameters); let verifier_setup = VerifierSetup::from(&public_parameters); - let filename = "./crates/proof-of-sql/examples/sushi/fish.csv"; + let filename = "./crates/proof-of-sql/examples/sushi/fish.csv"; let fish_batch = ReaderBuilder::new(SchemaRef::new( 
infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap(), )) - .with_header(true) - .build(File::open(filename).unwrap()) - .unwrap() - .next() - .unwrap() - .unwrap(); + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); println!("{fish_batch:?}"); - // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. - let mut accessor = OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); - accessor.add_table("sushi.fish".parse().unwrap(), OwnedTable::try_from(fish_batch).unwrap(), 0); + // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments. + let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "sushi.fish".parse().unwrap(), + OwnedTable::try_from(fish_batch).unwrap(), + 0, + ); - prove_and_verify_query( + prove_and_verify_query( "SELECT * FROM fish", &accessor, &prover_setup, &verifier_setup, ); - prove_and_verify_query( + prove_and_verify_query( "SELECT COUNT(*) FROM fish WHERE nameEn = 'Tuna'", &accessor, &prover_setup, &verifier_setup, ); - prove_and_verify_query( + prove_and_verify_query( "SELECT kindEn FROM fish WHERE kindJa = 'Otoro'", &accessor, &prover_setup, &verifier_setup, ); - prove_and_verify_query( + prove_and_verify_query( "SELECT kindEn FROM fish WHERE kindJa = 'Otoro'", &accessor, &prover_setup, &verifier_setup, ); - prove_and_verify_query( + prove_and_verify_query( "SELECT * FROM fish WHERE pricePerPound > 25 AND pricePerPound < 75", &accessor, &prover_setup, &verifier_setup, ); - prove_and_verify_query( + prove_and_verify_query( "SELECT kindJa, COUNT(*) FROM fish GROUP BY kindJa", &accessor, &prover_setup, &verifier_setup, ); - prove_and_verify_query( + prove_and_verify_query( "SELECT kindJa, pricePerPound FROM fish WHERE nameEn = 'Tuna' ORDER BY pricePerPound ASC", &accessor, &prover_setup, &verifier_setup, ); -} \ No newline 
at end of file +} From 37235f4edded8a2d7372f4b8c51eae804c86bc76 Mon Sep 17 00:00:00 2001 From: aw-sxt Date: Thu, 24 Oct 2024 19:51:26 -0500 Subject: [PATCH 25/74] ci: add sushi example to lint-and-test workflow --- .github/workflows/lint-and-test.yml | 2 ++ crates/proof-of-sql/examples/sushi/fish.csv | 2 +- crates/proof-of-sql/examples/sushi/main.rs | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 1c3ecadba..50d45e760 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -128,6 +128,8 @@ jobs: run: cargo run --example avocado-prices - name: Run plastics example run: cargo run --example plastics + - name: Run sushi example + run: cargo run --example sushi - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) diff --git a/crates/proof-of-sql/examples/sushi/fish.csv b/crates/proof-of-sql/examples/sushi/fish.csv index 74eb0dbd5..e0a14ebc0 100644 --- a/crates/proof-of-sql/examples/sushi/fish.csv +++ b/crates/proof-of-sql/examples/sushi/fish.csv @@ -1,6 +1,6 @@ nameEn,nameJa,kindEn,kindJa,pricePerPound Tuna,Maguro,Lean Red Meat,Akami,25 -Tuna,Maguro,Medium Fat Read Meat,Toro,65 +Tuna,Maguro,Medium Fat Red Meat,Toro,65 Tuna,Maguro,Fatty Red Meat,Otoro,115 Bonito,Katsuo,Red Meat,Akami,20 Yellowtail,Hamachi,Red Meat,Akami,27 diff --git a/crates/proof-of-sql/examples/sushi/main.rs b/crates/proof-of-sql/examples/sushi/main.rs index 6fed16ce0..0c7f89545 100644 --- a/crates/proof-of-sql/examples/sushi/main.rs +++ b/crates/proof-of-sql/examples/sushi/main.rs @@ -2,7 +2,7 @@ //! To run this, use `cargo run --example sushi`. //! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --release --example space --no-default-features --features="arrow cpu-perf"` instead. 
It will be slower for proof generation. +//! you can run `cargo run --release --example sushi --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. use arrow::datatypes::SchemaRef; use arrow_csv::{infer_schema_from_files, ReaderBuilder}; use proof_of_sql::{ From c103e798ec4502bf24a6d8d221007e2ccd7c8665 Mon Sep 17 00:00:00 2001 From: jay white Date: Thu, 24 Oct 2024 20:23:45 -0400 Subject: [PATCH 26/74] perf: make cpu commitment computation more efficient --- .../dynamic_dory_commitment_helper_cpu.rs | 57 +++++++++++++------ .../dory/dynamic_dory_structure.rs | 2 +- 2 files changed, 42 insertions(+), 17 deletions(-) diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs index 8ed2ddbb5..b36e1177d 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_commitment_helper_cpu.rs @@ -1,10 +1,14 @@ use super::{ - dynamic_dory_structure::row_and_column_from_index, pairings, DoryScalar, DynamicDoryCommitment, - G1Affine, G1Projective, ProverSetup, GT, + dynamic_dory_structure::{full_width_of_row, row_and_column_from_index, row_start_index}, + pairings, DoryScalar, DynamicDoryCommitment, G1Projective, ProverSetup, GT, }; -use crate::base::commitment::CommittableColumn; -use alloc::{vec, vec::Vec}; +use crate::base::{commitment::CommittableColumn, if_rayon, slice_ops::slice_cast}; +use alloc::vec::Vec; +use ark_ec::VariableBaseMSM; +use bytemuck::TransparentWrapper; use num_traits::Zero; +#[cfg(feature = "rayon")] +use rayon::iter::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator}; #[tracing::instrument(name = "compute_dory_commitment_impl (cpu)", level = "debug", skip_all)] /// # Panics @@ -13,6 +17,7 @@ use num_traits::Zero; /// - `setup.Gamma_1.last()` returns `None`, indicating that 
`Gamma_1` is empty. /// - `setup.Gamma_2.last()` returns `None`, indicating that `Gamma_2` is empty. /// - The indexing for `Gamma_2` with `first_row..=last_row` goes out of bounds. +#[allow(clippy::range_plus_one)] fn compute_dory_commitment_impl<'a, T>( column: &'a [T], offset: usize, @@ -22,18 +27,39 @@ where &'a T: Into, T: Sync, { + if column.is_empty() { + return DynamicDoryCommitment::default(); + } let Gamma_1 = setup.Gamma_1.last().unwrap(); let Gamma_2 = setup.Gamma_2.last().unwrap(); - let (first_row, _) = row_and_column_from_index(offset); - let (last_row, _) = row_and_column_from_index(offset + column.len() - 1); - let row_commits = column.iter().enumerate().fold( - vec![G1Projective::from(G1Affine::identity()); last_row - first_row + 1], - |mut row_commits, (i, v)| { - let (row, col) = row_and_column_from_index(i + offset); - row_commits[row - first_row] += Gamma_1[col] * v.into().0; - row_commits - }, - ); + let (first_row, first_col) = row_and_column_from_index(offset); + let (last_row, last_col) = row_and_column_from_index(offset + column.len() - 1); + + let row_commits: Vec<_> = if_rayon!( + (first_row..=last_row).into_par_iter(), + (first_row..=last_row) + ) + .map(|row| { + let width = full_width_of_row(row); + let row_start = row_start_index(row); + let (gamma_range, column_range) = if first_row == last_row { + (first_col..last_col + 1, 0..column.len()) + } else if row == 1 { + (1..2, (1 - offset)..(2 - offset)) + } else if row == first_row { + (first_col..width, 0..width - first_col) + } else if row == last_row { + (0..last_col + 1, column.len() - last_col - 1..column.len()) + } else { + (0..width, row_start - offset..width + row_start - offset) + }; + G1Projective::msm_unchecked( + &Gamma_1[gamma_range], + TransparentWrapper::peel_slice(&slice_cast::<_, DoryScalar>(&column[column_range])), + ) + }) + .collect(); + DynamicDoryCommitment(pairings::multi_pairing( row_commits, &Gamma_2[first_row..=last_row], @@ -70,8 +96,7 @@ pub(super) fn 
compute_dynamic_dory_commitments( offset: usize, setup: &ProverSetup, ) -> Vec { - committable_columns - .iter() + if_rayon!(committable_columns.par_iter(), committable_columns.iter()) .map(|column| { column .is_empty() diff --git a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_structure.rs b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_structure.rs index 2598db988..03f6ffaa3 100644 --- a/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_structure.rs +++ b/crates/proof-of-sql/src/proof_primitive/dory/dynamic_dory_structure.rs @@ -40,7 +40,7 @@ pub(crate) const fn full_width_of_row(row: usize) -> usize { /// Returns the index that belongs in the first column in a particular row. /// /// Note: when row = 1, this correctly returns 0, even though no data belongs there. -#[cfg(test)] +#[cfg(any(test, not(feature = "blitzar")))] pub(crate) const fn row_start_index(row: usize) -> usize { let width_of_row = full_width_of_row(row); width_of_row * (row - width_of_row / 2) From 904b9723307584bd5d40902c0b0352b4a6fd855d Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:12:47 -0400 Subject: [PATCH 27/74] feat: add countries CSV --- .../examples/countries/countries.csv | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 crates/proof-of-sql/examples/countries/countries.csv diff --git a/crates/proof-of-sql/examples/countries/countries.csv b/crates/proof-of-sql/examples/countries/countries.csv new file mode 100644 index 000000000..ee40560e7 --- /dev/null +++ b/crates/proof-of-sql/examples/countries/countries.csv @@ -0,0 +1,31 @@ +Country,Continent,GDP,GDPP +UnitedStates,NorthAmerica,21137,63543 +China,Asia,14342,10261 +Japan,Asia,5081,40293 +Germany,Europe,3846,46329 +India,Asia,2875,2099 +UnitedKingdom,Europe,2825,42330 +France,Europe,2716,41463 +Italy,Europe,2001,33279 +Brazil,SouthAmerica,1839,8718 +Canada,NorthAmerica,1643,43119 +Russia,EuropeAsia,1637,11229 +SouthKorea,Asia,1622,31489 
+Australia,Oceania,1382,53799 +Spain,Europe,1316,28152 +Mexico,NorthAmerica,1265,9958 +Indonesia,Asia,1119,4152 +Netherlands,Europe,902,52477 +SaudiArabia,Asia,793,23206 +Turkey,EuropeAsia,761,9005 +Switzerland,Europe,703,81392 +Argentina,SouthAmerica,449,9921 +Sweden,Europe,528,52073 +Nigeria,Africa,448,2190 +Poland,Europe,594,15673 +Thailand,Asia,509,7306 +SouthAfrica,Africa,350,5883 +Philippines,Asia,402,3685 +Colombia,SouthAmerica,323,6458 +Egypt,Africa,302,3012 +Pakistan,Asia,278,1260 \ No newline at end of file From 552979dbf930262d505b31ceeb2c56cb0676ec24 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:14:25 -0400 Subject: [PATCH 28/74] feat: add more countries gdp data --- crates/proof-of-sql/examples/countries/countries.csv | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/countries/countries.csv b/crates/proof-of-sql/examples/countries/countries.csv index ee40560e7..7cc4c9cb5 100644 --- a/crates/proof-of-sql/examples/countries/countries.csv +++ b/crates/proof-of-sql/examples/countries/countries.csv @@ -28,4 +28,8 @@ SouthAfrica,Africa,350,5883 Philippines,Asia,402,3685 Colombia,SouthAmerica,323,6458 Egypt,Africa,302,3012 -Pakistan,Asia,278,1260 \ No newline at end of file +Pakistan,Asia,278,1260 +Bangladesh,Asia,302,1855 +Vietnam,Asia,283,2900 +Chile,SouthAmerica,252,13120 +Finland,Europe,268,48888 \ No newline at end of file From ffeec5e30202fec5a242b1187cf0943602f847fa Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:15:44 -0400 Subject: [PATCH 29/74] feat: correct countries gdp data --- crates/proof-of-sql/examples/countries/countries.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/countries/countries.csv b/crates/proof-of-sql/examples/countries/countries.csv index 7cc4c9cb5..397102f8f 100644 --- a/crates/proof-of-sql/examples/countries/countries.csv +++ b/crates/proof-of-sql/examples/countries/countries.csv @@ 
-28,7 +28,7 @@ SouthAfrica,Africa,350,5883 Philippines,Asia,402,3685 Colombia,SouthAmerica,323,6458 Egypt,Africa,302,3012 -Pakistan,Asia,278,1260 +Pakistan,Asia,278,1450 Bangladesh,Asia,302,1855 Vietnam,Asia,283,2900 Chile,SouthAmerica,252,13120 From f67f8335026ef1d560acb7fca7cd4c51ff3a1e8c Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:17:16 -0400 Subject: [PATCH 30/74] refactor: rename countries csv to countries_gdp csv --- .../examples/countries/{countries.csv => countries_gdp.csv} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename crates/proof-of-sql/examples/countries/{countries.csv => countries_gdp.csv} (100%) diff --git a/crates/proof-of-sql/examples/countries/countries.csv b/crates/proof-of-sql/examples/countries/countries_gdp.csv similarity index 100% rename from crates/proof-of-sql/examples/countries/countries.csv rename to crates/proof-of-sql/examples/countries/countries_gdp.csv From 8bbfa1c6f4e165007a71ff622cb9b8116f39d543 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:47:34 -0400 Subject: [PATCH 31/74] feat: add countries example code --- .../proof-of-sql/examples/countries/main.rs | 118 ++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 crates/proof-of-sql/examples/countries/main.rs diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs new file mode 100644 index 000000000..5e2fc8e29 --- /dev/null +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -0,0 +1,118 @@ +//! This is a non-interactive example of using Proof of SQL with a countries dataset. +//! To run this, use `cargo run --release --example countries`. +//! +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --release --example countries --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. 
+ +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{ + arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, + TestAccessor, + }, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; + +// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. +// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. +const DORY_SETUP_MAX_NU: usize = 8; +// This should be a "nothing-up-my-sleeve" phrase or number. +const DORY_SEED: [u8; 32] = *b"7a1b3c8d2e4f9g6h5i0j7k2l8m3n9o1p"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. 
+fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "countries".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{result:?}"); +} + +fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + + let filename = "./crates/proof-of-sql/examples/countries/countries_gdp.csv"; + let inferred_schema = + SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); + let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); + + let countries_batch = ReaderBuilder::new(posql_compatible_schema) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + + // Load the table into an "Accessor" so that the prover and verifier can access the 
data/commitments. + let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "countries.countries".parse().unwrap(), + OwnedTable::try_from(countries_batch).unwrap(), + 0, + ); + + prove_and_verify_query( + "SELECT COUNT(*) AS total_countries FROM countries", + &accessor, + &prover_setup, + &verifier_setup, + ); + + prove_and_verify_query( + "SELECT continent, MAX(gdp) as max_gdp, COUNT(*) as country_count FROM countries GROUP BY continent ORDER BY max_gdp DESC", + &accessor, + &prover_setup, + &verifier_setup, + ); +} From 0c284469f84388d2394bdaa03acbf543ebb4e88d Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:51:45 -0400 Subject: [PATCH 32/74] feat: add example query with filter --- crates/proof-of-sql/examples/countries/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs index 5e2fc8e29..a17c16c1f 100644 --- a/crates/proof-of-sql/examples/countries/main.rs +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -115,4 +115,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT country FROM countries WHERE continent = 'Asia'", + &accessor, + &prover_setup, + &verifier_setup, + ); } From 471f64605944ef0644d62cbf1dbc2f844d5533c2 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:56:07 -0400 Subject: [PATCH 33/74] feat: add example query with complex filter --- crates/proof-of-sql/examples/countries/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs index a17c16c1f..5a8244704 100644 --- a/crates/proof-of-sql/examples/countries/main.rs +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -122,4 +122,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT country FROM countries WHERE gdp > 
500 AND gdp < 1500", + &accessor, + &prover_setup, + &verifier_setup, + ); } From 5d106cb2e60ca2824ebddc3d18ff001a5159bff6 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 17:58:43 -0400 Subject: [PATCH 34/74] feat: add countries example to Cargo.toml --- crates/proof-of-sql/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index cfa73da37..c2be7d100 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -127,6 +127,10 @@ required-features = [ "arrow" ] name = "sushi" required-features = [ "arrow" ] +[[example]] +name = "countries" +required-features = [ "arrow" ] + [[bench]] name = "posql_benches" harness = false From 5dfd6ec780a831518b7ba4eaaee6a9e7857e0170 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 18:01:48 -0400 Subject: [PATCH 35/74] feat: add countries example to lint-and-test.yml --- .github/workflows/lint-and-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 50d45e760..51191d986 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -130,6 +130,8 @@ jobs: run: cargo run --example plastics - name: Run sushi example run: cargo run --example sushi + - name: Run countries example + run: cargo run --example countries - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) From 08e361992c89a088a40366e14936efb8d5a4ee45 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 20:08:33 -0400 Subject: [PATCH 36/74] fix: remove query which genertes wild card --- crates/proof-of-sql/examples/countries/main.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs index 5a8244704..520d92775 100644 
--- a/crates/proof-of-sql/examples/countries/main.rs +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -109,13 +109,6 @@ fn main() { &verifier_setup, ); - prove_and_verify_query( - "SELECT continent, MAX(gdp) as max_gdp, COUNT(*) as country_count FROM countries GROUP BY continent ORDER BY max_gdp DESC", - &accessor, - &prover_setup, - &verifier_setup, - ); - prove_and_verify_query( "SELECT country FROM countries WHERE continent = 'Asia'", &accessor, From 7d7712f1f06308acba056332682d0100a7b03636 Mon Sep 17 00:00:00 2001 From: Venu Vemula Date: Fri, 25 Oct 2024 20:14:29 -0400 Subject: [PATCH 37/74] refactor: add query for sum --- crates/proof-of-sql/examples/countries/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/countries/main.rs b/crates/proof-of-sql/examples/countries/main.rs index 520d92775..10bfb8705 100644 --- a/crates/proof-of-sql/examples/countries/main.rs +++ b/crates/proof-of-sql/examples/countries/main.rs @@ -122,4 +122,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT SUM(gdp) AS total_market_cap FROM countries WHERE country = 'China' OR country = 'India'", + &accessor, + &prover_setup, + &verifier_setup, + ); } From ddcd1a48b739655a68aa77cb05c3f86b8a3b070f Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:55 -0400 Subject: [PATCH 38/74] chore: remove `test_accessor_utility` --- crates/proof-of-sql/src/base/database/mod.rs | 5 - .../base/database/test_accessor_utility.rs | 218 ------------------ 2 files changed, 223 deletions(-) delete mode 100644 crates/proof-of-sql/src/base/database/test_accessor_utility.rs diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index e65b7efb5..a5079e732 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -34,11 +34,6 @@ pub use 
record_batch_utility::ToArrow; #[cfg(feature = "arrow")] pub mod arrow_schema_utility; -#[cfg(all(test, feature = "arrow", feature = "test"))] -mod test_accessor_utility; -#[cfg(all(test, feature = "arrow", feature = "test"))] -pub use test_accessor_utility::{make_random_test_accessor_data, RandomTestAccessorDescriptor}; - mod owned_column; pub(crate) use owned_column::compare_indexes_by_owned_columns_with_direction; pub use owned_column::OwnedColumn; diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs deleted file mode 100644 index 2b06081dd..000000000 --- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs +++ /dev/null @@ -1,218 +0,0 @@ -use crate::base::database::ColumnType; -use arrow::{ - array::{ - Array, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, Int64Array, - Int8Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, - TimestampNanosecondArray, TimestampSecondArray, - }, - datatypes::{i256, DataType, Field, Schema, TimeUnit}, - record_batch::RecordBatch, -}; -use proof_of_sql_parser::posql_time::PoSQLTimeUnit; -use rand::{ - distributions::{Distribution, Uniform}, - rngs::StdRng, -}; -use std::sync::Arc; - -/// Specify what form a randomly generated `TestAccessor` can take -pub struct RandomTestAccessorDescriptor { - /// The minimum number of rows in the generated `RecordBatch` - pub min_rows: usize, - /// The maximum number of rows in the generated `RecordBatch` - pub max_rows: usize, - /// The minimum value of the generated data - pub min_value: i64, - /// The maximum value of the generated data - pub max_value: i64, -} - -impl Default for RandomTestAccessorDescriptor { - fn default() -> Self { - Self { - min_rows: 0, - max_rows: 100, - min_value: -5, - max_value: 5, - } - } -} - -/// Generate a `DataFrame` with random data -/// -/// # Panics -/// -/// This function may panic in the following cases: -/// - 
If `Precision::new(7)` fails when creating a `Decimal75` column type, which would occur -/// if the precision is invalid. -/// - When calling `.unwrap()` on the result of `RecordBatch::try_new(schema, columns)`, which -/// will panic if the schema and columns do not align correctly or if there are any other -/// underlying errors. -#[allow(dead_code, clippy::too_many_lines)] -pub fn make_random_test_accessor_data( - rng: &mut StdRng, - cols: &[(&str, ColumnType)], - descriptor: &RandomTestAccessorDescriptor, -) -> RecordBatch { - let n = Uniform::new(descriptor.min_rows, descriptor.max_rows + 1).sample(rng); - let dist = Uniform::new(descriptor.min_value, descriptor.max_value + 1); - - let mut columns: Vec> = Vec::with_capacity(n); - let mut column_fields: Vec<_> = Vec::with_capacity(n); - - for (col_name, col_type) in cols { - let values: Vec = dist.sample_iter(&mut *rng).take(n).collect(); - - match col_type { - ColumnType::Boolean => { - column_fields.push(Field::new(*col_name, DataType::Boolean, false)); - let boolean_values: Vec = values.iter().map(|x| x % 2 != 0).collect(); - columns.push(Arc::new(BooleanArray::from(boolean_values))); - } - ColumnType::TinyInt => { - column_fields.push(Field::new(*col_name, DataType::Int8, false)); - let values: Vec = values - .iter() - .map(|x| ((*x >> 56) as i8)) // Shift right to align the lower 8 bits - .collect(); - columns.push(Arc::new(Int8Array::from(values))); - } - ColumnType::SmallInt => { - column_fields.push(Field::new(*col_name, DataType::Int16, false)); - let values: Vec = values - .iter() - .map(|x| ((*x >> 48) as i16)) // Shift right to align the lower 16 bits - .collect(); - columns.push(Arc::new(Int16Array::from(values))); - } - ColumnType::Int => { - column_fields.push(Field::new(*col_name, DataType::Int32, false)); - let values: Vec = values - .iter() - .map(|x| ((*x >> 32) as i32)) // Shift right to align the lower 32 bits - .collect(); - columns.push(Arc::new(Int32Array::from(values))); - } - 
ColumnType::BigInt => { - column_fields.push(Field::new(*col_name, DataType::Int64, false)); - let values: Vec = values.clone(); - columns.push(Arc::new(Int64Array::from(values))); - } - ColumnType::Int128 => { - column_fields.push(Field::new(*col_name, DataType::Decimal128(38, 0), false)); - - let values: Vec = values.iter().map(|x| i128::from(*x)).collect(); - columns.push(Arc::new( - Decimal128Array::from(values.clone()) - .with_precision_and_scale(38, 0) - .unwrap(), - )); - } - ColumnType::Decimal75(precision, scale) => { - column_fields.push(Field::new( - *col_name, - DataType::Decimal256(precision.value(), *scale), - false, - )); - - let values: Vec = values.iter().map(|x| i256::from(*x)).collect(); - columns.push(Arc::new( - Decimal256Array::from(values.clone()) - .with_precision_and_scale(precision.value(), *scale) - .unwrap(), - )); - } - ColumnType::VarChar => { - let col = &values - .iter() - .map(|v| "s".to_owned() + &v.to_string()[..]) - .collect::>()[..]; - let col: Vec<_> = col.iter().map(String::as_str).collect(); - - column_fields.push(Field::new(*col_name, DataType::Utf8, false)); - - columns.push(Arc::new(StringArray::from(col))); - } - ColumnType::Scalar => unimplemented!("Scalar columns are not supported by arrow"), - ColumnType::TimestampTZ(tu, tz) => { - column_fields.push(Field::new( - *col_name, - DataType::Timestamp( - match tu { - PoSQLTimeUnit::Second => TimeUnit::Second, - PoSQLTimeUnit::Millisecond => TimeUnit::Millisecond, - PoSQLTimeUnit::Microsecond => TimeUnit::Microsecond, - PoSQLTimeUnit::Nanosecond => TimeUnit::Nanosecond, - }, - Some(Arc::from(tz.to_string())), - ), - false, - )); - // Create the correct timestamp array based on the time unit - let timestamp_array: Arc = match tu { - PoSQLTimeUnit::Second => Arc::new(TimestampSecondArray::from(values.clone())), - PoSQLTimeUnit::Millisecond => { - Arc::new(TimestampMillisecondArray::from(values.clone())) - } - PoSQLTimeUnit::Microsecond => { - 
Arc::new(TimestampMicrosecondArray::from(values.clone())) - } - PoSQLTimeUnit::Nanosecond => { - Arc::new(TimestampNanosecondArray::from(values.clone())) - } - }; - columns.push(timestamp_array); - } - } - } - - let schema = Arc::new(Schema::new(column_fields)); - RecordBatch::try_new(schema, columns).unwrap() -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::record_batch; - use rand_core::SeedableRng; - - #[test] - fn we_can_construct_a_random_test_data() { - let descriptor = RandomTestAccessorDescriptor::default(); - let mut rng = StdRng::from_seed([0u8; 32]); - let cols = [ - ("a", ColumnType::BigInt), - ("b", ColumnType::VarChar), - ("c", ColumnType::Int128), - ("d", ColumnType::SmallInt), - ("e", ColumnType::Int), - ("f", ColumnType::TinyInt), - ]; - - let data1 = make_random_test_accessor_data(&mut rng, &cols, &descriptor); - let data2 = make_random_test_accessor_data(&mut rng, &cols, &descriptor); - assert_ne!(data1.num_rows(), data2.num_rows()); - } - - #[test] - fn we_can_construct_a_random_test_data_with_the_correct_data() { - let descriptor = RandomTestAccessorDescriptor { - min_rows: 1, - max_rows: 1, - min_value: -2, - max_value: -2, - }; - let mut rng = StdRng::from_seed([0u8; 32]); - let cols = [ - ("b", ColumnType::BigInt), - ("a", ColumnType::VarChar), - ("c", ColumnType::Int128), - ]; - let data = make_random_test_accessor_data(&mut rng, &cols, &descriptor); - - assert_eq!( - data, - record_batch!("b" => [-2_i64], "a" => ["s-2"], "c" => [-2_i128]) - ); - } -} From 76207882296fb0a4c23e256eb48464edb5495395 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:56 -0400 Subject: [PATCH 39/74] chore: add `arrow` module --- Cargo.toml | 4 ++-- crates/proof-of-sql/src/base/arrow/mod.rs | 1 + crates/proof-of-sql/src/base/mod.rs | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 crates/proof-of-sql/src/base/arrow/mod.rs diff --git a/Cargo.toml 
b/Cargo.toml index a0d8f7216..035636f51 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,8 +20,8 @@ ark-poly = { version = "0.4.0" } ark-serialize = { version = "0.4.0" } ark-std = { version = "0.4.0", default-features = false } arrayvec = { version = "0.7", default-features = false } -arrow = { version = "51.0" } -arrow-csv = { version = "51.0" } +arrow = { version = "51.0.0" } +arrow-csv = { version = "51.0.0" } bit-iter = { version = "1.1.1" } bigdecimal = { version = "0.4.5", default-features = false, features = ["serde"] } blake3 = { version = "1.3.3", default-features = false } diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs new file mode 100644 index 000000000..61ca01f43 --- /dev/null +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -0,0 +1 @@ +//! This module provides conversions and utilities for working with Arrow data structures. diff --git a/crates/proof-of-sql/src/base/mod.rs b/crates/proof-of-sql/src/base/mod.rs index ad5573639..657b855d1 100644 --- a/crates/proof-of-sql/src/base/mod.rs +++ b/crates/proof-of-sql/src/base/mod.rs @@ -1,5 +1,8 @@ //! This module contains basic shared functionalities of the library. 
/// TODO: add docs +#[cfg(feature = "arrow")] +pub mod arrow; + pub(crate) mod bit; pub mod commitment; pub mod database; From ea328c1cfcf73dba283650bf542ce3049c244d92 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:56 -0400 Subject: [PATCH 40/74] chore: move `owned_and_arrow_conversions` module into `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 7 +++++++ .../{database => arrow}/owned_and_arrow_conversions.rs | 5 ++--- .../owned_and_arrow_conversions_test.rs | 4 ++-- crates/proof-of-sql/src/base/database/mod.rs | 9 ++------- 4 files changed, 13 insertions(+), 12 deletions(-) rename crates/proof-of-sql/src/base/{database => arrow}/owned_and_arrow_conversions.rs (98%) rename crates/proof-of-sql/src/base/{database => arrow}/owned_and_arrow_conversions_test.rs (97%) diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 61ca01f43..c00d5b063 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -1 +1,8 @@ //! This module provides conversions and utilities for working with Arrow data structures. + +/// Module for converting between owned and Arrow data structures. +pub mod owned_and_arrow_conversions; + +#[cfg(test)] +/// Tests for owned and Arrow conversions. +mod owned_and_arrow_conversions_test; diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs similarity index 98% rename from crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs rename to crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs index adf4f94af..cf16f0376 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs @@ -12,11 +12,10 @@ //! This is because there is no `Int128` type in Arrow. //! 
This does not check that the values are less than 39 digits. //! However, the actual arrow backing `i128` is the correct value. -use super::scalar_and_i256_conversions::convert_scalar_to_i256; use crate::base::{ database::{ - scalar_and_i256_conversions::convert_i256_to_scalar, OwnedColumn, OwnedTable, - OwnedTableError, + scalar_and_i256_conversions::{convert_i256_to_scalar, convert_scalar_to_i256}, + OwnedColumn, OwnedTable, OwnedTableError, }, map::IndexMap, math::decimal::Precision, diff --git a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs similarity index 97% rename from crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs rename to crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs index 970df4bad..539d94eaa 100644 --- a/crates/proof-of-sql/src/base/database/owned_and_arrow_conversions_test.rs +++ b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs @@ -1,7 +1,7 @@ -use super::{OwnedColumn, OwnedTable}; +use super::owned_and_arrow_conversions::OwnedArrowConversionError; use crate::{ base::{ - database::{owned_table_utility::*, OwnedArrowConversionError}, + database::{owned_table_utility::*, OwnedColumn, OwnedTable}, map::IndexMap, scalar::Curve25519Scalar, }, diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index a5079e732..a630529ee 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -19,6 +19,8 @@ mod literal_value; pub use literal_value::LiteralValue; mod table_ref; +#[cfg(feature = "arrow")] +pub use crate::base::arrow::owned_and_arrow_conversions::OwnedArrowConversionError; pub use table_ref::TableRef; #[cfg(feature = "arrow")] @@ -58,13 +60,6 @@ mod expression_evaluation_error; mod expression_evaluation_test; pub use expression_evaluation_error::{ExpressionEvaluationError, 
ExpressionEvaluationResult}; -#[cfg(feature = "arrow")] -mod owned_and_arrow_conversions; -#[cfg(feature = "arrow")] -pub use owned_and_arrow_conversions::OwnedArrowConversionError; -#[cfg(all(test, feature = "arrow"))] -mod owned_and_arrow_conversions_test; - mod test_accessor; pub use test_accessor::TestAccessor; #[cfg(test)] From 6346e81663903a22194663ba719aa842c80f21a4 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:57 -0400 Subject: [PATCH 41/74] chore: move `scalar_and_i256_conversions` module into `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 3 +++ .../src/base/arrow/owned_and_arrow_conversions.rs | 6 ++---- .../{database => arrow}/scalar_and_i256_conversions.rs | 8 +++----- .../src/base/database/arrow_array_to_column_conversion.rs | 6 ++++-- crates/proof-of-sql/src/base/database/mod.rs | 7 +++---- 5 files changed, 15 insertions(+), 15 deletions(-) rename crates/proof-of-sql/src/base/{database => arrow}/scalar_and_i256_conversions.rs (96%) diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index c00d5b063..8ac51c34e 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -6,3 +6,6 @@ pub mod owned_and_arrow_conversions; #[cfg(test)] /// Tests for owned and Arrow conversions. mod owned_and_arrow_conversions_test; + +/// Module for scalar and i256 conversions. +pub mod scalar_and_i256_conversions; diff --git a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs index cf16f0376..74ad96839 100644 --- a/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs +++ b/crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs @@ -12,11 +12,9 @@ //! This is because there is no `Int128` type in Arrow. //! This does not check that the values are less than 39 digits. //! 
However, the actual arrow backing `i128` is the correct value. +use super::scalar_and_i256_conversions::{convert_i256_to_scalar, convert_scalar_to_i256}; use crate::base::{ - database::{ - scalar_and_i256_conversions::{convert_i256_to_scalar, convert_scalar_to_i256}, - OwnedColumn, OwnedTable, OwnedTableError, - }, + database::{OwnedColumn, OwnedTable, OwnedTableError}, map::IndexMap, math::decimal::Precision, scalar::Scalar, diff --git a/crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs b/crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs similarity index 96% rename from crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs rename to crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs index 9a44c3766..f606c03cb 100644 --- a/crates/proof-of-sql/src/base/database/scalar_and_i256_conversions.rs +++ b/crates/proof-of-sql/src/base/arrow/scalar_and_i256_conversions.rs @@ -54,12 +54,10 @@ pub fn convert_i256_to_scalar(value: &i256) -> Option { #[cfg(test)] mod tests { - - use super::{convert_i256_to_scalar, convert_scalar_to_i256}; - use crate::base::{ - database::scalar_and_i256_conversions::{MAX_SUPPORTED_I256, MIN_SUPPORTED_I256}, - scalar::{Curve25519Scalar, Scalar}, + use super::{ + convert_i256_to_scalar, convert_scalar_to_i256, MAX_SUPPORTED_I256, MIN_SUPPORTED_I256, }; + use crate::base::scalar::{Curve25519Scalar, Scalar}; use arrow::datatypes::i256; use num_traits::Zero; use rand::RngCore; diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs index 15770f312..d6b7d7bbc 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs @@ -1,5 +1,7 @@ -use super::scalar_and_i256_conversions::convert_i256_to_scalar; -use crate::base::{database::Column, math::decimal::Precision, scalar::Scalar}; 
+use crate::base::{ + arrow::scalar_and_i256_conversions::convert_i256_to_scalar, database::Column, + math::decimal::Precision, scalar::Scalar, +}; use arrow::{ array::{ Array, ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index a630529ee..c8c4697c5 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -20,7 +20,9 @@ pub use literal_value::LiteralValue; mod table_ref; #[cfg(feature = "arrow")] -pub use crate::base::arrow::owned_and_arrow_conversions::OwnedArrowConversionError; +pub use crate::base::arrow::{ + owned_and_arrow_conversions::OwnedArrowConversionError, scalar_and_i256_conversions, +}; pub use table_ref::TableRef; #[cfg(feature = "arrow")] @@ -74,9 +76,6 @@ mod owned_table_test_accessor; pub use owned_table_test_accessor::OwnedTableTestAccessor; #[cfg(all(test, feature = "blitzar"))] mod owned_table_test_accessor_test; -/// Contains traits for scalar <-> i256 conversions -#[cfg(feature = "arrow")] -pub mod scalar_and_i256_conversions; /// TODO: add docs pub(crate) mod filter_util; From e585f53d175d5a36f893731eb995a9c99c171e7c Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:57 -0400 Subject: [PATCH 42/74] chore: move `arrow_array_to_column_conversion` module into `arrow` module --- crates/proof-of-sql/examples/posql_db/main.rs | 3 ++- .../examples/posql_db/record_batch_accessor.rs | 4 ++-- .../arrow_array_to_column_conversion.rs | 6 ++---- crates/proof-of-sql/src/base/arrow/mod.rs | 3 +++ .../proof-of-sql/src/base/commitment/table_commitment.rs | 4 +++- crates/proof-of-sql/src/base/database/mod.rs | 9 +++------ 6 files changed, 15 insertions(+), 14 deletions(-) rename crates/proof-of-sql/src/base/{database => arrow}/arrow_array_to_column_conversion.rs (99%) diff --git 
a/crates/proof-of-sql/examples/posql_db/main.rs b/crates/proof-of-sql/examples/posql_db/main.rs index a796ed25e..f2facf2c8 100644 --- a/crates/proof-of-sql/examples/posql_db/main.rs +++ b/crates/proof-of-sql/examples/posql_db/main.rs @@ -5,6 +5,7 @@ mod commit_accessor; mod csv_accessor; /// TODO: add docs mod record_batch_accessor; + use arrow::{ datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, @@ -273,7 +274,7 @@ fn main() { end_timer(timer); println!( "Verified Result: {:?}", - RecordBatch::try_from(query_result).unwrap() + RecordBatch::try_from(query_result.table).unwrap() ); } } diff --git a/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs b/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs index 8af046972..08e25f4fe 100644 --- a/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs +++ b/crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs @@ -2,9 +2,9 @@ use arrow::record_batch::RecordBatch; use bumpalo::Bump; use indexmap::IndexMap; use proof_of_sql::base::{ + arrow::arrow_array_to_column_conversion::ArrayRefExt, database::{ - ArrayRefExt, Column, ColumnRef, ColumnType, DataAccessor, MetadataAccessor, SchemaAccessor, - TableRef, + Column, ColumnRef, ColumnType, DataAccessor, MetadataAccessor, SchemaAccessor, TableRef, }, scalar::Scalar, }; diff --git a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs b/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs similarity index 99% rename from crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs rename to crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs index d6b7d7bbc..15770f312 100644 --- a/crates/proof-of-sql/src/base/database/arrow_array_to_column_conversion.rs +++ b/crates/proof-of-sql/src/base/arrow/arrow_array_to_column_conversion.rs @@ -1,7 +1,5 @@ -use crate::base::{ - arrow::scalar_and_i256_conversions::convert_i256_to_scalar, database::Column, - 
math::decimal::Precision, scalar::Scalar, -}; +use super::scalar_and_i256_conversions::convert_i256_to_scalar; +use crate::base::{database::Column, math::decimal::Precision, scalar::Scalar}; use arrow::{ array::{ Array, ArrayRef, BooleanArray, Decimal128Array, Decimal256Array, Int16Array, Int32Array, diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 8ac51c34e..16fdf1b95 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -1,5 +1,8 @@ //! This module provides conversions and utilities for working with Arrow data structures. +/// Module for handling conversion from Arrow arrays to columns. +pub mod arrow_array_to_column_conversion; + /// Module for converting between owned and Arrow data structures. pub mod owned_and_arrow_conversions; diff --git a/crates/proof-of-sql/src/base/commitment/table_commitment.rs b/crates/proof-of-sql/src/base/commitment/table_commitment.rs index 0f9e21783..f013cce90 100644 --- a/crates/proof-of-sql/src/base/commitment/table_commitment.rs +++ b/crates/proof-of-sql/src/base/commitment/table_commitment.rs @@ -3,7 +3,9 @@ use super::{ ColumnCommitmentsMismatch, Commitment, DuplicateIdentifiers, }; #[cfg(feature = "arrow")] -use crate::base::database::{ArrayRefExt, ArrowArrayToColumnConversionError}; +use crate::base::arrow::arrow_array_to_column_conversion::{ + ArrayRefExt, ArrowArrayToColumnConversionError, +}; use crate::base::{ database::{Column, ColumnField, CommitmentAccessor, OwnedTable, TableRef}, scalar::Scalar, diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index c8c4697c5..55546802c 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -21,15 +21,12 @@ pub use literal_value::LiteralValue; mod table_ref; #[cfg(feature = "arrow")] pub use crate::base::arrow::{ - 
owned_and_arrow_conversions::OwnedArrowConversionError, scalar_and_i256_conversions, + arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError}, + owned_and_arrow_conversions::OwnedArrowConversionError, + scalar_and_i256_conversions, }; pub use table_ref::TableRef; -#[cfg(feature = "arrow")] -mod arrow_array_to_column_conversion; -#[cfg(feature = "arrow")] -pub use arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError}; - #[cfg(feature = "arrow")] mod record_batch_utility; #[cfg(feature = "arrow")] From 75b5dfc7e476c89f706c315a089ae209e246cb22 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:58 -0400 Subject: [PATCH 43/74] chore: move `record_batch_utility` module to `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 3 +++ .../src/base/{database => arrow}/record_batch_utility.rs | 2 +- crates/proof-of-sql/src/base/database/mod.rs | 6 +----- 3 files changed, 5 insertions(+), 6 deletions(-) rename crates/proof-of-sql/src/base/{database => arrow}/record_batch_utility.rs (99%) diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 16fdf1b95..3a3c4500d 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -10,5 +10,8 @@ pub mod owned_and_arrow_conversions; /// Tests for owned and Arrow conversions. mod owned_and_arrow_conversions_test; +/// Utility functions for record batches. +pub mod record_batch_utility; + /// Module for scalar and i256 conversions. 
pub mod scalar_and_i256_conversions; diff --git a/crates/proof-of-sql/src/base/database/record_batch_utility.rs b/crates/proof-of-sql/src/base/arrow/record_batch_utility.rs similarity index 99% rename from crates/proof-of-sql/src/base/database/record_batch_utility.rs rename to crates/proof-of-sql/src/base/arrow/record_batch_utility.rs index d1180005b..3ede592bd 100644 --- a/crates/proof-of-sql/src/base/database/record_batch_utility.rs +++ b/crates/proof-of-sql/src/base/arrow/record_batch_utility.rs @@ -169,7 +169,7 @@ macro_rules! record_batch { use arrow::datatypes::Field; use arrow::datatypes::Schema; use arrow::record_batch::RecordBatch; - use $crate::base::database::ToArrow; + use $crate::base::arrow::record_batch_utility::ToArrow; let schema = Arc::new(Schema::new( vec![$( diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs index 55546802c..b40ba10eb 100644 --- a/crates/proof-of-sql/src/base/database/mod.rs +++ b/crates/proof-of-sql/src/base/database/mod.rs @@ -23,15 +23,11 @@ mod table_ref; pub use crate::base::arrow::{ arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError}, owned_and_arrow_conversions::OwnedArrowConversionError, + record_batch_utility::ToArrow, scalar_and_i256_conversions, }; pub use table_ref::TableRef; -#[cfg(feature = "arrow")] -mod record_batch_utility; -#[cfg(feature = "arrow")] -pub use record_batch_utility::ToArrow; - #[cfg(feature = "arrow")] pub mod arrow_schema_utility; From 3b563d9ab6a2e50fab1890ee15ff028e9f4d4f21 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:58 -0400 Subject: [PATCH 44/74] chore!: remove direct conversions from `QueryData` to `RecordBatch` --- crates/proof-of-sql/src/sql/proof/mod.rs | 5 +++-- .../src/sql/proof/query_result.rs | 19 ------------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/crates/proof-of-sql/src/sql/proof/mod.rs 
b/crates/proof-of-sql/src/sql/proof/mod.rs index 48139dc22..b33be315c 100644 --- a/crates/proof-of-sql/src/sql/proof/mod.rs +++ b/crates/proof-of-sql/src/sql/proof/mod.rs @@ -25,8 +25,6 @@ pub(crate) use provable_result_column::ProvableResultColumn; mod provable_query_result; pub use provable_query_result::ProvableQueryResult; -#[cfg(all(test, feature = "arrow"))] -mod provable_query_result_test; mod sumcheck_mle_evaluations; pub(crate) use sumcheck_mle_evaluations::SumcheckMleEvaluations; @@ -70,3 +68,6 @@ pub(crate) use result_element_serialization::{ mod first_round_builder; pub(crate) use first_round_builder::FirstRoundBuilder; + +#[cfg(all(test, feature = "arrow"))] +mod provable_query_result_test; diff --git a/crates/proof-of-sql/src/sql/proof/query_result.rs b/crates/proof-of-sql/src/sql/proof/query_result.rs index 31b9ad994..647e4ad0b 100644 --- a/crates/proof-of-sql/src/sql/proof/query_result.rs +++ b/crates/proof-of-sql/src/sql/proof/query_result.rs @@ -3,8 +3,6 @@ use crate::base::{ proof::ProofError, scalar::Scalar, }; -#[cfg(feature = "arrow")] -use arrow::{error::ArrowError, record_batch::RecordBatch}; use snafu::Snafu; /// Verifiable query errors @@ -54,22 +52,5 @@ pub struct QueryData { pub verification_hash: [u8; 32], } -impl QueryData { - #[cfg(all(test, feature = "arrow"))] - #[must_use] - pub fn into_record_batch(self) -> RecordBatch { - self.try_into().unwrap() - } -} - -#[cfg(feature = "arrow")] -impl TryFrom> for RecordBatch { - type Error = ArrowError; - - fn try_from(value: QueryData) -> Result { - Self::try_from(value.table) - } -} - /// The result of a query -- either an error or a table. 
pub type QueryResult = Result, QueryError>; From 106e68a3e7bf9dde9d64ac1da8e59c1ea4acccb5 Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:59 -0400 Subject: [PATCH 45/74] chore: create `column_arrow_conversions` module within `arrow` module --- .../base/arrow/column_arrow_conversions.rs | 79 ++++++++++++++++++ crates/proof-of-sql/src/base/arrow/mod.rs | 3 + .../proof-of-sql/src/base/database/column.rs | 80 +------------------ 3 files changed, 83 insertions(+), 79 deletions(-) create mode 100644 crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs diff --git a/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs b/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs new file mode 100644 index 000000000..5eade6cf3 --- /dev/null +++ b/crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs @@ -0,0 +1,79 @@ +use crate::base::{ + database::{ColumnField, ColumnType}, + math::decimal::Precision, +}; +use alloc::sync::Arc; +use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; +use proof_of_sql_parser::posql_time::{PoSQLTimeUnit, PoSQLTimeZone}; + +/// Convert [`ColumnType`] values to some arrow [`DataType`] +impl From<&ColumnType> for DataType { + fn from(column_type: &ColumnType) -> Self { + match column_type { + ColumnType::Boolean => DataType::Boolean, + ColumnType::TinyInt => DataType::Int8, + ColumnType::SmallInt => DataType::Int16, + ColumnType::Int => DataType::Int32, + ColumnType::BigInt => DataType::Int64, + ColumnType::Int128 => DataType::Decimal128(38, 0), + ColumnType::Decimal75(precision, scale) => { + DataType::Decimal256(precision.value(), *scale) + } + ColumnType::VarChar => DataType::Utf8, + ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), + ColumnType::TimestampTZ(timeunit, timezone) => { + let arrow_timezone = Some(Arc::from(timezone.to_string())); + let arrow_timeunit = match timeunit { + 
PoSQLTimeUnit::Second => ArrowTimeUnit::Second, + PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, + PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, + PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, + }; + DataType::Timestamp(arrow_timeunit, arrow_timezone) + } + } + } +} + +/// Convert arrow [`DataType`] values to some [`ColumnType`] +impl TryFrom for ColumnType { + type Error = String; + + fn try_from(data_type: DataType) -> Result { + match data_type { + DataType::Boolean => Ok(ColumnType::Boolean), + DataType::Int8 => Ok(ColumnType::TinyInt), + DataType::Int16 => Ok(ColumnType::SmallInt), + DataType::Int32 => Ok(ColumnType::Int), + DataType::Int64 => Ok(ColumnType::BigInt), + DataType::Decimal128(38, 0) => Ok(ColumnType::Int128), + DataType::Decimal256(precision, scale) if precision <= 75 => { + Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) + } + DataType::Timestamp(time_unit, timezone_option) => { + let posql_time_unit = match time_unit { + ArrowTimeUnit::Second => PoSQLTimeUnit::Second, + ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, + ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, + ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, + }; + Ok(ColumnType::TimestampTZ( + posql_time_unit, + PoSQLTimeZone::try_from(&timezone_option)?, + )) + } + DataType::Utf8 => Ok(ColumnType::VarChar), + _ => Err(format!("Unsupported arrow data type {data_type:?}")), + } + } +} +/// Convert [`ColumnField`] values to arrow Field +impl From<&ColumnField> for Field { + fn from(column_field: &ColumnField) -> Self { + Field::new( + column_field.name().name(), + (&column_field.data_type()).into(), + false, + ) + } +} diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 3a3c4500d..301defb0f 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -15,3 +15,6 @@ pub mod record_batch_utility; /// Module for scalar and 
i256 conversions. pub mod scalar_and_i256_conversions; + +/// Module for handling conversions between columns and Arrow arrays. +pub mod column_arrow_conversions; diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs index 3d3b11372..be536b1d5 100644 --- a/crates/proof-of-sql/src/base/database/column.rs +++ b/crates/proof-of-sql/src/base/database/column.rs @@ -4,9 +4,7 @@ use crate::base::{ scalar::{Scalar, ScalarExt}, slice_ops::slice_cast_with, }; -use alloc::{sync::Arc, vec::Vec}; -#[cfg(feature = "arrow")] -use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit}; +use alloc::vec::Vec; use bumpalo::Bump; use core::{ fmt, @@ -412,70 +410,6 @@ impl ColumnType { } } -/// Convert [`ColumnType`] values to some arrow [`DataType`] -#[cfg(feature = "arrow")] -impl From<&ColumnType> for DataType { - fn from(column_type: &ColumnType) -> Self { - match column_type { - ColumnType::Boolean => DataType::Boolean, - ColumnType::TinyInt => DataType::Int8, - ColumnType::SmallInt => DataType::Int16, - ColumnType::Int => DataType::Int32, - ColumnType::BigInt => DataType::Int64, - ColumnType::Int128 => DataType::Decimal128(38, 0), - ColumnType::Decimal75(precision, scale) => { - DataType::Decimal256(precision.value(), *scale) - } - ColumnType::VarChar => DataType::Utf8, - ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"), - ColumnType::TimestampTZ(timeunit, timezone) => { - let arrow_timezone = Some(Arc::from(timezone.to_string())); - let arrow_timeunit = match timeunit { - PoSQLTimeUnit::Second => ArrowTimeUnit::Second, - PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond, - PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond, - PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond, - }; - DataType::Timestamp(arrow_timeunit, arrow_timezone) - } - } - } -} - -/// Convert arrow [`DataType`] values to some [`ColumnType`] -#[cfg(feature = "arrow")] -impl TryFrom for ColumnType 
{ - type Error = String; - - fn try_from(data_type: DataType) -> Result { - match data_type { - DataType::Boolean => Ok(ColumnType::Boolean), - DataType::Int8 => Ok(ColumnType::TinyInt), - DataType::Int16 => Ok(ColumnType::SmallInt), - DataType::Int32 => Ok(ColumnType::Int), - DataType::Int64 => Ok(ColumnType::BigInt), - DataType::Decimal128(38, 0) => Ok(ColumnType::Int128), - DataType::Decimal256(precision, scale) if precision <= 75 => { - Ok(ColumnType::Decimal75(Precision::new(precision)?, scale)) - } - DataType::Timestamp(time_unit, timezone_option) => { - let posql_time_unit = match time_unit { - ArrowTimeUnit::Second => PoSQLTimeUnit::Second, - ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond, - ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond, - ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond, - }; - Ok(ColumnType::TimestampTZ( - posql_time_unit, - PoSQLTimeZone::try_from(&timezone_option)?, - )) - } - DataType::Utf8 => Ok(ColumnType::VarChar), - _ => Err(format!("Unsupported arrow data type {data_type:?}")), - } - } -} - /// Display the column type as a str name (in all caps) impl Display for ColumnType { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { @@ -570,18 +504,6 @@ impl ColumnField { } } -/// Convert [`ColumnField`] values to arrow Field -#[cfg(feature = "arrow")] -impl From<&ColumnField> for Field { - fn from(column_field: &ColumnField) -> Self { - Field::new( - column_field.name().name(), - (&column_field.data_type()).into(), - false, - ) - } -} - #[cfg(test)] mod tests { use super::*; From ff87390e595ee3bac2102b97368613cc74309e9b Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:24:59 -0400 Subject: [PATCH 46/74] chore: create `record_batch_errors` module within `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 3 ++ .../src/base/arrow/record_batch_errors.rs | 38 ++++++++++++++++ .../src/base/commitment/table_commitment.rs | 43 
++----- 3 files changed, 45 insertions(+), 39 deletions(-) create mode 100644 crates/proof-of-sql/src/base/arrow/record_batch_errors.rs diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 301defb0f..48197e05b 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -10,6 +10,9 @@ pub mod owned_and_arrow_conversions; /// Tests for owned and Arrow conversions. mod owned_and_arrow_conversions_test; +/// Module for record batch error definitions. +pub mod record_batch_errors; + /// Utility functions for record batches. pub mod record_batch_utility; diff --git a/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs b/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs new file mode 100644 index 000000000..b3986d1a6 --- /dev/null +++ b/crates/proof-of-sql/src/base/arrow/record_batch_errors.rs @@ -0,0 +1,38 @@ +use super::arrow_array_to_column_conversion::ArrowArrayToColumnConversionError; +use crate::base::commitment::ColumnCommitmentsMismatch; +use proof_of_sql_parser::ParseError; +use snafu::Snafu; + +/// Errors that can occur when trying to create or extend a [`TableCommitment`] from a record batch. +#[derive(Debug, Snafu)] +pub enum RecordBatchToColumnsError { + /// Error converting from arrow array + #[snafu(transparent)] + ArrowArrayToColumnConversionError { + /// The underlying source error + source: ArrowArrayToColumnConversionError, + }, + #[snafu(transparent)] + /// This error occurs when converting from a record batch name to an identifier fails. (Which may be impossible.) + FieldParseFail { + /// The underlying source error + source: ParseError, + }, +} + +/// Errors that can occur when attempting to append a record batch to a [`TableCommitment`]. +#[derive(Debug, Snafu)] +pub enum AppendRecordBatchTableCommitmentError { + /// During commitment operation, metadata indicates that operand tables cannot be the same. 
+ #[snafu(transparent)] + ColumnCommitmentsMismatch { + /// The underlying source error + source: ColumnCommitmentsMismatch, + }, + /// Error converting from arrow array + #[snafu(transparent)] + ArrowBatchToColumnError { + /// The underlying source error + source: RecordBatchToColumnsError, + }, +} diff --git a/crates/proof-of-sql/src/base/commitment/table_commitment.rs b/crates/proof-of-sql/src/base/commitment/table_commitment.rs index f013cce90..b4387a765 100644 --- a/crates/proof-of-sql/src/base/commitment/table_commitment.rs +++ b/crates/proof-of-sql/src/base/commitment/table_commitment.rs @@ -3,8 +3,9 @@ use super::{ ColumnCommitmentsMismatch, Commitment, DuplicateIdentifiers, }; #[cfg(feature = "arrow")] -use crate::base::arrow::arrow_array_to_column_conversion::{ - ArrayRefExt, ArrowArrayToColumnConversionError, +use crate::base::arrow::{ + arrow_array_to_column_conversion::ArrayRefExt, + record_batch_errors::{AppendRecordBatchTableCommitmentError, RecordBatchToColumnsError}, }; use crate::base::{ database::{Column, ColumnField, CommitmentAccessor, OwnedTable, TableRef}, @@ -15,7 +16,7 @@ use alloc::vec::Vec; use arrow::record_batch::RecordBatch; use bumpalo::Bump; use core::ops::Range; -use proof_of_sql_parser::{Identifier, ParseError}; +use proof_of_sql_parser::Identifier; use serde::{Deserialize, Serialize}; use snafu::Snafu; @@ -85,42 +86,6 @@ pub enum TableCommitmentArithmeticError { NonContiguous, } -/// Errors that can occur when trying to create or extend a [`TableCommitment`] from a record batch. -#[cfg(feature = "arrow")] -#[derive(Debug, Snafu)] -pub enum RecordBatchToColumnsError { - /// Error converting from arrow array - #[snafu(transparent)] - ArrowArrayToColumnConversionError { - /// The underlying source error - source: ArrowArrayToColumnConversionError, - }, - #[snafu(transparent)] - /// This error occurs when convering from a record batch name to an identifier fails. (Which may be impossible.) 
- FieldParseFail { - /// The underlying source error - source: ParseError, - }, -} - -/// Errors that can occur when attempting to append a record batch to a [`TableCommitment`]. -#[cfg(feature = "arrow")] -#[derive(Debug, Snafu)] -pub enum AppendRecordBatchTableCommitmentError { - /// During commitment operation, metadata indicates that operand tables cannot be the same. - #[snafu(transparent)] - ColumnCommitmentsMismatch { - /// The underlying source error - source: ColumnCommitmentsMismatch, - }, - /// Error converting from arrow array - #[snafu(transparent)] - ArrowBatchToColumnError { - /// The underlying source error - source: RecordBatchToColumnsError, - }, -} - /// Commitment for an entire table, with column and table metadata. /// /// Unlike [`ColumnCommitments`], all columns in this commitment must have the same length. From d6a1eda515a60b808db1d81f73c4f789ef24ebcb Mon Sep 17 00:00:00 2001 From: Vamshi Maskuri <117595548+varshith257@users.noreply.github.com> Date: Sat, 26 Oct 2024 15:25:00 -0400 Subject: [PATCH 47/74] chore: create `record_batch_conversion` module within `arrow` module --- crates/proof-of-sql/src/base/arrow/mod.rs | 3 + .../src/base/arrow/record_batch_conversion.rs | 160 ++++++++++++++++++ .../src/base/commitment/table_commitment.rs | 158 +---------------- 3 files changed, 168 insertions(+), 153 deletions(-) create mode 100644 crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs diff --git a/crates/proof-of-sql/src/base/arrow/mod.rs b/crates/proof-of-sql/src/base/arrow/mod.rs index 48197e05b..0bcac183d 100644 --- a/crates/proof-of-sql/src/base/arrow/mod.rs +++ b/crates/proof-of-sql/src/base/arrow/mod.rs @@ -10,6 +10,9 @@ pub mod owned_and_arrow_conversions; /// Tests for owned and Arrow conversions. mod owned_and_arrow_conversions_test; +/// Module for converting record batches. +pub mod record_batch_conversion; + /// Module for record batch error definitions. 
pub mod record_batch_errors; diff --git a/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs b/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs new file mode 100644 index 000000000..6f24457cc --- /dev/null +++ b/crates/proof-of-sql/src/base/arrow/record_batch_conversion.rs @@ -0,0 +1,160 @@ +use super::{ + arrow_array_to_column_conversion::ArrayRefExt, + record_batch_errors::{AppendRecordBatchTableCommitmentError, RecordBatchToColumnsError}, +}; +use crate::base::{ + commitment::{ + AppendColumnCommitmentsError, AppendTableCommitmentError, Commitment, TableCommitment, + TableCommitmentFromColumnsError, + }, + database::Column, + scalar::Scalar, +}; +use arrow::record_batch::RecordBatch; +use bumpalo::Bump; +use proof_of_sql_parser::Identifier; + +/// This function will return an error if: +/// - The field name cannot be parsed into an [`Identifier`]. +/// - The conversion of an Arrow array to a [`Column`] fails. +pub fn batch_to_columns<'a, S: Scalar + 'a>( + batch: &'a RecordBatch, + alloc: &'a Bump, +) -> Result)>, RecordBatchToColumnsError> { + batch + .schema() + .fields() + .into_iter() + .zip(batch.columns()) + .map(|(field, array)| { + let identifier: Identifier = field.name().parse()?; + let column: Column = array.to_column(alloc, &(0..array.len()), None)?; + Ok((identifier, column)) + }) + .collect() +} + +impl TableCommitment { + /// Append an arrow [`RecordBatch`] to the existing [`TableCommitment`]. + /// + /// The row offset is assumed to be the end of the [`TableCommitment`]'s current range. + /// + /// Will error on a variety of mismatches, or if the provided columns have mixed length. + #[allow(clippy::missing_panics_doc)] + pub fn try_append_record_batch( + &mut self, + batch: &RecordBatch, + setup: &C::PublicSetup<'_>, + ) -> Result<(), AppendRecordBatchTableCommitmentError> { + match self.try_append_rows( + batch_to_columns::(batch, &Bump::new())? 
+ .iter() + .map(|(a, b)| (a, b)), + setup, + ) { + Ok(()) => Ok(()), + Err(AppendTableCommitmentError::MixedLengthColumns { .. }) => { + panic!("RecordBatches cannot have columns of mixed length") + } + Err(AppendTableCommitmentError::AppendColumnCommitments { + source: AppendColumnCommitmentsError::DuplicateIdentifiers { .. }, + }) => { + panic!("RecordBatches cannot have duplicate identifiers") + } + Err(AppendTableCommitmentError::AppendColumnCommitments { + source: AppendColumnCommitmentsError::Mismatch { source: e }, + }) => Err(e)?, + } + } + /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`]. + pub fn try_from_record_batch( + batch: &RecordBatch, + setup: &C::PublicSetup<'_>, + ) -> Result, RecordBatchToColumnsError> { + Self::try_from_record_batch_with_offset(batch, 0, setup) + } + + /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`] with the given row offset. + #[allow(clippy::missing_panics_doc)] + pub fn try_from_record_batch_with_offset( + batch: &RecordBatch, + offset: usize, + setup: &C::PublicSetup<'_>, + ) -> Result, RecordBatchToColumnsError> { + match Self::try_from_columns_with_offset( + batch_to_columns::(batch, &Bump::new())? + .iter() + .map(|(a, b)| (a, b)), + offset, + setup, + ) { + Ok(commitment) => Ok(commitment), + Err(TableCommitmentFromColumnsError::MixedLengthColumns { .. }) => { + panic!("RecordBatches cannot have columns of mixed length") + } + Err(TableCommitmentFromColumnsError::DuplicateIdentifiers { .. 
}) => { + panic!("RecordBatches cannot have duplicate identifiers") + } + } + } +} + +#[cfg(all(test, feature = "blitzar"))] +mod tests { + use super::*; + use crate::{base::scalar::Curve25519Scalar, record_batch}; + use curve25519_dalek::RistrettoPoint; + + #[test] + fn we_can_create_and_append_table_commitments_with_record_batchs() { + let batch = record_batch!( + "a" => [1i64, 2, 3], + "b" => ["1", "2", "3"], + ); + + let b_scals = ["1".into(), "2".into(), "3".into()]; + + let columns = [ + ( + &"a".parse().unwrap(), + &Column::::BigInt(&[1, 2, 3]), + ), + ( + &"b".parse().unwrap(), + &Column::::VarChar((&["1", "2", "3"], &b_scals)), + ), + ]; + + let mut expected_commitment = + TableCommitment::::try_from_columns_with_offset(columns, 0, &()) + .unwrap(); + + let mut commitment = + TableCommitment::::try_from_record_batch(&batch, &()).unwrap(); + + assert_eq!(commitment, expected_commitment); + + let batch2 = record_batch!( + "a" => [4i64, 5, 6], + "b" => ["4", "5", "6"], + ); + + let b_scals2 = ["4".into(), "5".into(), "6".into()]; + + let columns2 = [ + ( + &"a".parse().unwrap(), + &Column::::BigInt(&[4, 5, 6]), + ), + ( + &"b".parse().unwrap(), + &Column::::VarChar((&["4", "5", "6"], &b_scals2)), + ), + ]; + + expected_commitment.try_append_rows(columns2, &()).unwrap(); + commitment.try_append_record_batch(&batch2, &()).unwrap(); + + assert_eq!(commitment, expected_commitment); + } +} diff --git a/crates/proof-of-sql/src/base/commitment/table_commitment.rs b/crates/proof-of-sql/src/base/commitment/table_commitment.rs index b4387a765..1a52b7cea 100644 --- a/crates/proof-of-sql/src/base/commitment/table_commitment.rs +++ b/crates/proof-of-sql/src/base/commitment/table_commitment.rs @@ -2,19 +2,11 @@ use super::{ committable_column::CommittableColumn, AppendColumnCommitmentsError, ColumnCommitments, ColumnCommitmentsMismatch, Commitment, DuplicateIdentifiers, }; -#[cfg(feature = "arrow")] -use crate::base::arrow::{ - 
arrow_array_to_column_conversion::ArrayRefExt, - record_batch_errors::{AppendRecordBatchTableCommitmentError, RecordBatchToColumnsError}, -}; use crate::base::{ - database::{Column, ColumnField, CommitmentAccessor, OwnedTable, TableRef}, + database::{ColumnField, CommitmentAccessor, OwnedTable, TableRef}, scalar::Scalar, }; use alloc::vec::Vec; -#[cfg(feature = "arrow")] -use arrow::record_batch::RecordBatch; -use bumpalo::Bump; use core::ops::Range; use proof_of_sql_parser::Identifier; use serde::{Deserialize, Serialize}; @@ -365,90 +357,6 @@ impl TableCommitment { range, }) } - - /// Append an arrow [`RecordBatch`] to the existing [`TableCommitment`]. - /// - /// The row offset is assumed to be the end of the [`TableCommitment`]'s current range. - /// - /// Will error on a variety of mismatches, or if the provided columns have mixed length. - #[cfg(feature = "arrow")] - #[allow(clippy::missing_panics_doc)] - pub fn try_append_record_batch( - &mut self, - batch: &RecordBatch, - setup: &C::PublicSetup<'_>, - ) -> Result<(), AppendRecordBatchTableCommitmentError> { - match self.try_append_rows( - batch_to_columns::(batch, &Bump::new())? - .iter() - .map(|(a, b)| (a, b)), - setup, - ) { - Ok(()) => Ok(()), - Err(AppendTableCommitmentError::MixedLengthColumns { .. }) => { - panic!("RecordBatches cannot have columns of mixed length") - } - Err(AppendTableCommitmentError::AppendColumnCommitments { - source: AppendColumnCommitmentsError::DuplicateIdentifiers { .. }, - }) => { - panic!("RecordBatches cannot have duplicate identifiers") - } - Err(AppendTableCommitmentError::AppendColumnCommitments { - source: AppendColumnCommitmentsError::Mismatch { source: e }, - }) => Err(e)?, - } - } - /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`]. 
- #[cfg(feature = "arrow")] - pub fn try_from_record_batch( - batch: &RecordBatch, - setup: &C::PublicSetup<'_>, - ) -> Result, RecordBatchToColumnsError> { - Self::try_from_record_batch_with_offset(batch, 0, setup) - } - - /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`] with the given row offset. - #[allow(clippy::missing_panics_doc)] - #[cfg(feature = "arrow")] - pub fn try_from_record_batch_with_offset( - batch: &RecordBatch, - offset: usize, - setup: &C::PublicSetup<'_>, - ) -> Result, RecordBatchToColumnsError> { - match Self::try_from_columns_with_offset( - batch_to_columns::(batch, &Bump::new())? - .iter() - .map(|(a, b)| (a, b)), - offset, - setup, - ) { - Ok(commitment) => Ok(commitment), - Err(TableCommitmentFromColumnsError::MixedLengthColumns { .. }) => { - panic!("RecordBatches cannot have columns of mixed length") - } - Err(TableCommitmentFromColumnsError::DuplicateIdentifiers { .. }) => { - panic!("RecordBatches cannot have duplicate identifiers") - } - } - } -} - -#[cfg(feature = "arrow")] -fn batch_to_columns<'a, S: Scalar + 'a>( - batch: &'a RecordBatch, - alloc: &'a Bump, -) -> Result)>, RecordBatchToColumnsError> { - batch - .schema() - .fields() - .into_iter() - .zip(batch.columns()) - .map(|(field, array)| { - let identifier: Identifier = field.name().parse()?; - let column: Column = array.to_column(alloc, &(0..array.len()), None)?; - Ok((identifier, column)) - }) - .collect() } /// Return the number of rows for the provided columns, erroring if they have mixed length. 
@@ -472,13 +380,10 @@ fn num_rows_of_columns<'a>( #[cfg(all(test, feature = "arrow", feature = "blitzar"))] mod tests { use super::*; - use crate::{ - base::{ - database::{owned_table_utility::*, OwnedColumn}, - map::IndexMap, - scalar::Curve25519Scalar, - }, - record_batch, + use crate::base::{ + database::{owned_table_utility::*, OwnedColumn}, + map::IndexMap, + scalar::Curve25519Scalar, }; use curve25519_dalek::RistrettoPoint; @@ -1230,57 +1135,4 @@ mod tests { Err(TableCommitmentArithmeticError::NegativeRange { .. }) )); } - - #[test] - fn we_can_create_and_append_table_commitments_with_record_batchs() { - let batch = record_batch!( - "a" => [1i64, 2, 3], - "b" => ["1", "2", "3"], - ); - - let b_scals = ["1".into(), "2".into(), "3".into()]; - - let columns = [ - ( - &"a".parse().unwrap(), - &Column::::BigInt(&[1, 2, 3]), - ), - ( - &"b".parse().unwrap(), - &Column::::VarChar((&["1", "2", "3"], &b_scals)), - ), - ]; - - let mut expected_commitment = - TableCommitment::::try_from_columns_with_offset(columns, 0, &()) - .unwrap(); - - let mut commitment = - TableCommitment::::try_from_record_batch(&batch, &()).unwrap(); - - assert_eq!(commitment, expected_commitment); - - let batch2 = record_batch!( - "a" => [4i64, 5, 6], - "b" => ["4", "5", "6"], - ); - - let b_scals2 = ["4".into(), "5".into(), "6".into()]; - - let columns2 = [ - ( - &"a".parse().unwrap(), - &Column::::BigInt(&[4, 5, 6]), - ), - ( - &"b".parse().unwrap(), - &Column::::VarChar((&["4", "5", "6"], &b_scals2)), - ), - ]; - - expected_commitment.try_append_rows(columns2, &()).unwrap(); - commitment.try_append_record_batch(&batch2, &()).unwrap(); - - assert_eq!(commitment, expected_commitment); - } } From 574db2fd2f17fdf7b689cc45454e031120e6d28e Mon Sep 17 00:00:00 2001 From: jay Date: Sat, 26 Oct 2024 23:16:39 -0400 Subject: [PATCH 48/74] feat: add `ProofPlan::get_table_references` --- crates/proof-of-sql/src/sql/proof/proof_plan.rs | 5 ++++- .../proof-of-sql/src/sql/proof/query_proof_test.rs | 14 
+++++++++++++- .../src/sql/proof/verifiable_query_result_test.rs | 6 +++++- .../src/sql/proof_plans/dyn_proof_plan.rs | 14 +++++++++++++- .../src/sql/proof_plans/filter_exec.rs | 6 +++++- .../src/sql/proof_plans/group_by_exec.rs | 6 +++++- .../src/sql/proof_plans/projection_exec.rs | 6 +++++- 7 files changed, 50 insertions(+), 7 deletions(-) diff --git a/crates/proof-of-sql/src/sql/proof/proof_plan.rs b/crates/proof-of-sql/src/sql/proof/proof_plan.rs index 430485308..42ceceab1 100644 --- a/crates/proof-of-sql/src/sql/proof/proof_plan.rs +++ b/crates/proof-of-sql/src/sql/proof/proof_plan.rs @@ -3,7 +3,7 @@ use crate::base::{ commitment::Commitment, database::{ Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor, - OwnedTable, + OwnedTable, TableRef, }, map::IndexSet, proof::ProofError, @@ -46,6 +46,9 @@ pub trait ProofPlan: Debug + Send + Sync + ProverEvaluate IndexSet; + + /// Return all the tables referenced in the Query + fn get_table_references(&self) -> IndexSet; } pub trait ProverEvaluate { diff --git a/crates/proof-of-sql/src/sql/proof/query_proof_test.rs b/crates/proof-of-sql/src/sql/proof/query_proof_test.rs index a4fa8a65a..e6e685673 100644 --- a/crates/proof-of-sql/src/sql/proof/query_proof_test.rs +++ b/crates/proof-of-sql/src/sql/proof/query_proof_test.rs @@ -7,7 +7,7 @@ use crate::{ database::{ owned_table_utility::{bigint, owned_table}, Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, - MetadataAccessor, OwnedTable, OwnedTableTestAccessor, TestAccessor, + MetadataAccessor, OwnedTable, OwnedTableTestAccessor, TableRef, TestAccessor, UnimplementedTestAccessor, }, map::IndexSet, @@ -109,6 +109,9 @@ impl ProofPlan for TrivialTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } fn 
verify_a_trivial_query_proof_with_given_offset(n: usize, offset_generators: usize) { @@ -278,6 +281,9 @@ impl ProofPlan for SquareTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } fn verify_a_proof_with_an_anchored_commitment_and_given_offset(offset_generators: usize) { @@ -481,6 +487,9 @@ impl ProofPlan for DoubleSquareTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } fn verify_a_proof_with_an_intermediate_commitment_and_given_offset(offset_generators: usize) { @@ -677,6 +686,9 @@ impl ProofPlan for ChallengeTestProofPlan { fn get_column_references(&self) -> IndexSet { unimplemented!("no real usage for this function yet") } + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } fn verify_a_proof_with_a_post_result_challenge_and_given_offset(offset_generators: usize) { diff --git a/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs b/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs index 5d299e408..d2db5df0e 100644 --- a/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs +++ b/crates/proof-of-sql/src/sql/proof/verifiable_query_result_test.rs @@ -8,7 +8,7 @@ use crate::{ database::{ owned_table_utility::{bigint, owned_table}, Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, - MetadataAccessor, OwnedTable, TestAccessor, UnimplementedTestAccessor, + MetadataAccessor, OwnedTable, TableRef, TestAccessor, UnimplementedTestAccessor, }, map::IndexSet, proof::ProofError, @@ -88,6 +88,10 @@ impl ProofPlan for EmptyTestQueryExpr { fn get_column_references(&self) -> IndexSet { unimplemented!("no real 
usage for this function yet") } + + fn get_table_references(&self) -> IndexSet { + unimplemented!("no real usage for this function yet") + } } #[test] diff --git a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs index c524a2c76..9b1222b4b 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs @@ -1,6 +1,10 @@ use super::{FilterExec, GroupByExec, ProjectionExec}; use crate::{ - base::{commitment::Commitment, database::Column, map::IndexSet}, + base::{ + commitment::Commitment, + database::{Column, TableRef}, + map::IndexSet, + }, sql::proof::{ProofPlan, ProverEvaluate}, }; use alloc::vec::Vec; @@ -89,6 +93,14 @@ impl ProofPlan for DynProofPlan { DynProofPlan::Filter(expr) => expr.get_column_references(), } } + + fn get_table_references(&self) -> IndexSet { + match self { + DynProofPlan::Projection(expr) => expr.get_table_references(), + DynProofPlan::GroupBy(expr) => expr.get_table_references(), + DynProofPlan::Filter(expr) => expr.get_table_references(), + } + } } impl ProverEvaluate for DynProofPlan { diff --git a/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs index 4259d3d88..5a1b6106b 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/filter_exec.rs @@ -4,7 +4,7 @@ use crate::{ commitment::Commitment, database::{ filter_util::filter_columns, Column, ColumnField, ColumnRef, CommitmentAccessor, - DataAccessor, MetadataAccessor, OwnedTable, + DataAccessor, MetadataAccessor, OwnedTable, TableRef, }, map::IndexSet, proof::ProofError, @@ -139,6 +139,10 @@ where columns } + + fn get_table_references(&self) -> IndexSet { + IndexSet::from_iter([self.table.table_ref]) + } } /// Alias for a filter expression with a honest prover. 
diff --git a/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs index 0a43da82f..385b8a2e7 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/group_by_exec.rs @@ -7,7 +7,7 @@ use crate::{ aggregate_columns, compare_indexes_by_owned_columns, AggregatedColumns, }, Column, ColumnField, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, - MetadataAccessor, OwnedTable, + MetadataAccessor, OwnedTable, TableRef, }, map::IndexSet, proof::ProofError, @@ -202,6 +202,10 @@ impl ProofPlan for GroupByExec { columns } + + fn get_table_references(&self) -> IndexSet { + IndexSet::from_iter([self.table.table_ref]) + } } impl ProverEvaluate for GroupByExec { diff --git a/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs b/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs index fb66bff00..f3038b310 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/projection_exec.rs @@ -3,7 +3,7 @@ use crate::{ commitment::Commitment, database::{ Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor, - OwnedTable, + OwnedTable, TableRef, }, map::IndexSet, proof::ProofError, @@ -92,6 +92,10 @@ impl ProofPlan for ProjectionExec { }); columns } + + fn get_table_references(&self) -> IndexSet { + IndexSet::from_iter([self.table.table_ref]) + } } impl ProverEvaluate for ProjectionExec { From c7a98394b3a1307b27b3ed671b0130ae544c71c0 Mon Sep 17 00:00:00 2001 From: jay Date: Sat, 26 Oct 2024 23:16:40 -0400 Subject: [PATCH 49/74] style: leverage more imports in `dyn_proof_plan` --- .../src/sql/proof_plans/dyn_proof_plan.rs | 46 +++++++++++-------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs index 9b1222b4b..b7edcc70a 100644 --- 
a/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/dyn_proof_plan.rs @@ -2,12 +2,20 @@ use super::{FilterExec, GroupByExec, ProjectionExec}; use crate::{ base::{ commitment::Commitment, - database::{Column, TableRef}, + database::{ + Column, ColumnField, ColumnRef, CommitmentAccessor, DataAccessor, MetadataAccessor, + OwnedTable, TableRef, + }, map::IndexSet, + proof::ProofError, + }, + sql::proof::{ + CountBuilder, FinalRoundBuilder, FirstRoundBuilder, ProofPlan, ProverEvaluate, + VerificationBuilder, }, - sql::proof::{ProofPlan, ProverEvaluate}, }; use alloc::vec::Vec; +use bumpalo::Bump; use serde::{Deserialize, Serialize}; /// The query plan for proving a query @@ -38,9 +46,9 @@ pub enum DynProofPlan { impl ProofPlan for DynProofPlan { fn count( &self, - builder: &mut crate::sql::proof::CountBuilder, - accessor: &dyn crate::base::database::MetadataAccessor, - ) -> Result<(), crate::base::proof::ProofError> { + builder: &mut CountBuilder, + accessor: &dyn MetadataAccessor, + ) -> Result<(), ProofError> { match self { DynProofPlan::Projection(expr) => expr.count(builder, accessor), DynProofPlan::GroupBy(expr) => expr.count(builder, accessor), @@ -48,7 +56,7 @@ impl ProofPlan for DynProofPlan { } } - fn get_length(&self, accessor: &dyn crate::base::database::MetadataAccessor) -> usize { + fn get_length(&self, accessor: &dyn MetadataAccessor) -> usize { match self { DynProofPlan::Projection(expr) => expr.get_length(accessor), DynProofPlan::GroupBy(expr) => expr.get_length(accessor), @@ -56,7 +64,7 @@ impl ProofPlan for DynProofPlan { } } - fn get_offset(&self, accessor: &dyn crate::base::database::MetadataAccessor) -> usize { + fn get_offset(&self, accessor: &dyn MetadataAccessor) -> usize { match self { DynProofPlan::Projection(expr) => expr.get_offset(accessor), DynProofPlan::GroupBy(expr) => expr.get_offset(accessor), @@ -67,10 +75,10 @@ impl ProofPlan for DynProofPlan { #[tracing::instrument(name = 
"DynProofPlan::verifier_evaluate", level = "debug", skip_all)] fn verifier_evaluate( &self, - builder: &mut crate::sql::proof::VerificationBuilder, - accessor: &dyn crate::base::database::CommitmentAccessor, - result: Option<&crate::base::database::OwnedTable>, - ) -> Result, crate::base::proof::ProofError> { + builder: &mut VerificationBuilder, + accessor: &dyn CommitmentAccessor, + result: Option<&OwnedTable>, + ) -> Result, ProofError> { match self { DynProofPlan::Projection(expr) => expr.verifier_evaluate(builder, accessor, result), DynProofPlan::GroupBy(expr) => expr.verifier_evaluate(builder, accessor, result), @@ -78,7 +86,7 @@ impl ProofPlan for DynProofPlan { } } - fn get_column_result_fields(&self) -> Vec { + fn get_column_result_fields(&self) -> Vec { match self { DynProofPlan::Projection(expr) => expr.get_column_result_fields(), DynProofPlan::GroupBy(expr) => expr.get_column_result_fields(), @@ -86,7 +94,7 @@ impl ProofPlan for DynProofPlan { } } - fn get_column_references(&self) -> IndexSet { + fn get_column_references(&self) -> IndexSet { match self { DynProofPlan::Projection(expr) => expr.get_column_references(), DynProofPlan::GroupBy(expr) => expr.get_column_references(), @@ -108,8 +116,8 @@ impl ProverEvaluate for DynProofPlan { fn result_evaluate<'a>( &self, input_length: usize, - alloc: &'a bumpalo::Bump, - accessor: &'a dyn crate::base::database::DataAccessor, + alloc: &'a Bump, + accessor: &'a dyn DataAccessor, ) -> Vec> { match self { DynProofPlan::Projection(expr) => expr.result_evaluate(input_length, alloc, accessor), @@ -118,7 +126,7 @@ impl ProverEvaluate for DynProofPlan { } } - fn first_round_evaluate(&self, builder: &mut crate::sql::proof::FirstRoundBuilder) { + fn first_round_evaluate(&self, builder: &mut FirstRoundBuilder) { match self { DynProofPlan::Projection(expr) => expr.first_round_evaluate(builder), DynProofPlan::GroupBy(expr) => expr.first_round_evaluate(builder), @@ -129,9 +137,9 @@ impl ProverEvaluate for DynProofPlan { 
#[tracing::instrument(name = "DynProofPlan::final_round_evaluate", level = "debug", skip_all)] fn final_round_evaluate<'a>( &self, - builder: &mut crate::sql::proof::FinalRoundBuilder<'a, C::Scalar>, - alloc: &'a bumpalo::Bump, - accessor: &'a dyn crate::base::database::DataAccessor, + builder: &mut FinalRoundBuilder<'a, C::Scalar>, + alloc: &'a Bump, + accessor: &'a dyn DataAccessor, ) -> Vec> { match self { DynProofPlan::Projection(expr) => expr.final_round_evaluate(builder, alloc, accessor), From 464f3fd83b045f62fd276bb6d308079a88364d1c Mon Sep 17 00:00:00 2001 From: jay Date: Sat, 26 Oct 2024 23:16:40 -0400 Subject: [PATCH 50/74] test: add some tests for `ProofPlan::get_table_references` --- crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs | 4 ++++ .../proof-of-sql/src/sql/proof_plans/projection_exec_test.rs | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs b/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs index c6252d133..062781985 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/filter_exec_test.rs @@ -153,6 +153,10 @@ fn we_can_correctly_fetch_all_the_referenced_columns() { ) ]) ); + + let ref_tables = provable_ast.get_table_references(); + + assert_eq!(ref_tables, IndexSet::from_iter([table_ref])); } #[test] diff --git a/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs b/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs index 3addcfb17..c97ecf471 100644 --- a/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs +++ b/crates/proof-of-sql/src/sql/proof_plans/projection_exec_test.rs @@ -102,6 +102,10 @@ fn we_can_correctly_fetch_all_the_referenced_columns() { ), ]) ); + + let ref_tables = provable_ast.get_table_references(); + + assert_eq!(ref_tables, IndexSet::from_iter([table_ref])); } #[test] From a0fd2938b5512cdc12704b9a65ae34375a7c49e8 Mon Sep 17 00:00:00 2001 
From: Akhilender Bongirwar <112749383+akhilender-bongirwar@users.noreply.github.com> Date: Mon, 28 Oct 2024 22:39:34 +0530 Subject: [PATCH 51/74] refactor: removed `get_test_accessor` and renamed vars with meaningful txt (#284) # Rationale for this change This is a follow up to #261 that further tidies up the related tests. # What changes are included in this PR? - Removed the `get_test_accessor` function, replacing it with a schema-specific accessor. - Renamed variables like `i`, `i0`, `i1`, and `s` to more meaningful names like `salary`, `department`, `tax`, and `name` reflecting the context of the data. # Are these changes tested? These changes are in tests. --- .../src/sql/parse/query_expr_tests.rs | 325 ++++++++++++------ 1 file changed, 228 insertions(+), 97 deletions(-) diff --git a/crates/proof-of-sql/src/sql/parse/query_expr_tests.rs b/crates/proof-of-sql/src/sql/parse/query_expr_tests.rs index 4f68869d9..0c1cfd965 100644 --- a/crates/proof-of-sql/src/sql/parse/query_expr_tests.rs +++ b/crates/proof-of-sql/src/sql/parse/query_expr_tests.rs @@ -54,25 +54,6 @@ pub fn schema_accessor_from_table_ref_with_schema( TestSchemaAccessor::new(indexmap! {table => schema}) } -fn get_test_accessor() -> (TableRef, TestSchemaAccessor) { - let table = "sxt.t".parse().unwrap(); - let accessor = schema_accessor_from_table_ref_with_schema( - table, - indexmap! 
{ - "s".parse().unwrap() => ColumnType::VarChar, - "i".parse().unwrap() => ColumnType::BigInt, - "d".parse().unwrap() => ColumnType::Int128, - "s0".parse().unwrap() => ColumnType::VarChar, - "i0".parse().unwrap() => ColumnType::BigInt, - "d0".parse().unwrap() => ColumnType::Int128, - "s1".parse().unwrap() => ColumnType::VarChar, - "i1".parse().unwrap() => ColumnType::BigInt, - "d1".parse().unwrap() => ColumnType::Int128, - }, - ); - (table, accessor) -} - #[test] fn we_can_convert_an_ast_with_one_column() { let t = "sxt.sxt_tab".parse().unwrap(); @@ -1128,8 +1109,17 @@ fn we_can_group_by_without_using_aggregate_functions() { #[test] fn group_by_expressions_are_parsed_before_an_order_by_referencing_an_aggregate_alias_result() { let query_text = - "select max(i) max_sal, i0 d, count(i0) from sxt.t group by i0, i1 order by max_sal"; - let (t, accessor) = get_test_accessor(); + "select max(salary) max_sal, department_budget d, count(department_budget) from sxt.employees group by department_budget, tax order by max_sal"; + + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "department_budget".parse().unwrap() => ColumnType::BigInt, + "salary".parse().unwrap() => ColumnType::BigInt, + "tax".parse().unwrap() => ColumnType::BigInt, + }, + ); let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1138,20 +1128,20 @@ fn group_by_expressions_are_parsed_before_an_order_by_referencing_an_aggregate_a let expected_query = QueryExpr::new( filter( vec![ - col_expr_plan(t, "i", &accessor), - col_expr_plan(t, "i0", &accessor), - col_expr_plan(t, "i1", &accessor), + col_expr_plan(t, "department_budget", &accessor), + col_expr_plan(t, "salary", &accessor), + col_expr_plan(t, "tax", &accessor), ], tab(t), const_bool(true), ), vec![ group_by_postprocessing( - &["i0", "i1"], + &["department_budget", "tax"], &[ - aliased_expr(max(col("i")), "max_sal"), - aliased_expr(col("i0"), "d"), - aliased_expr(count(col("i0")), "__count__"), + aliased_expr(max(col("salary")), "max_sal"), + aliased_expr(col("department_budget"), "d"), + aliased_expr(count(col("department_budget")), "__count__"), ], ), orders(&["max_sal"], &[Asc]), @@ -1240,8 +1230,14 @@ fn group_by_column_cannot_be_a_column_result_alias() { #[test] fn we_can_have_aggregate_functions_without_a_group_by_clause() { - let query_text = "select count(s) from sxt.t"; - let (t, accessor) = get_test_accessor(); + let query_text = "select count(name) from sxt.employees"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "name".parse().unwrap() => ColumnType::VarChar, + }, + ); let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let ast = @@ -1401,8 +1397,17 @@ fn we_can_use_the_same_result_columns_with_different_aliases_and_associate_it_wi #[test] fn we_can_use_multiple_group_by_clauses_with_multiple_agg_and_non_agg_exprs() { - let (t, accessor) = get_test_accessor(); - let query_text = "select i d1, max(i1), i d2, sum(i0) sum_bonus, count(s) count_s from sxt.t group by i, i0, i"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! { + "bonus".parse().unwrap() => ColumnType::BigInt, + "name".parse().unwrap() => ColumnType::VarChar, + "salary".parse().unwrap() => ColumnType::BigInt, + "tax".parse().unwrap() => ColumnType::BigInt, + }, + ); + let query_text = "select salary d1, max(tax), salary d2, sum(bonus) sum_bonus, count(name) count_s from sxt.employees group by salary, bonus, salary"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let ast = @@ -1410,18 +1415,18 @@ fn we_can_use_multiple_group_by_clauses_with_multiple_agg_and_non_agg_exprs() { let expected_ast = QueryExpr::new( filter( - cols_expr_plan(t, &["i", "i0", "i1", "s"], &accessor), + cols_expr_plan(t, &["bonus", "name", "salary", "tax"], &accessor), tab(t), const_bool(true), ), vec![group_by_postprocessing( - &["i", "i0", "i"], + &["salary", "bonus", "salary"], &[ - aliased_expr(col("i"), "d1"), - aliased_expr(max(col("i1")), "__max__"), - aliased_expr(col("i"), "d2"), - aliased_expr(sum(col("i0")), "sum_bonus"), - aliased_expr(count(col("s")), "count_s"), + aliased_expr(col("salary"), "d1"), + aliased_expr(max(col("tax")), "__max__"), + aliased_expr(col("salary"), "d2"), + aliased_expr(sum(col("bonus")), "sum_bonus"), + aliased_expr(count(col("name")), "count_s"), ], )], ); @@ -1567,12 +1572,19 @@ fn we_can_parse_arithmetic_expression_within_aggregations_in_the_result_expr() { #[test] 
fn we_cannot_use_non_grouped_columns_outside_agg() { - let (t, accessor) = get_test_accessor(); + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! { + "salary".parse().unwrap() => ColumnType::BigInt, + "name".parse().unwrap() => ColumnType::VarChar, + }, + ); let identifier_not_in_agg_queries = vec![ - "select i from sxt.t group by s", - "select sum(i), i from sxt.t group by s", - "select min(i) + i from sxt.t group by s", - "select 2 * i, min(i) from sxt.t group by s", + "select salary from sxt.employees group by name", + "select sum(salary), salary from sxt.employees group by name", + "select min(salary) + salary from sxt.employees group by name", + "select 2 * salary, min(salary) from sxt.employees group by name", ]; for query_text in &identifier_not_in_agg_queries { @@ -1589,9 +1601,9 @@ fn we_cannot_use_non_grouped_columns_outside_agg() { } let invalid_group_by_queries = vec![ - "select 2 * i, min(i) from sxt.t", - "select sum(i), i from sxt.t", - "select max(i) + 2 * i from sxt.t", + "select 2 * salary, min(salary) from sxt.employees", + "select sum(salary), salary from sxt.employees", + "select max(salary) + 2 * salary from sxt.employees", ]; for query_text in &invalid_group_by_queries { @@ -1608,11 +1620,23 @@ fn we_cannot_use_non_grouped_columns_outside_agg() { #[test] fn varchar_column_is_not_compatible_with_integer_column() { - let bigint_to_varchar_queries = vec!["select -123 * s from sxt.t", "select i - s from sxt.t"]; - let (t, accessor) = get_test_accessor(); + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "salary".parse().unwrap() => ColumnType::BigInt, + "name".parse().unwrap() => ColumnType::VarChar, + }, + ); + + let bigint_to_varchar_queries = vec![ + "select -123 * name from sxt.employees", + "select salary - name from sxt.employees", + ]; + let varchar_to_bigint_queries = vec![ - "select s from sxt.t where 'abc' = i", - "select s from sxt.t where 'abc' != i", + "select name from sxt.employees where 'abc' = salary", + "select name from sxt.employees where 'abc' != salary", ]; for query_text in &bigint_to_varchar_queries { @@ -1646,8 +1670,16 @@ fn varchar_column_is_not_compatible_with_integer_column() { #[test] fn arithmetic_operations_are_not_allowed_with_varchar_column() { - let (t, accessor) = get_test_accessor(); - let query_text = "select s - s1 from sxt.t"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! { + "name".parse().unwrap() => ColumnType::VarChar, + "position".parse().unwrap() => ColumnType::VarChar, + }, + ); + + let query_text = "select name - position from sxt.employees"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let result = QueryExpr::::try_new(intermediate_ast, t.schema_id(), &accessor); @@ -1662,8 +1694,14 @@ fn arithmetic_operations_are_not_allowed_with_varchar_column() { #[test] fn varchar_column_is_not_allowed_within_numeric_aggregations() { - let (t, accessor) = get_test_accessor(); - let sum_query = "select sum(s) from sxt.t"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "name".parse().unwrap() => ColumnType::VarChar, + }, + ); + let sum_query = "select sum(name) from sxt.employees"; let intermediate_ast = SelectStatementParser::new().parse(sum_query).unwrap(); let result = QueryExpr::::try_new(intermediate_ast, t.schema_id(), &accessor); @@ -1673,7 +1711,7 @@ fn varchar_column_is_not_allowed_within_numeric_aggregations() { if expression == "cannot use expression of type 'varchar' with numeric aggregation function 'sum'" )); - let max_query = "select max(s) from sxt.t"; + let max_query = "select max(name) from sxt.employees"; let intermediate_ast = SelectStatementParser::new().parse(max_query).unwrap(); let result = QueryExpr::::try_new(intermediate_ast, t.schema_id(), &accessor); @@ -1683,7 +1721,7 @@ fn varchar_column_is_not_allowed_within_numeric_aggregations() { if expression == "cannot use expression of type 'varchar' with numeric aggregation function 'max'" )); - let min_query = "select min(s) from sxt.t"; + let min_query = "select min(name) from sxt.employees"; let intermediate_ast = SelectStatementParser::new().parse(min_query).unwrap(); let result = QueryExpr::::try_new(intermediate_ast, t.schema_id(), &accessor); @@ -1696,8 +1734,14 @@ fn varchar_column_is_not_allowed_within_numeric_aggregations() { #[test] fn group_by_with_bigint_column_is_valid() { - let (t, accessor) = get_test_accessor(); - let query_text = "select i from sxt.t group by i"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "salary".parse().unwrap() => ColumnType::BigInt, + }, + ); + let query_text = "select salary from sxt.employees group by salary"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1705,13 +1749,13 @@ fn group_by_with_bigint_column_is_valid() { let expected_query = QueryExpr::new( filter( - cols_expr_plan(t, &["i"], &accessor), + cols_expr_plan(t, &["salary"], &accessor), tab(t), const_bool(true), ), vec![group_by_postprocessing( - &["i"], - &[aliased_expr(col("i"), "i")], + &["salary"], + &[aliased_expr(col("salary"), "salary")], )], ); assert_eq!(query, expected_query); @@ -1719,8 +1763,14 @@ fn group_by_with_bigint_column_is_valid() { #[test] fn group_by_with_decimal_column_is_valid() { - let (t, accessor) = get_test_accessor(); - let query_text = "select d from sxt.t group by d"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! { + "salary".parse().unwrap() => ColumnType::Int128, + }, + ); + let query_text = "select salary from sxt.employees group by salary"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1728,13 +1778,13 @@ fn group_by_with_decimal_column_is_valid() { let expected_query = QueryExpr::new( filter( - cols_expr_plan(t, &["d"], &accessor), + cols_expr_plan(t, &["salary"], &accessor), tab(t), const_bool(true), ), vec![group_by_postprocessing( - &["d"], - &[aliased_expr(col("d"), "d")], + &["salary"], + &[aliased_expr(col("salary"), "salary")], )], ); assert_eq!(query, expected_query); @@ -1742,8 +1792,14 @@ fn group_by_with_decimal_column_is_valid() { #[test] fn group_by_with_varchar_column_is_valid() { - let (t, accessor) = get_test_accessor(); - let query_text = "select s from sxt.t group by s"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "name".parse().unwrap() => ColumnType::VarChar, + }, + ); + let query_text = "select name from sxt.employees group by name"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1751,13 +1807,13 @@ fn group_by_with_varchar_column_is_valid() { let expected_query = QueryExpr::new( filter( - cols_expr_plan(t, &["s"], &accessor), + cols_expr_plan(t, &["name"], &accessor), tab(t), const_bool(true), ), vec![group_by_postprocessing( - &["s"], - &[aliased_expr(col("s"), "s")], + &["name"], + &[aliased_expr(col("name"), "name")], )], ); assert_eq!(query, expected_query); @@ -1765,8 +1821,16 @@ fn group_by_with_varchar_column_is_valid() { #[test] fn we_can_use_arithmetic_outside_agg_expressions_while_using_group_by() { - let (t, accessor) = get_test_accessor(); - let query_text = "select 2 * i + sum(i) - i1 from sxt.t group by i, i1"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "salary".parse().unwrap() => ColumnType::BigInt, + "tax".parse().unwrap() => ColumnType::BigInt, + }, + ); + let query_text = + "select 2 * salary + sum(salary) - tax from sxt.employees group by salary, tax"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let query = @@ -1774,20 +1838,26 @@ fn we_can_use_arithmetic_outside_agg_expressions_while_using_group_by() { let expected_query = QueryExpr::new( filter( - cols_expr_plan(t, &["i", "i1"], &accessor), + cols_expr_plan(t, &["salary", "tax"], &accessor), tab(t), const_bool(true), ), vec![ group_by_postprocessing( - &["i", "i1"], + &["salary", "tax"], &[aliased_expr( - psub(padd(pmul(lit(2), col("i")), sum(col("i"))), col("i1")), + psub( + padd(pmul(lit(2), col("salary")), sum(col("salary"))), + col("tax"), + ), "__expr__", )], ), select_expr(&[aliased_expr( - psub(padd(pmul(lit(2), col("i")), col("__col_agg_0")), col("i1")), + psub( + padd(pmul(lit(2), col("salary")), col("__col_agg_0")), + col("tax"), + ), "__expr__", )]), ], @@ -1797,8 +1867,15 @@ fn we_can_use_arithmetic_outside_agg_expressions_while_using_group_by() { #[test] fn we_can_use_arithmetic_outside_agg_expressions_without_using_group_by() { - let (t, accessor) = get_test_accessor(); - let query_text = "select 7 + max(i) as max_i, min(i + 777 * d) * -5 as min_d from t"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "salary".parse().unwrap() => ColumnType::BigInt, + "bonus".parse().unwrap() => ColumnType::Int128, + }, + ); + let query_text = "select 7 + max(salary) as max_i, min(salary + 777 * bonus) * -5 as min_d from sxt.employees"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let ast = @@ -1806,7 +1883,7 @@ fn we_can_use_arithmetic_outside_agg_expressions_without_using_group_by() { let expected_ast = QueryExpr::new( filter( - cols_expr_plan(t, &["d", "i"], &accessor), + cols_expr_plan(t, &["bonus", "salary"], &accessor), tab(t), const_bool(true), ), @@ -1814,9 +1891,12 @@ fn we_can_use_arithmetic_outside_agg_expressions_without_using_group_by() { group_by_postprocessing( &[], &[ - aliased_expr(padd(lit(7), max(col("i"))), "max_i"), + aliased_expr(padd(lit(7), max(col("salary"))), "max_i"), aliased_expr( - pmul(min(padd(col("i"), pmul(lit(777), col("d")))), lit(-5)), + pmul( + min(padd(col("salary"), pmul(lit(777), col("bonus")))), + lit(-5), + ), "min_d", ), ], @@ -1832,8 +1912,17 @@ fn we_can_use_arithmetic_outside_agg_expressions_without_using_group_by() { #[test] fn count_aggregation_always_have_integer_type() { - let (t, accessor) = get_test_accessor(); - let query_text = "select 7 + count(s) as cs, count(i) * -5 as ci, count(d) from t"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "name".parse().unwrap() => ColumnType::VarChar, + "salary".parse().unwrap() => ColumnType::BigInt, + "tax".parse().unwrap() => ColumnType::Int128, + }, + ); + let query_text = + "select 7 + count(name) as cs, count(salary) * -5 as ci, count(tax) from sxt.employees"; let intermediate_ast = SelectStatementParser::new().parse(query_text).unwrap(); let ast = @@ -1841,7 +1930,7 @@ fn count_aggregation_always_have_integer_type() { let expected_ast = QueryExpr::new( filter( - cols_expr_plan(t, &["d", "i", "s"], &accessor), + cols_expr_plan(t, &["name", "salary", "tax"], &accessor), tab(t), const_bool(true), ), @@ -1849,9 +1938,9 @@ fn count_aggregation_always_have_integer_type() { group_by_postprocessing( &[], &[ - aliased_expr(padd(lit(7), count(col("s"))), "cs"), - aliased_expr(pmul(count(col("i")), lit(-5)), "ci"), - aliased_expr(count(col("d")), "__count__"), + aliased_expr(padd(lit(7), count(col("name"))), "cs"), + aliased_expr(pmul(count(col("salary")), lit(-5)), "ci"), + aliased_expr(count(col("tax")), "__count__"), ], ), select_expr(&[ @@ -1866,17 +1955,41 @@ fn count_aggregation_always_have_integer_type() { #[test] fn select_wildcard_is_valid_with_group_by_exprs() { - let columns = ["s", "i", "d", "s0", "i0", "d0", "s1", "i1", "d1"]; + let columns = [ + "employee_name", + "base_salary", + "annual_bonus", + "manager_name", + "manager_salary", + "manager_bonus", + "department_name", + "department_budget", + "department_headcount", + ]; let sorted_columns = columns.iter().sorted().collect::>(); let aliased_exprs = columns .iter() .map(|c| aliased_expr(col(c), c)) .collect::>(); - let (t, accessor) = get_test_accessor(); - let table_name = "sxt.t"; + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "employee_name".parse().unwrap() => ColumnType::VarChar, + "base_salary".parse().unwrap() => ColumnType::BigInt, + "annual_bonus".parse().unwrap() => ColumnType::Int128, + "manager_name".parse().unwrap() => ColumnType::VarChar, + "manager_salary".parse().unwrap() => ColumnType::BigInt, + "manager_bonus".parse().unwrap() => ColumnType::Int128, + "department_name".parse().unwrap() => ColumnType::VarChar, + "department_budget".parse().unwrap() => ColumnType::BigInt, + "department_headcount".parse().unwrap() => ColumnType::Int128, + }, + ); + let query_text = format!( "SELECT * FROM {} GROUP BY {}", - table_name, + "sxt.employees", columns.join(", ") ); @@ -1901,10 +2014,19 @@ fn select_wildcard_is_valid_with_group_by_exprs() { #[test] fn nested_aggregations_are_not_supported() { let supported_agg = ["max", "min", "sum", "count"]; - let (t, accessor) = get_test_accessor(); + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! { + "salary".parse().unwrap() => ColumnType::BigInt, + }, + ); for perm_aggs in supported_agg.iter().permutations(2) { - let query_text = format!("SELECT {}({}(i)) FROM t", perm_aggs[0], perm_aggs[1]); + let query_text = format!( + "SELECT {}({}(salary)) FROM sxt.employees", + perm_aggs[0], perm_aggs[1] + ); let intermediate_ast = SelectStatementParser::new().parse(&query_text).unwrap(); let result = @@ -1922,8 +2044,17 @@ fn nested_aggregations_are_not_supported() { #[test] fn select_group_and_order_by_preserve_the_column_order_reference() { const N: usize = 4; - let (t, accessor) = get_test_accessor(); - let base_cols: [&str; N] = ["i", "i0", "i1", "s"]; // sorted because of `select: [cols = ... ]` + let t = "sxt.employees".parse().unwrap(); + let accessor = schema_accessor_from_table_ref_with_schema( + t, + indexmap! 
{ + "salary".parse().unwrap() => ColumnType::BigInt, + "department".parse().unwrap() => ColumnType::BigInt, + "tax".parse().unwrap() => ColumnType::BigInt, + "name".parse().unwrap() => ColumnType::VarChar, + }, + ); + let base_cols: [&str; N] = ["salary", "department", "tax", "name"]; // sorted because of `select: [cols = ... ]` let base_ordering = [Asc, Desc, Asc, Desc]; for (idx, perm_cols) in base_cols .into_iter() From 9be5d333ea7036b2a5a71564f3cd59c34721054d Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 11:40:08 +0200 Subject: [PATCH 52/74] feat: add rockets CSV --- .../proof-of-sql/examples/rockets/rockets.csv | 25 +++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 crates/proof-of-sql/examples/rockets/rockets.csv diff --git a/crates/proof-of-sql/examples/rockets/rockets.csv b/crates/proof-of-sql/examples/rockets/rockets.csv new file mode 100644 index 000000000..341ab978b --- /dev/null +++ b/crates/proof-of-sql/examples/rockets/rockets.csv @@ -0,0 +1,25 @@ +Name,Country,Year,MTOW +Saturn V,USA,1967,2976000 +Falcon Heavy,USA,2018,1420788 +Space Shuttle,USA,1981,2041167 +Energia,USSR,1987,2400000 +Ariane 5,Europe,1996,780000 +Delta IV Heavy,USA,2004,733400 +Long March 5,China,2016,869000 +Proton,USSR/Russia,1965,705000 +Atlas V,USA,2002,546700 +H-IIA,Japan,2001,445000 +Soyuz,USSR/Russia,1966,308000 +Vulcan Centaur,USA,Expected 2024,630000 +Falcon 9,USA,2010,549054 +Vega,Europe,2012,137000 +PSLV,India,1993,320000 +GSLV Mk III,India,2017,640000 +Titan II,USA,1962,153800 +Angara A5,Russia,2014,1335000 +Delta II,USA,1989,231870 +Electron,New Zealand,2017,13500 +Antares,USA,2013,240000 +Zenit,USSR/Ukraine,1985,462000 +N1,USSR,1969,2735000 +New Glenn,USA,2024,1300000 From 01507d5639af9894f1a1de08a9ac61d6dfab33ee Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 11:43:10 +0200 Subject: [PATCH 53/74] feat: add more examples --- crates/proof-of-sql/examples/rockets/rockets.csv | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/crates/proof-of-sql/examples/rockets/rockets.csv b/crates/proof-of-sql/examples/rockets/rockets.csv index 341ab978b..cdb1bc963 100644 --- a/crates/proof-of-sql/examples/rockets/rockets.csv +++ b/crates/proof-of-sql/examples/rockets/rockets.csv @@ -23,3 +23,7 @@ Antares,USA,2013,240000 Zenit,USSR/Ukraine,1985,462000 N1,USSR,1969,2735000 New Glenn,USA,2024,1300000 +Redstone,USA,1953,29500 +Black Arrow,UK,1971,18800 +Diamant,France,1965,18000 +Pegasus,USA,1990,23300 From bfa92f7a48d76c53a39be2fdf17b11c2e4b04227 Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 11:44:05 +0200 Subject: [PATCH 54/74] fix: remove bad example --- crates/proof-of-sql/examples/rockets/rockets.csv | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/proof-of-sql/examples/rockets/rockets.csv b/crates/proof-of-sql/examples/rockets/rockets.csv index cdb1bc963..e72920eef 100644 --- a/crates/proof-of-sql/examples/rockets/rockets.csv +++ b/crates/proof-of-sql/examples/rockets/rockets.csv @@ -10,7 +10,6 @@ Proton,USSR/Russia,1965,705000 Atlas V,USA,2002,546700 H-IIA,Japan,2001,445000 Soyuz,USSR/Russia,1966,308000 -Vulcan Centaur,USA,Expected 2024,630000 Falcon 9,USA,2010,549054 Vega,Europe,2012,137000 PSLV,India,1993,320000 From ca969437c6eff746d6fad86491386263c5534037 Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 11:44:42 +0200 Subject: [PATCH 55/74] fix: correct MTOW for Electron example --- crates/proof-of-sql/examples/rockets/rockets.csv | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/rockets/rockets.csv b/crates/proof-of-sql/examples/rockets/rockets.csv index e72920eef..92e96f884 100644 --- a/crates/proof-of-sql/examples/rockets/rockets.csv +++ b/crates/proof-of-sql/examples/rockets/rockets.csv @@ -17,7 +17,7 @@ GSLV Mk III,India,2017,640000 Titan II,USA,1962,153800 Angara A5,Russia,2014,1335000 Delta II,USA,1989,231870 -Electron,New Zealand,2017,13500 +Electron,New 
Zealand,2017,12500 Antares,USA,2013,240000 Zenit,USSR/Ukraine,1985,462000 N1,USSR,1969,2735000 From 54a134b92a920b41a71ae38fa9e7769faac15ef9 Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 11:58:57 +0200 Subject: [PATCH 56/74] refactor: rename CSV to launch_vehicles.csv --- .../examples/rockets/{rockets.csv => launch_vehicles.csv} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename crates/proof-of-sql/examples/rockets/{rockets.csv => launch_vehicles.csv} (96%) diff --git a/crates/proof-of-sql/examples/rockets/rockets.csv b/crates/proof-of-sql/examples/rockets/launch_vehicles.csv similarity index 96% rename from crates/proof-of-sql/examples/rockets/rockets.csv rename to crates/proof-of-sql/examples/rockets/launch_vehicles.csv index 92e96f884..cba1aeb2f 100644 --- a/crates/proof-of-sql/examples/rockets/rockets.csv +++ b/crates/proof-of-sql/examples/rockets/launch_vehicles.csv @@ -1,4 +1,4 @@ -Name,Country,Year,MTOW +name,country,year,mtow Saturn V,USA,1967,2976000 Falcon Heavy,USA,2018,1420788 Space Shuttle,USA,1981,2041167 From a73db31af90f6c08427751d91b0d7d1c6a71dd02 Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 11:59:07 +0200 Subject: [PATCH 57/74] feat: add example code --- crates/proof-of-sql/examples/rockets/main.rs | 118 +++++++++++++++++++ 1 file changed, 118 insertions(+) create mode 100644 crates/proof-of-sql/examples/rockets/main.rs diff --git a/crates/proof-of-sql/examples/rockets/main.rs b/crates/proof-of-sql/examples/rockets/main.rs new file mode 100644 index 000000000..3057fa510 --- /dev/null +++ b/crates/proof-of-sql/examples/rockets/main.rs @@ -0,0 +1,118 @@ +//! This is a non-interactive example of using Proof of SQL with a rockets dataset. +//! To run this, use `cargo run --release --example rockets`. +//! +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! 
you can run `cargo run --release --example rockets --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. + +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{ + arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, + TestAccessor, + }, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; + +// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS. +// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`. +const DORY_SETUP_MAX_NU: usize = 8; +// This should be a "nothing-up-my-sleeve" phrase or number. +const DORY_SEED: [u8; 32] = *b"7a1b3c8d2e4f9g6h5i0j7k2l8m3n9o1p"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. 
+fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "rockets".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{result:?}"); +} + +fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + + let filename = "./crates/proof-of-sql/examples/rockets/launch_vehicles.csv"; + let inferred_schema = + SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); + let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); + + let rockets_batch = ReaderBuilder::new(posql_compatible_schema) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + + // Load the table into an "Accessor" so that the prover and verifier can access the 
data/commitments. + let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "rockets.launch_vehicles".parse().unwrap(), + OwnedTable::try_from(rockets_batch).unwrap(), + 0, + ); + + prove_and_verify_query( + "SELECT COUNT(*) AS total_rockets FROM launch_vehicles", + &accessor, + &prover_setup, + &verifier_setup, + ); + + prove_and_verify_query( + "SELECT country, MAX(mtow) as max_mtow, COUNT(*) as rocket_count FROM launch_vehicles GROUP BY country ORDER BY max_mtow DESC", + &accessor, + &prover_setup, + &verifier_setup, + ); +} From fcb8e8a582e6c5053dac44702717824ec6598da0 Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 12:00:40 +0200 Subject: [PATCH 58/74] feat: add example query with filter --- crates/proof-of-sql/examples/rockets/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/rockets/main.rs b/crates/proof-of-sql/examples/rockets/main.rs index 3057fa510..1506a928f 100644 --- a/crates/proof-of-sql/examples/rockets/main.rs +++ b/crates/proof-of-sql/examples/rockets/main.rs @@ -115,4 +115,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT name FROM launch_vehicles WHERE country = 'USA'", + &accessor, + &prover_setup, + &verifier_setup, + ); } From 59a7a2797566779f5a72690c88047cd23ab09ed2 Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 12:01:39 +0200 Subject: [PATCH 59/74] feat: add query example with complex filter --- crates/proof-of-sql/examples/rockets/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/proof-of-sql/examples/rockets/main.rs b/crates/proof-of-sql/examples/rockets/main.rs index 1506a928f..79ad4c4a4 100644 --- a/crates/proof-of-sql/examples/rockets/main.rs +++ b/crates/proof-of-sql/examples/rockets/main.rs @@ -122,4 +122,11 @@ fn main() { &prover_setup, &verifier_setup, ); + + prove_and_verify_query( + "SELECT name FROM launch_vehicles WHERE mtow > 
100000 and mtow < 150000", + &accessor, + &prover_setup, + &verifier_setup, + ); } From 75b452403cfa2407ea9e362d300de1e1a1f13539 Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 12:03:30 +0200 Subject: [PATCH 60/74] feat: add rockets example to Cargo.toml --- crates/proof-of-sql/Cargo.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml index c2be7d100..0c783d5eb 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -131,6 +131,10 @@ required-features = [ "arrow" ] name = "countries" required-features = [ "arrow" ] +[[example]] +name = "rockets" +required-features = [ "arrow" ] + [[bench]] name = "posql_benches" harness = false From f2d6573ecc99c89530ae5435dfefdf17d603facf Mon Sep 17 00:00:00 2001 From: Miguel Blanco Date: Wed, 23 Oct 2024 12:04:39 +0200 Subject: [PATCH 61/74] feat: add rockets example to lint-and-test.yml --- .github/workflows/lint-and-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 51191d986..5e291eda1 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -132,6 +132,8 @@ jobs: run: cargo run --example sushi - name: Run countries example run: cargo run --example countries + - name: Run rockets example + run: cargo run --example rockets - name: Run posql_db example (With Blitzar) run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - name: Run posql_db example (Without Blitzar) From 16b3e45ced30edf9e332910ba5fdc9fc48ff398f Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Thu, 24 Oct 2024 13:23:04 -0600 Subject: [PATCH 62/74] feat: programming_books example --- .../examples/programming_books/main.rs | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 crates/proof-of-sql/examples/programming_books/main.rs diff --git 
a/crates/proof-of-sql/examples/programming_books/main.rs b/crates/proof-of-sql/examples/programming_books/main.rs new file mode 100644 index 000000000..a524fff34 --- /dev/null +++ b/crates/proof-of-sql/examples/programming_books/main.rs @@ -0,0 +1,125 @@ +//! This is a non-interactive example of using Proof of SQL with an extended books dataset. +//! To run this, use `cargo run --example books_extra`. +//! +//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, +//! you can run `cargo run --example books_extra --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. + +use arrow::datatypes::SchemaRef; +use arrow_csv::{infer_schema_from_files, ReaderBuilder}; +use proof_of_sql::{ + base::database::{ + arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor, + TestAccessor, + }, + proof_primitive::dory::{ + DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters, + VerifierSetup, + }, + sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof}, +}; +use rand::{rngs::StdRng, SeedableRng}; +use std::{fs::File, time::Instant}; + +const DORY_SETUP_MAX_NU: usize = 8; +const DORY_SEED: [u8; 32] = *b"ebab60d58dee4cc69658939b7c2a582d"; + +/// # Panics +/// Will panic if the query does not parse or the proof fails to verify. 
+fn prove_and_verify_query( + sql: &str, + accessor: &OwnedTableTestAccessor, + prover_setup: &ProverSetup, + verifier_setup: &VerifierSetup, +) { + // Parse the query: + println!("Parsing the query: {sql}..."); + let now = Instant::now(); + let query_plan = QueryExpr::::try_new( + sql.parse().unwrap(), + "books_extra".parse().unwrap(), + accessor, + ) + .unwrap(); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Generate the proof and result: + print!("Generating proof..."); + let now = Instant::now(); + let (proof, provable_result) = QueryProof::::new( + query_plan.proof_expr(), + accessor, + &prover_setup, + ); + println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Verify the result with the proof: + print!("Verifying proof..."); + let now = Instant::now(); + let result = proof + .verify( + query_plan.proof_expr(), + accessor, + &provable_result, + &verifier_setup, + ) + .unwrap(); + let result = apply_postprocessing_steps(result.table, query_plan.postprocessing()); + println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.); + + // Display the result + println!("Query Result:"); + println!("{result:?}"); +} + +fn main() { + let mut rng = StdRng::from_seed(DORY_SEED); + let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng); + let prover_setup = ProverSetup::from(&public_parameters); + let verifier_setup = VerifierSetup::from(&public_parameters); + + let filename = "./crates/proof-of-sql/examples/books_extra/books_extra.csv"; + let inferred_schema = + SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); + let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); + + let books_extra_batch = ReaderBuilder::new(posql_compatible_schema) + .with_header(true) + .build(File::open(filename).unwrap()) + .unwrap() + .next() + .unwrap() + .unwrap(); + + // Load the table into an "Accessor" so that the prover and verifier can access the 
data/commitments. + let mut accessor = + OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); + accessor.add_table( + "books_extra.books".parse().unwrap(), + OwnedTable::try_from(books_extra_batch).unwrap(), + 0, + ); + + // Query 1: Count the total number of books + prove_and_verify_query( + "SELECT COUNT(*) AS total_books FROM books", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 2: Find books with a rating higher than 4.5 + prove_and_verify_query( + "SELECT title, author FROM books WHERE rating > 4.5", + &accessor, + &prover_setup, + &verifier_setup, + ); + + // Query 3: List all programming books published after 2000 + prove_and_verify_query( + "SELECT title, publication_year FROM books WHERE genre = 'Programming' AND publication_year > 2000", + &accessor, + &prover_setup, + &verifier_setup, + ); +} \ No newline at end of file From e1ae87ecc3847980b48dd949b66b968b75fd8465 Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Thu, 24 Oct 2024 13:24:23 -0600 Subject: [PATCH 63/74] chore: Add workflow for programming books --- .github/workflows/lint-and-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 5e291eda1..9395d5bee 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -122,6 +122,8 @@ jobs: run: cargo run --example dinosaurs - name: Run books example run: cargo run --example books + - name: Run programming books example + run: cargo run --example programmingbooks - name: Run brands example run: cargo run --example brands - name: Run avocado-prices example From b338551a99dd8b12a39d8eec82ae059d93b65970 Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Thu, 24 Oct 2024 13:24:50 -0600 Subject: [PATCH 64/74] chore: Add proper example to cargo file --- crates/proof-of-sql/Cargo.toml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/crates/proof-of-sql/Cargo.toml 
b/crates/proof-of-sql/Cargo.toml index 0c783d5eb..e966aa2bd 100644 --- a/crates/proof-of-sql/Cargo.toml +++ b/crates/proof-of-sql/Cargo.toml @@ -111,6 +111,10 @@ required-features = [ "arrow" ] name = "books" required-features = [ "arrow" ] +[[example]] +name = "programming_books" +required-features = ["arrow"] + [[example]] name = "brands" required-features = [ "arrow" ] @@ -148,4 +152,4 @@ required-features = [ "test" ] [[bench]] name = "jaeger_benches" harness = false -required-features = [ "blitzar" ] \ No newline at end of file +required-features = [ "blitzar" ] From dd7232a56c6cf7bd4c7ea575e6d06ed7faf16b2b Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Thu, 24 Oct 2024 13:25:18 -0600 Subject: [PATCH 65/74] chore: Add programming books dataset file --- .../examples/programming_books/programming_books.csv | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 crates/proof-of-sql/examples/programming_books/programming_books.csv diff --git a/crates/proof-of-sql/examples/programming_books/programming_books.csv b/crates/proof-of-sql/examples/programming_books/programming_books.csv new file mode 100644 index 000000000..a520b71c8 --- /dev/null +++ b/crates/proof-of-sql/examples/programming_books/programming_books.csv @@ -0,0 +1,6 @@ +title,author,publication_year,genre,rating +The Pragmatic Programmer,Andrew Hunt,1999,Programming,4.5 +Clean Code,Robert C. Martin,2008,Programming,4.7 +The Clean Coder,Robert C. 
Martin,2011,Programming,4.6 +Design Patterns,Erich Gamma,1994,Software Engineering,4.8 +Refactoring,Martin Fowler,1999,Programming,4.5 \ No newline at end of file From 5c1000e7d03d1ea641c2273e9beab95e18d9f781 Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Thu, 24 Oct 2024 13:53:04 -0600 Subject: [PATCH 66/74] fix: Fix naming convention --- crates/proof-of-sql/examples/programming_books/main.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/proof-of-sql/examples/programming_books/main.rs b/crates/proof-of-sql/examples/programming_books/main.rs index a524fff34..a40f557b2 100644 --- a/crates/proof-of-sql/examples/programming_books/main.rs +++ b/crates/proof-of-sql/examples/programming_books/main.rs @@ -1,8 +1,8 @@ //! This is a non-interactive example of using Proof of SQL with an extended books dataset. -//! To run this, use `cargo run --example books_extra`. +//! To run this, use `cargo run --example programming_books`. //! //! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed, -//! you can run `cargo run --example books_extra --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. +//! you can run `cargo run --example programming_books --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation. 
use arrow::datatypes::SchemaRef; use arrow_csv::{infer_schema_from_files, ReaderBuilder}; @@ -36,7 +36,7 @@ fn prove_and_verify_query( let now = Instant::now(); let query_plan = QueryExpr::::try_new( sql.parse().unwrap(), - "books_extra".parse().unwrap(), + "programming_books".parse().unwrap(), accessor, ) .unwrap(); @@ -77,7 +77,7 @@ fn main() { let prover_setup = ProverSetup::from(&public_parameters); let verifier_setup = VerifierSetup::from(&public_parameters); - let filename = "./crates/proof-of-sql/examples/books_extra/books_extra.csv"; + let filename = "./crates/proof-of-sql/examples/programming_books/programming_books.csv"; let inferred_schema = SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap()); let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema); @@ -94,7 +94,7 @@ fn main() { let mut accessor = OwnedTableTestAccessor::::new_empty_with_setup(&prover_setup); accessor.add_table( - "books_extra.books".parse().unwrap(), + "programming_books.books".parse().unwrap(), OwnedTable::try_from(books_extra_batch).unwrap(), 0, ); From c9835eedae0b6e9952e79476005d3d64afb9787f Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Mon, 28 Oct 2024 11:46:47 -0600 Subject: [PATCH 67/74] chore: Added more programming books to csv file --- .../examples/programming_books/programming_books.csv | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/programming_books/programming_books.csv b/crates/proof-of-sql/examples/programming_books/programming_books.csv index a520b71c8..dbad4ba3b 100644 --- a/crates/proof-of-sql/examples/programming_books/programming_books.csv +++ b/crates/proof-of-sql/examples/programming_books/programming_books.csv @@ -3,4 +3,9 @@ The Pragmatic Programmer,Andrew Hunt,1999,Programming,4.5 Clean Code,Robert C. Martin,2008,Programming,4.7 The Clean Coder,Robert C. 
Martin,2011,Programming,4.6 Design Patterns,Erich Gamma,1994,Software Engineering,4.8 -Refactoring,Martin Fowler,1999,Programming,4.5 \ No newline at end of file +Refactoring,Martin Fowler,1999,Programming,4.5 +Effective Java,Joshua Bloch,2008,Programming,4.7 +Introduction to Algorithms,Thomas H. Cormen,2009,Computer Science,4.8 +Code Complete,Steve McConnell,2004,Programming,4.6 +The Mythical Man-Month,Fred Brooks,1975,Software Engineering,4.3 +Algorithms,Robert Sedgewick,1983,Computer Science,4.5 From 4976dc4e7b48fc2432dea944d175682871e0202d Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Mon, 28 Oct 2024 11:47:58 -0600 Subject: [PATCH 68/74] fix: fixed cargo run command --- .github/workflows/lint-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 9395d5bee..5fee053d9 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -123,7 +123,7 @@ jobs: - name: Run books example run: cargo run --example books - name: Run programming books example - run: cargo run --example programmingbooks + run: cargo run --example programming_books - name: Run brands example run: cargo run --example brands - name: Run avocado-prices example From 2e544c1851677c1e64e29b445550956e4163b1e3 Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Mon, 28 Oct 2024 11:50:39 -0600 Subject: [PATCH 69/74] fix: fix formatting issue with extra line at the end of file --- crates/proof-of-sql/examples/programming_books/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/proof-of-sql/examples/programming_books/main.rs b/crates/proof-of-sql/examples/programming_books/main.rs index a40f557b2..c22960815 100644 --- a/crates/proof-of-sql/examples/programming_books/main.rs +++ b/crates/proof-of-sql/examples/programming_books/main.rs @@ -122,4 +122,4 @@ fn main() { &prover_setup, &verifier_setup, ); -} \ No newline at end of file +} 
From ebbdeeb0e5395593b865a1b9be178abef9b8b0fe Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Mon, 28 Oct 2024 11:54:20 -0600 Subject: [PATCH 70/74] chore: add a top 5 authors example --- crates/proof-of-sql/examples/programming_books/main.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/proof-of-sql/examples/programming_books/main.rs b/crates/proof-of-sql/examples/programming_books/main.rs index c22960815..09af38488 100644 --- a/crates/proof-of-sql/examples/programming_books/main.rs +++ b/crates/proof-of-sql/examples/programming_books/main.rs @@ -122,4 +122,12 @@ fn main() { &prover_setup, &verifier_setup, ); + + // Query 4: Find the top 5 authors with the most books + prove_and_verify_query( + "SELECT author, COUNT(*) AS book_count FROM books GROUP BY author ORDER BY book_count DESC LIMIT 5", + &accessor, + &prover_setup, + &verifier_setup, + ); } From a0e32405d938fa2581afb18be670502ab90fb01a Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Mon, 28 Oct 2024 11:56:33 -0600 Subject: [PATCH 71/74] feat: Add an average rating check for programming books --- crates/proof-of-sql/examples/programming_books/main.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/crates/proof-of-sql/examples/programming_books/main.rs b/crates/proof-of-sql/examples/programming_books/main.rs index 09af38488..4bbcdbb28 100644 --- a/crates/proof-of-sql/examples/programming_books/main.rs +++ b/crates/proof-of-sql/examples/programming_books/main.rs @@ -130,4 +130,12 @@ fn main() { &prover_setup, &verifier_setup, ); + + // Query 5: Calculate the average rating of books in each genre + prove_and_verify_query( + "SELECT genre, AVG(rating) AS average_rating FROM books GROUP BY genre ORDER BY average_rating DESC", + &accessor, + &prover_setup, + &verifier_setup, + ); } From 4d51ff214f31a0f9d631c2a60ce802867009c8d7 Mon Sep 17 00:00:00 2001 From: Hunter Yarbrough Date: Mon, 28 Oct 2024 12:11:47 -0600 Subject: [PATCH 72/74] chore: Remove average rating query 
proof --- crates/proof-of-sql/examples/programming_books/main.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/crates/proof-of-sql/examples/programming_books/main.rs b/crates/proof-of-sql/examples/programming_books/main.rs index 4bbcdbb28..09af38488 100644 --- a/crates/proof-of-sql/examples/programming_books/main.rs +++ b/crates/proof-of-sql/examples/programming_books/main.rs @@ -130,12 +130,4 @@ fn main() { &prover_setup, &verifier_setup, ); - - // Query 5: Calculate the average rating of books in each genre - prove_and_verify_query( - "SELECT genre, AVG(rating) AS average_rating FROM books GROUP BY genre ORDER BY average_rating DESC", - &accessor, - &prover_setup, - &verifier_setup, - ); } From baa74df21de17142987856c90b9f33399f3bf8f8 Mon Sep 17 00:00:00 2001 From: "dustin.ray" Date: Mon, 28 Oct 2024 16:48:03 -0700 Subject: [PATCH 73/74] fix: move examples into a different job --- .github/workflows/examples.yml | 67 +++++++++++++++++++++++++++++ .github/workflows/lint-and-test.yml | 32 -------------- 2 files changed, 67 insertions(+), 32 deletions(-) create mode 100644 .github/workflows/examples.yml diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml new file mode 100644 index 000000000..ac7c2a212 --- /dev/null +++ b/.github/workflows/examples.yml @@ -0,0 +1,67 @@ +name: Example Runs + +on: + workflow_call: + pull_request: + types: [opened, synchronize, reopened] + merge_group: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +env: + CARGO_TERM_COLOR: always + BLITZAR_BACKEND: cpu + +jobs: + example-tests: + name: Run Example Commands + runs-on: large-8-core-32gb-22-04 + + steps: + - name: Checkout sources + uses: actions/checkout@v3 + + - name: Install stable toolchain + run: curl https://sh.rustup.rs -sSf | bash -s -- -y --profile minimal && source ~/.cargo/env + + - name: Install Dependencies + run: | + export DEBIAN_FRONTEND=non-interactive + sudo apt-get update + sudo apt-get 
install -y clang lld + + # Runs the examples + - name: Run hello_world example (With Blitzar) + run: cargo run --example hello_world --features="test" + - name: Run hello_world example (Without Blitzar and With Rayon) + run: cargo run --example hello_world --no-default-features --features="rayon test" + - name: Run hello_world example (Without Blitzar and Without Rayon) + run: cargo run --example hello_world --no-default-features --features="test" + - name: Run space example + run: cargo run --example space + - name: Run dog breeds example + run: cargo run --example dog_breeds + - name: Run wood types example + run: cargo run --example wood_types + - name: Run dinosaurs example + run: cargo run --example dinosaurs + - name: Run books example + run: cargo run --example books + - name: Run brands example + run: cargo run --example brands + - name: Run avocado-prices example + run: cargo run --example avocado-prices + - name: Run plastics example + run: cargo run --example plastics + - name: Run sushi example + run: cargo run --example sushi + - name: Run countries example + run: cargo run --example countries + - name: Run rockets example + run: cargo run --example rockets + - name: Run posql_db example (With Blitzar) + run: bash crates/proof-of-sql/examples/posql_db/run_example.sh + - name: Run posql_db example (Without Blitzar) + run: bash crates/proof-of-sql/examples/posql_db/run_example.sh --no-default-features --features="rayon" diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 5e291eda1..d7dafa84b 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -106,38 +106,6 @@ jobs: run: | cargo test proof_primitive::dory::dory_compute_commitments_test --no-default-features --features="std" && \ cargo test proof_primitive::dory::dynamic_dory_compute_commitments_test --no-default-features --features="std" - - name: Run hello_world example (With Blitzar) - run: cargo run --example 
hello_world --features="test" - - name: Run hello_world example (Without Blitzar and With Rayon) - run: cargo run --example hello_world --no-default-features --features="rayon test" - - name: Run hello_world example (Without Blitzar and Without Rayon) - run: cargo run --example hello_world --no-default-features --features="test" - - name: Run space example - run: cargo run --example space - - name: Run dog breeds example - run: cargo run --example dog_breeds - - name: Run wood types example - run: cargo run --example wood_types - - name: Run dinosaurs example - run: cargo run --example dinosaurs - - name: Run books example - run: cargo run --example books - - name: Run brands example - run: cargo run --example brands - - name: Run avocado-prices example - run: cargo run --example avocado-prices - - name: Run plastics example - run: cargo run --example plastics - - name: Run sushi example - run: cargo run --example sushi - - name: Run countries example - run: cargo run --example countries - - name: Run rockets example - run: cargo run --example rockets - - name: Run posql_db example (With Blitzar) - run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - - name: Run posql_db example (Without Blitzar) - run: bash crates/proof-of-sql/examples/posql_db/run_example.sh --no-default-features --features="rayon" clippy: name: Clippy From dff34e8342f43b46a43d75497c2d86914ff6908d Mon Sep 17 00:00:00 2001 From: "dustin.ray" Date: Mon, 28 Oct 2024 17:55:40 -0700 Subject: [PATCH 74/74] fix: remove tests --- .github/workflows/examples.yml | 67 ----------------------------- .github/workflows/lint-and-test.yml | 12 ++++++ 2 files changed, 12 insertions(+), 67 deletions(-) delete mode 100644 .github/workflows/examples.yml diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml deleted file mode 100644 index ac7c2a212..000000000 --- a/.github/workflows/examples.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: Example Runs - -on: - workflow_call: - pull_request: 
- types: [opened, synchronize, reopened] - merge_group: - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }} - cancel-in-progress: true - -env: - CARGO_TERM_COLOR: always - BLITZAR_BACKEND: cpu - -jobs: - example-tests: - name: Run Example Commands - runs-on: large-8-core-32gb-22-04 - - steps: - - name: Checkout sources - uses: actions/checkout@v3 - - - name: Install stable toolchain - run: curl https://sh.rustup.rs -sSf | bash -s -- -y --profile minimal && source ~/.cargo/env - - - name: Install Dependencies - run: | - export DEBIAN_FRONTEND=non-interactive - sudo apt-get update - sudo apt-get install -y clang lld - - # Runs the examples - - name: Run hello_world example (With Blitzar) - run: cargo run --example hello_world --features="test" - - name: Run hello_world example (Without Blitzar and With Rayon) - run: cargo run --example hello_world --no-default-features --features="rayon test" - - name: Run hello_world example (Without Blitzar and Without Rayon) - run: cargo run --example hello_world --no-default-features --features="test" - - name: Run space example - run: cargo run --example space - - name: Run dog breeds example - run: cargo run --example dog_breeds - - name: Run wood types example - run: cargo run --example wood_types - - name: Run dinosaurs example - run: cargo run --example dinosaurs - - name: Run books example - run: cargo run --example books - - name: Run brands example - run: cargo run --example brands - - name: Run avocado-prices example - run: cargo run --example avocado-prices - - name: Run plastics example - run: cargo run --example plastics - - name: Run sushi example - run: cargo run --example sushi - - name: Run countries example - run: cargo run --example countries - - name: Run rockets example - run: cargo run --example rockets - - name: Run posql_db example (With Blitzar) - run: bash crates/proof-of-sql/examples/posql_db/run_example.sh - - name: Run posql_db example (Without Blitzar) - run: bash 
crates/proof-of-sql/examples/posql_db/run_example.sh --no-default-features --features="rayon" diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml index 7226c09d3..ea430158b 100644 --- a/.github/workflows/lint-and-test.yml +++ b/.github/workflows/lint-and-test.yml @@ -106,6 +106,18 @@ jobs: run: | cargo test proof_primitive::dory::dory_compute_commitments_test --no-default-features --features="std" && \ cargo test proof_primitive::dory::dynamic_dory_compute_commitments_test --no-default-features --features="std" + - name: Run hello_world example (With Blitzar) + run: cargo run --example hello_world --features="test" + - name: Run hello_world example (Without Blitzar and With Rayon) + run: cargo run --example hello_world --no-default-features --features="rayon test" + - name: Run hello_world example (Without Blitzar and Without Rayon) + run: cargo run --example hello_world --no-default-features --features="test" + - name: Run space example + run: cargo run --example space + - name: Run posql_db example (With Blitzar) + run: bash crates/proof-of-sql/examples/posql_db/run_example.sh + - name: Run posql_db example (Without Blitzar) + run: bash crates/proof-of-sql/examples/posql_db/run_example.sh --no-default-features --features="rayon" clippy: name: Clippy runs-on: large-8-core-32gb-22-04