Skip to content

Commit

Permalink
feat: programming_books example (#314)
Browse files Browse the repository at this point in the history
Please be sure to look over the pull request guidelines here:
https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md#submit-pr.

# Please go through the following checklist
- [ ] The PR title and commit messages adhere to guidelines here:
https://github.com/spaceandtimelabs/sxt-proof-of-sql/blob/main/CONTRIBUTING.md.
In particular `!` is used if and only if at least one breaking change
has been introduced.
- [ ] I have run the ci check script with `source
scripts/run_ci_checks.sh`.

# Rationale for this change

<!--
Why are you proposing this change? If this is already explained clearly
in the linked issue then this section is not needed.
Explaining clearly why changes are proposed helps reviewers understand
your changes and offer better suggestions for fixes.

 Example:
 Add `NestedLoopJoinExec`.
 Closes #345.

Since we added `HashJoinExec` in #323 it has been possible to do
provable inner joins. However performance is not satisfactory in some
cases. Hence we need to fix the problem by implementing
`NestedLoopJoinExec` and speeding up the code
 for `HashJoinExec`.
-->

# What changes are included in this PR?

<!--
There is no need to duplicate the description in the ticket here but it
is sometimes worth providing a summary of the individual changes in this
PR.

Example:
- Add `NestedLoopJoinExec`.
- Speed up `HashJoinExec`.
- Route joins to `NestedLoopJoinExec` if the outer input is sufficiently
small.
-->

# Are these changes tested?
<!--
We typically require tests for all PRs in order to:
1. Prevent the code from being accidentally broken by subsequent changes
2. Serve as another way to document the expected behavior of the code

If tests are not included in your PR, please explain why (for example,
are they covered by existing tests)?

Example:
Yes.
-->
Yes:

Parsing the query: SELECT COUNT(*) AS total_books FROM books...
Done in 5.759542000000001 ms.
Generating proof...Done in 258.379792 ms.
Verifying proof...Verified in 82.497291 ms.
Query Result:
Ok(OwnedTable { table: {Identifier { name: "total_books" }:
BigInt([10])} })
Parsing the query: SELECT title, author FROM books WHERE rating > 4.5...
Done in 8.903875000000001 ms.
Generating proof...Done in 350.96833399999997 ms.
Verifying proof...Verified in 103.658958 ms.
Query Result:
Ok(OwnedTable { table: {Identifier { name: "title" }: VarChar(["Clean
Code", "The Clean Coder", "Design Patterns", "Effective Java",
"Introduction to Algorithms", "Code Complete"]), Identifier { name:
"author" }: VarChar(["Robert C. Martin", "Robert C. Martin", "Erich
Gamma", "Joshua Bloch", "Thomas H. Cormen", "Steve McConnell"])} })
Parsing the query: SELECT title, publication_year FROM books WHERE genre
= 'Programming' AND publication_year > 2000...
Done in 5.648333 ms.
Generating proof...Done in 257.21125 ms.
Verifying proof...Verified in 111.860208 ms.
Query Result:
Ok(OwnedTable { table: {Identifier { name: "title" }: VarChar(["Clean
Code", "The Clean Coder", "Effective Java", "Code Complete"]),
Identifier { name: "publication_year" }: BigInt([2008, 2011, 2008,
2004])} })
Parsing the query: SELECT author, COUNT(*) AS book_count FROM books
GROUP BY author ORDER BY book_count DESC LIMIT 5...
Done in 5.722208999999999 ms.
Generating proof...Done in 181.42775 ms.
Verifying proof...Verified in 79.493166 ms.
Query Result:
Ok(OwnedTable { table: {Identifier { name: "author" }: VarChar(["Robert
C. Martin", "Andrew Hunt", "Erich Gamma", "Fred Brooks", "Joshua
Bloch"]), Identifier { name: "book_count" }: BigInt([2, 1, 1, 1, 1])} })
  • Loading branch information
yarbroughrh authored Oct 28, 2024
2 parents 3c666a1 + 4d51ff2 commit b9021ba
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .github/workflows/lint-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,8 @@ jobs:
run: cargo run --example dinosaurs
- name: Run books example
run: cargo run --example books
- name: Run programming books example
run: cargo run --example programming_books
- name: Run brands example
run: cargo run --example brands
- name: Run avocado-prices example
Expand Down
6 changes: 5 additions & 1 deletion crates/proof-of-sql/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ required-features = [ "arrow" ]
name = "books"
required-features = [ "arrow" ]

[[example]]
name = "programming_books"
required-features = ["arrow"]

[[example]]
name = "brands"
required-features = [ "arrow" ]
Expand Down Expand Up @@ -148,4 +152,4 @@ required-features = [ "test" ]
[[bench]]
name = "jaeger_benches"
harness = false
required-features = [ "blitzar" ]
required-features = [ "blitzar" ]
133 changes: 133 additions & 0 deletions crates/proof-of-sql/examples/programming_books/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
//! This is a non-interactive example of using Proof of SQL with an extended books dataset.
//! To run this, use `cargo run --example programming_books`.
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --example programming_books --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.

use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
base::database::{
arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
TestAccessor,
},
proof_primitive::dory::{
DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters,
VerifierSetup,
},
sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof},
};
use rand::{rngs::StdRng, SeedableRng};
use std::{fs::File, time::Instant};

/// Size parameter passed to `PublicParameters::rand` in `main` when generating the Dory setup.
/// NOTE(review): presumably this bounds how large a table/proof the setup can handle — confirm
/// against the Dory documentation before changing it.
const DORY_SETUP_MAX_NU: usize = 8;
/// Fixed 32-byte seed for the `StdRng` that `main` uses to generate the public parameters,
/// so the example does not depend on fresh randomness between runs.
const DORY_SEED: [u8; 32] = *b"ebab60d58dee4cc69658939b7c2a582d";

/// Runs a single SQL query through the full pipeline: parse, prove, verify, print.
///
/// The query is planned against `accessor` with `programming_books` supplied as the schema
/// name, a proof is generated with `prover_setup`, the proof is checked with `verifier_setup`,
/// and the post-processed result is written to stdout. The wall-clock time of each stage is
/// printed in milliseconds.
///
/// # Panics
/// Panics if the SQL text or the schema name fails to parse, if the query plan cannot be
/// built, or if the proof does not verify.
fn prove_and_verify_query(
    sql: &str,
    accessor: &OwnedTableTestAccessor<DynamicDoryEvaluationProof>,
    prover_setup: &ProverSetup,
    verifier_setup: &VerifierSetup,
) {
    // Milliseconds elapsed since `start`; evaluated at the moment each stage reports.
    let millis_since = |start: Instant| start.elapsed().as_secs_f64() * 1000.;

    // Stage 1: turn the SQL text into a provable query plan.
    println!("Parsing the query: {sql}...");
    let parse_timer = Instant::now();
    let statement = sql.parse().unwrap();
    let schema = "programming_books".parse().unwrap();
    let plan = QueryExpr::<DynamicDoryCommitment>::try_new(statement, schema, accessor).unwrap();
    println!("Done in {} ms.", millis_since(parse_timer));

    // Stage 2: produce the proof together with the (not yet verified) result.
    print!("Generating proof...");
    let prove_timer = Instant::now();
    let (proof, provable_result) =
        QueryProof::<DynamicDoryEvaluationProof>::new(plan.proof_expr(), accessor, &prover_setup);
    println!("Done in {} ms.", millis_since(prove_timer));

    // Stage 3: check the proof, then apply the plan's post-processing steps.
    // The reported verification time deliberately covers both, as post-processing
    // runs before the timer is read.
    print!("Verifying proof...");
    let verify_timer = Instant::now();
    let verified = proof
        .verify(plan.proof_expr(), accessor, &provable_result, &verifier_setup)
        .unwrap();
    let final_table = apply_postprocessing_steps(verified.table, plan.postprocessing());
    println!("Verified in {} ms.", millis_since(verify_timer));

    // Stage 4: show the outcome.
    println!("Query Result:");
    println!("{final_table:?}");
}

/// Entry point: builds the Dory setup, loads the programming-books CSV into an accessor,
/// and proves/verifies a fixed list of example queries against it.
///
/// The CSV path is relative, so the example must be launched from the repository root.
///
/// # Panics
/// Panics if the CSV file cannot be opened, its schema cannot be inferred, it yields no
/// record batch, or the batch cannot be converted into an `OwnedTable`.
fn main() {
    // Derive the public parameters (and both setups) from the fixed seed.
    let mut seeded_rng = StdRng::from_seed(DORY_SEED);
    let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut seeded_rng);
    let prover_setup = ProverSetup::from(&public_parameters);
    let verifier_setup = VerifierSetup::from(&public_parameters);

    // Infer the CSV schema, then map it onto a Proof-of-SQL compatible schema.
    let csv_path = "./crates/proof-of-sql/examples/programming_books/programming_books.csv";
    let csv_schema = SchemaRef::new(
        infer_schema_from_files(&[String::from(csv_path)], b',', None, true).unwrap(),
    );
    let table_schema = get_posql_compatible_schema(&csv_schema);

    // Read the data; only the first record batch produced by the reader is used.
    let mut csv_reader = ReaderBuilder::new(table_schema)
        .with_header(true)
        .build(File::open(csv_path).unwrap())
        .unwrap();
    let first_batch = csv_reader.next().unwrap().unwrap();

    // Register the table with an accessor so prover and verifier can reach the
    // data and its commitments.
    let mut accessor =
        OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
    let books_table = OwnedTable::try_from(first_batch).unwrap();
    accessor.add_table("programming_books.books".parse().unwrap(), books_table, 0);

    // The example queries, executed in the order listed.
    let queries = [
        // Query 1: Count the total number of books
        "SELECT COUNT(*) AS total_books FROM books",
        // Query 2: Find books with a rating higher than 4.5
        "SELECT title, author FROM books WHERE rating > 4.5",
        // Query 3: List all programming books published after 2000
        "SELECT title, publication_year FROM books WHERE genre = 'Programming' AND publication_year > 2000",
        // Query 4: Find the top 5 authors with the most books
        "SELECT author, COUNT(*) AS book_count FROM books GROUP BY author ORDER BY book_count DESC LIMIT 5",
    ];
    for sql in queries {
        prove_and_verify_query(sql, &accessor, &prover_setup, &verifier_setup);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
title,author,publication_year,genre,rating
The Pragmatic Programmer,Andrew Hunt,1999,Programming,4.5
Clean Code,Robert C. Martin,2008,Programming,4.7
The Clean Coder,Robert C. Martin,2011,Programming,4.6
Design Patterns,Erich Gamma,1994,Software Engineering,4.8
Refactoring,Martin Fowler,1999,Programming,4.5
Effective Java,Joshua Bloch,2008,Programming,4.7
Introduction to Algorithms,Thomas H. Cormen,2009,Computer Science,4.8
Code Complete,Steve McConnell,2004,Programming,4.6
The Mythical Man-Month,Fred Brooks,1975,Software Engineering,4.3
Algorithms,Robert Sedgewick,1983,Computer Science,4.5

0 comments on commit b9021ba

Please sign in to comment.