Commit 04de341
Merge branch 'main' into refactor/remove-get-test-accessor-and-rename-vars

akhilender-bongirwar committed Oct 27, 2024
2 parents f7c722b + 75cc69b
Showing 32 changed files with 594 additions and 573 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/lint-and-test.yml
@@ -130,6 +130,8 @@ jobs:
         run: cargo run --example plastics
       - name: Run sushi example
         run: cargo run --example sushi
+      - name: Run countries example
+        run: cargo run --example countries
       - name: Run posql_db example (With Blitzar)
         run: bash crates/proof-of-sql/examples/posql_db/run_example.sh
       - name: Run posql_db example (Without Blitzar)
4 changes: 2 additions & 2 deletions Cargo.toml
@@ -20,8 +20,8 @@ ark-poly = { version = "0.4.0" }
 ark-serialize = { version = "0.4.0" }
 ark-std = { version = "0.4.0", default-features = false }
 arrayvec = { version = "0.7", default-features = false }
-arrow = { version = "51.0" }
-arrow-csv = { version = "51.0" }
+arrow = { version = "51.0.0" }
+arrow-csv = { version = "51.0.0" }
 bit-iter = { version = "1.1.1" }
 bigdecimal = { version = "0.4.5", default-features = false, features = ["serde"] }
 blake3 = { version = "1.3.3", default-features = false }
4 changes: 4 additions & 0 deletions crates/proof-of-sql/Cargo.toml
@@ -127,6 +127,10 @@ required-features = [ "arrow" ]
 name = "sushi"
 required-features = [ "arrow" ]
 
+[[example]]
+name = "countries"
+required-features = [ "arrow" ]
+
 [[bench]]
 name = "posql_benches"
 harness = false
35 changes: 35 additions & 0 deletions crates/proof-of-sql/examples/countries/countries_gdp.csv
(New data file; the columns appear to be circa-2019 figures: GDP in billions of USD, and GDPP, per-capita GDP in USD.)
@@ -0,0 +1,35 @@
Country,Continent,GDP,GDPP
UnitedStates,NorthAmerica,21137,63543
China,Asia,14342,10261
Japan,Asia,5081,40293
Germany,Europe,3846,46329
India,Asia,2875,2099
UnitedKingdom,Europe,2825,42330
France,Europe,2716,41463
Italy,Europe,2001,33279
Brazil,SouthAmerica,1839,8718
Canada,NorthAmerica,1643,43119
Russia,EuropeAsia,1637,11229
SouthKorea,Asia,1622,31489
Australia,Oceania,1382,53799
Spain,Europe,1316,28152
Mexico,NorthAmerica,1265,9958
Indonesia,Asia,1119,4152
Netherlands,Europe,902,52477
SaudiArabia,Asia,793,23206
Turkey,EuropeAsia,761,9005
Switzerland,Europe,703,81392
Argentina,SouthAmerica,449,9921
Sweden,Europe,528,52073
Nigeria,Africa,448,2190
Poland,Europe,594,15673
Thailand,Asia,509,7306
SouthAfrica,Africa,350,5883
Philippines,Asia,402,3685
Colombia,SouthAmerica,323,6458
Egypt,Africa,302,3012
Pakistan,Asia,278,1450
Bangladesh,Asia,302,1855
Vietnam,Asia,283,2900
Chile,SouthAmerica,252,13120
Finland,Europe,268,48888
132 changes: 132 additions & 0 deletions crates/proof-of-sql/examples/countries/main.rs
@@ -0,0 +1,132 @@
//! This is a non-interactive example of using Proof of SQL with a countries dataset.
//! To run this, use `cargo run --release --example countries`.
//!
//! NOTE: If this doesn't work because you do not have the appropriate GPU drivers installed,
//! you can run `cargo run --release --example countries --no-default-features --features="arrow cpu-perf"` instead. It will be slower for proof generation.

use arrow::datatypes::SchemaRef;
use arrow_csv::{infer_schema_from_files, ReaderBuilder};
use proof_of_sql::{
    base::database::{
        arrow_schema_utility::get_posql_compatible_schema, OwnedTable, OwnedTableTestAccessor,
        TestAccessor,
    },
    proof_primitive::dory::{
        DynamicDoryCommitment, DynamicDoryEvaluationProof, ProverSetup, PublicParameters,
        VerifierSetup,
    },
    sql::{parse::QueryExpr, postprocessing::apply_postprocessing_steps, proof::QueryProof},
};
use rand::{rngs::StdRng, SeedableRng};
use std::{fs::File, time::Instant};

// We generate the public parameters and the setups used by the prover and verifier for the Dory PCS.
// The `max_nu` should be set such that the maximum table size is less than `2^(2*max_nu-1)`.
const DORY_SETUP_MAX_NU: usize = 8;
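// With `DORY_SETUP_MAX_NU = 8`, a table must have fewer than 2^(2*8-1) = 2^15 = 32768
// rows; the 34-row countries table loaded below fits comfortably.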
// This should be a "nothing-up-my-sleeve" phrase or number.
const DORY_SEED: [u8; 32] = *b"7a1b3c8d2e4f9g6h5i0j7k2l8m3n9o1p";

/// # Panics
/// Will panic if the query does not parse or the proof fails to verify.
fn prove_and_verify_query(
    sql: &str,
    accessor: &OwnedTableTestAccessor<DynamicDoryEvaluationProof>,
    prover_setup: &ProverSetup,
    verifier_setup: &VerifierSetup,
) {
    // Parse the query:
    println!("Parsing the query: {sql}...");
    let now = Instant::now();
    let query_plan = QueryExpr::<DynamicDoryCommitment>::try_new(
        sql.parse().unwrap(),
        "countries".parse().unwrap(),
        accessor,
    )
    .unwrap();
    println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Generate the proof and result:
    print!("Generating proof...");
    let now = Instant::now();
    let (proof, provable_result) = QueryProof::<DynamicDoryEvaluationProof>::new(
        query_plan.proof_expr(),
        accessor,
        &prover_setup,
    );
    println!("Done in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Verify the result with the proof:
    print!("Verifying proof...");
    let now = Instant::now();
    let result = proof
        .verify(
            query_plan.proof_expr(),
            accessor,
            &provable_result,
            &verifier_setup,
        )
        .unwrap();
    let result = apply_postprocessing_steps(result.table, query_plan.postprocessing());
    println!("Verified in {} ms.", now.elapsed().as_secs_f64() * 1000.);

    // Display the result
    println!("Query Result:");
    println!("{result:?}");
}

fn main() {
    let mut rng = StdRng::from_seed(DORY_SEED);
    let public_parameters = PublicParameters::rand(DORY_SETUP_MAX_NU, &mut rng);
    let prover_setup = ProverSetup::from(&public_parameters);
    let verifier_setup = VerifierSetup::from(&public_parameters);

    let filename = "./crates/proof-of-sql/examples/countries/countries_gdp.csv";
    let inferred_schema =
        SchemaRef::new(infer_schema_from_files(&[filename.to_string()], b',', None, true).unwrap());
    let posql_compatible_schema = get_posql_compatible_schema(&inferred_schema);

    let countries_batch = ReaderBuilder::new(posql_compatible_schema)
        .with_header(true)
        .build(File::open(filename).unwrap())
        .unwrap()
        .next()
        .unwrap()
        .unwrap();

    // Load the table into an "Accessor" so that the prover and verifier can access the data/commitments.
    let mut accessor =
        OwnedTableTestAccessor::<DynamicDoryEvaluationProof>::new_empty_with_setup(&prover_setup);
    accessor.add_table(
        "countries.countries".parse().unwrap(),
        OwnedTable::try_from(countries_batch).unwrap(),
        0,
    );

    prove_and_verify_query(
        "SELECT COUNT(*) AS total_countries FROM countries",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );

    prove_and_verify_query(
        "SELECT country FROM countries WHERE continent = 'Asia'",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );

    prove_and_verify_query(
        "SELECT country FROM countries WHERE gdp > 500 AND gdp < 1500",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );

    prove_and_verify_query(
        "SELECT SUM(gdp) AS total_market_cap FROM countries WHERE country = 'China' OR country = 'India'",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );
}
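The same helper works for any further query against the accessor. For instance, this hypothetical addition (not part of the commit) would filter on the per-capita column:

    prove_and_verify_query(
        "SELECT country FROM countries WHERE gdpp > 50000",
        &accessor,
        &prover_setup,
        &verifier_setup,
    );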
3 changes: 2 additions & 1 deletion crates/proof-of-sql/examples/posql_db/main.rs
@@ -5,6 +5,7 @@ mod commit_accessor;
 mod csv_accessor;
 /// TODO: add docs
 mod record_batch_accessor;
+
 use arrow::{
     datatypes::{DataType, Field, Schema},
     record_batch::RecordBatch,
@@ -273,7 +274,7 @@ fn main() {
     end_timer(timer);
     println!(
         "Verified Result: {:?}",
-        RecordBatch::try_from(query_result).unwrap()
+        RecordBatch::try_from(query_result.table).unwrap()
     );
 }
 }
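(This is the same API shape the new countries example uses: `verify` now returns a result whose `table` field holds the query output, which `RecordBatch::try_from` then converts for display.)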
4 changes: 2 additions & 2 deletions crates/proof-of-sql/examples/posql_db/record_batch_accessor.rs
@@ -2,9 +2,9 @@ use arrow::record_batch::RecordBatch;
 use bumpalo::Bump;
 use indexmap::IndexMap;
 use proof_of_sql::base::{
+    arrow::arrow_array_to_column_conversion::ArrayRefExt,
     database::{
-        ArrayRefExt, Column, ColumnRef, ColumnType, DataAccessor, MetadataAccessor, SchemaAccessor,
-        TableRef,
+        Column, ColumnRef, ColumnType, DataAccessor, MetadataAccessor, SchemaAccessor, TableRef,
     },
     scalar::Scalar,
 };
79 changes: 79 additions & 0 deletions crates/proof-of-sql/src/base/arrow/column_arrow_conversions.rs
@@ -0,0 +1,79 @@
use crate::base::{
    database::{ColumnField, ColumnType},
    math::decimal::Precision,
};
use alloc::sync::Arc;
use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit};
use proof_of_sql_parser::posql_time::{PoSQLTimeUnit, PoSQLTimeZone};

/// Convert [`ColumnType`] values to some arrow [`DataType`]
impl From<&ColumnType> for DataType {
    fn from(column_type: &ColumnType) -> Self {
        match column_type {
            ColumnType::Boolean => DataType::Boolean,
            ColumnType::TinyInt => DataType::Int8,
            ColumnType::SmallInt => DataType::Int16,
            ColumnType::Int => DataType::Int32,
            ColumnType::BigInt => DataType::Int64,
            ColumnType::Int128 => DataType::Decimal128(38, 0),
            ColumnType::Decimal75(precision, scale) => {
                DataType::Decimal256(precision.value(), *scale)
            }
            ColumnType::VarChar => DataType::Utf8,
            ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"),
            ColumnType::TimestampTZ(timeunit, timezone) => {
                let arrow_timezone = Some(Arc::from(timezone.to_string()));
                let arrow_timeunit = match timeunit {
                    PoSQLTimeUnit::Second => ArrowTimeUnit::Second,
                    PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond,
                    PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond,
                    PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond,
                };
                DataType::Timestamp(arrow_timeunit, arrow_timezone)
            }
        }
    }
}

/// Convert arrow [`DataType`] values to some [`ColumnType`]
impl TryFrom<DataType> for ColumnType {
    type Error = String;

    fn try_from(data_type: DataType) -> Result<Self, Self::Error> {
        match data_type {
            DataType::Boolean => Ok(ColumnType::Boolean),
            DataType::Int8 => Ok(ColumnType::TinyInt),
            DataType::Int16 => Ok(ColumnType::SmallInt),
            DataType::Int32 => Ok(ColumnType::Int),
            DataType::Int64 => Ok(ColumnType::BigInt),
            DataType::Decimal128(38, 0) => Ok(ColumnType::Int128),
            DataType::Decimal256(precision, scale) if precision <= 75 => {
                Ok(ColumnType::Decimal75(Precision::new(precision)?, scale))
            }
            DataType::Timestamp(time_unit, timezone_option) => {
                let posql_time_unit = match time_unit {
                    ArrowTimeUnit::Second => PoSQLTimeUnit::Second,
                    ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond,
                    ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond,
                    ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond,
                };
                Ok(ColumnType::TimestampTZ(
                    posql_time_unit,
                    PoSQLTimeZone::try_from(&timezone_option)?,
                ))
            }
            DataType::Utf8 => Ok(ColumnType::VarChar),
            _ => Err(format!("Unsupported arrow data type {data_type:?}")),
        }
    }
}

/// Convert [`ColumnField`] values to arrow Field
impl From<&ColumnField> for Field {
    fn from(column_field: &ColumnField) -> Self {
        Field::new(
            column_field.name().name(),
            (&column_field.data_type()).into(),
            false,
        )
    }
}
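A brief sketch of how the two impls above compose (hypothetical usage, not part of the commit; it assumes `ColumnType` is importable from `proof_of_sql::base::database`, as the `record_batch_accessor` diff above shows):

    use arrow::datatypes::DataType;
    use proof_of_sql::base::database::ColumnType;

    fn roundtrip_demo() {
        // ColumnType -> arrow DataType via the From<&ColumnType> impl:
        let data_type = DataType::from(&ColumnType::BigInt);
        assert_eq!(data_type, DataType::Int64);
        // ...and back via TryFrom<DataType>:
        assert_eq!(ColumnType::try_from(data_type), Ok(ColumnType::BigInt));
        // Unsupported arrow types are rejected with an Err(String):
        assert!(ColumnType::try_from(DataType::Float64).is_err());
    }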
26 changes: 26 additions & 0 deletions crates/proof-of-sql/src/base/arrow/mod.rs
@@ -0,0 +1,26 @@
//! This module provides conversions and utilities for working with Arrow data structures.

/// Module for handling conversion from Arrow arrays to columns.
pub mod arrow_array_to_column_conversion;

/// Module for converting between owned and Arrow data structures.
pub mod owned_and_arrow_conversions;

#[cfg(test)]
/// Tests for owned and Arrow conversions.
mod owned_and_arrow_conversions_test;

/// Module for converting record batches.
pub mod record_batch_conversion;

/// Module for record batch error definitions.
pub mod record_batch_errors;

/// Utility functions for record batches.
pub mod record_batch_utility;

/// Module for scalar and i256 conversions.
pub mod scalar_and_i256_conversions;

/// Module for handling conversions between columns and Arrow arrays.
pub mod column_arrow_conversions;
crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions.rs
@@ -12,12 +12,9 @@
 //! This is because there is no `Int128` type in Arrow.
 //! This does not check that the values are less than 39 digits.
 //! However, the actual arrow backing `i128` is the correct value.
-use super::scalar_and_i256_conversions::convert_scalar_to_i256;
+use super::scalar_and_i256_conversions::{convert_i256_to_scalar, convert_scalar_to_i256};
 use crate::base::{
-    database::{
-        scalar_and_i256_conversions::convert_i256_to_scalar, OwnedColumn, OwnedTable,
-        OwnedTableError,
-    },
+    database::{OwnedColumn, OwnedTable, OwnedTableError},
     map::IndexMap,
     math::decimal::Precision,
     scalar::Scalar,
crates/proof-of-sql/src/base/arrow/owned_and_arrow_conversions_test.rs
@@ -1,7 +1,7 @@
-use super::{OwnedColumn, OwnedTable};
+use super::owned_and_arrow_conversions::OwnedArrowConversionError;
 use crate::{
     base::{
-        database::{owned_table_utility::*, OwnedArrowConversionError},
+        database::{owned_table_utility::*, OwnedColumn, OwnedTable},
         map::IndexMap,
         scalar::Curve25519Scalar,
     },
(Diff truncated: the remaining changed files are not shown.)