feat: make polars postprocessing optional behind feature flag #21

Closed · wants to merge 2 commits
8 changes: 8 additions & 0 deletions .github/workflows/lint-and-test.yml
@@ -48,6 +48,14 @@ jobs:
         run: cargo check -p proof-of-sql --no-default-features --features="test"
       - name: Run cargo check (proof-of-sql) (just "blitzar" feature)
         run: cargo check -p proof-of-sql --no-default-features --features="blitzar"
+      - name: Run cargo check (proof-of-sql) (just "polars" feature)
+        run: cargo check -p proof-of-sql --no-default-features --features="polars"
+      - name: Run cargo check (proof-of-sql) (no "test" feature)
+        run: cargo check -p proof-of-sql --no-default-features --features="blitzar polars"
+      - name: Run cargo check (proof-of-sql) (no "blitzar" feature)
+        run: cargo check -p proof-of-sql --no-default-features --features="test polars"
+      - name: Run cargo check (proof-of-sql) (no "polars" feature)
+        run: cargo check -p proof-of-sql --no-default-features --features="blitzar test"
 
   test:
     name: Test Suite
4 changes: 2 additions & 2 deletions Cargo.toml
@@ -25,7 +25,7 @@ bigdecimal = { version = "0.4.5", features = ["serde"] }
 blake3 = { version = "1.3.3" }
 blitzar = { version = "3.0.2" }
 bumpalo = { version = "3.11.0" }
-bytemuck = {version = "1.14.2" }
+bytemuck = {version = "1.14.2", features = ["derive"] }
 byte-slice-cast = { version = "1.2.1" }
 clap = { version = "4.5.4" }
 criterion = { version = "0.5.1" }
@@ -35,7 +35,7 @@ derive_more = { version = "0.99" }
 dyn_partial_eq = { version = "0.1.2" }
 flexbuffers = { version = "2.0.0" }
 hashbrown = { version = "0.14.0" }
-indexmap = { version = "2.1" }
+indexmap = { version = "2.1", features = ["serde"] }
 itertools = { version = "0.13.0" }
 lalrpop-util = { version = "0.20.0" }
 lazy_static = { version = "1.4.0" }
6 changes: 3 additions & 3 deletions crates/proof-of-sql/Cargo.toml
@@ -40,7 +40,7 @@ lazy_static = { workspace = true }
 merlin = { workspace = true }
 num-traits = { workspace = true }
 num-bigint = { workspace = true, default-features = false }
-polars = { workspace = true, features = ["lazy", "bigidx", "dtype-decimal", "serde-lazy"] }
+polars = { workspace = true, features = ["lazy", "bigidx", "dtype-decimal", "serde-lazy"], optional = true }
 postcard = { workspace = true, features = ["alloc"] }
 proof-of-sql-parser = { workspace = true }
 rand = { workspace = true, optional = true }
@@ -59,7 +59,7 @@ clap = { workspace = true, features = ["derive"] }
 criterion = { workspace = true, features = ["html_reports"] }
 opentelemetry = { workspace = true }
 opentelemetry-jaeger = { workspace = true }
-polars = { workspace = true, features = ["lazy"] }
+polars = { workspace = true, features = ["lazy", "dtype-decimal"] }
 rand = { workspace = true }
 rand_core = { workspace = true }
 serde_json = { workspace = true }
@@ -69,7 +69,7 @@ tracing-subscriber = { workspace = true }
 flexbuffers = { workspace = true }
 
 [features]
-default = ["blitzar"]
+default = ["blitzar", "polars"]
 test = ["dep:rand"]
 
 [lints]
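For context on what these manifest changes do: `optional = true` makes Cargo build `polars` only when the (implicit) `polars` feature is enabled, and keeping `"polars"` in `default` preserves current behavior while letting `--no-default-features` consumers drop the dependency. A minimal sketch of how crate code consumes such a flag — the function below is illustrative, not taken from this PR:

/// Illustrative only: this item exists solely in polars-enabled builds.
/// When the "polars" feature is off, the function (and the polars crate
/// itself) is compiled out.
#[cfg(feature = "polars")]
pub fn row_count(df: &polars::prelude::DataFrame) -> usize {
    df.height() // polars' row-count accessor
}

Every call site then needs a matching gate (or the broader `cfg(any(test, feature = "polars"))` used in the `mod.rs` diff below), or builds without the feature fail to resolve the symbol.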
6 changes: 0 additions & 6 deletions crates/proof-of-sql/src/base/database/column.rs
@@ -193,12 +193,6 @@ impl<'a, S: Scalar> Column<'a, S> {
     }
 }
 
-/// The precision for [ColumnType::INT128] values
-pub const INT128_PRECISION: usize = 38;
-
-/// The scale for [ColumnType::INT128] values
-pub const INT128_SCALE: usize = 0;
-
 /// Represents the supported data types of a column in an in-memory,
 /// column-oriented database.
 ///
4 changes: 3 additions & 1 deletion crates/proof-of-sql/src/base/database/mod.rs
@@ -6,7 +6,6 @@ pub use accessor::{CommitmentAccessor, DataAccessor, MetadataAccessor, SchemaAccessor};
 
 mod column;
 pub use column::{Column, ColumnField, ColumnRef, ColumnType};
-pub(crate) use column::{INT128_PRECISION, INT128_SCALE};
 
 mod literal_value;
 pub use literal_value::LiteralValue;
@@ -17,7 +16,9 @@ pub use table_ref::TableRef;
 mod arrow_array_to_column_conversion;
 pub use arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError};
 
+#[cfg(any(test, feature = "polars"))]
 mod record_batch_dataframe_conversion;
+#[cfg(any(test, feature = "polars"))]
 pub(crate) use record_batch_dataframe_conversion::{
     dataframe_to_record_batch, record_batch_to_dataframe,
 };
@@ -59,6 +60,7 @@ mod test_accessor;
 #[cfg(any(test, feature = "test"))]
 pub use test_accessor::TestAccessor;
 #[cfg(test)]
+#[allow(unused_imports)]
 pub(crate) use test_accessor::UnimplementedTestAccessor;
 
 #[cfg(any(test, feature = "test"))]
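A note on the `any(test, feature = "polars")` form used in these gates: `cfg(test)` is active when this crate compiles its own unit tests, where `polars` remains available as a dev-dependency, so the conversion helpers still build under `cargo test` while disappearing from dependents' builds unless they opt in. A small sketch of the pattern (module and item names are hypothetical placeholders, not the crate's real API):

// Compiled in two situations: this crate's own `cargo test` builds,
// or any build with the "polars" feature enabled.
#[cfg(any(test, feature = "polars"))]
mod dataframe_helpers {
    /// Stand-in for the real dataframe <-> record-batch conversions.
    pub(crate) fn roundtrip_len(rows: usize) -> usize {
        rows
    }
}

// Each use site repeats the gate so builds without "polars"
// never reference the missing module.
#[cfg(any(test, feature = "polars"))]
pub(crate) use dataframe_helpers::roundtrip_len;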
@@ -1,13 +1,15 @@
+#[cfg(any(test, feature = "polars"))]
+use super::{dataframe_to_record_batch, record_batch_to_dataframe};
 use super::{
-    dataframe_to_record_batch, record_batch_to_dataframe, ArrayRefExt, Column, ColumnRef,
-    ColumnType, CommitmentAccessor, DataAccessor, MetadataAccessor, SchemaAccessor, TableRef,
-    TestAccessor,
+    ArrayRefExt, Column, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, MetadataAccessor,
+    SchemaAccessor, TableRef, TestAccessor,
 };
 use crate::base::scalar::{compute_commitment_for_testing, Curve25519Scalar};
 use arrow::{array::ArrayRef, datatypes::DataType, record_batch::RecordBatch};
 use bumpalo::Bump;
 use curve25519_dalek::ristretto::RistrettoPoint;
 use indexmap::IndexMap;
+#[cfg(any(test, feature = "polars"))]
 use polars::prelude::DataFrame;
 use proof_of_sql_parser::Identifier;
 use std::collections::HashMap;
@@ -114,6 +116,7 @@ impl TestAccessor<RistrettoPoint> for RecordBatchTestAccessor {
 
 impl RecordBatchTestAccessor {
     /// Apply a query function to table and then convert the result to a RecordBatch
+    #[cfg(any(test, feature = "polars"))]
     pub fn query_table(
         &self,
         table_ref: TableRef,
4 changes: 2 additions & 2 deletions crates/proof-of-sql/src/sql/ast/and_expr_test.rs
@@ -104,9 +104,9 @@ fn test_random_tables_with_given_offset(offset: usize) {
         and(
             equal(
                 column(t, "b", &accessor),
-                const_scalar(filter_val1.as_str()),
+                const_varchar(filter_val1.as_str()),
             ),
-            equal(column(t, "c", &accessor), const_scalar(filter_val2)),
+            equal(column(t, "c", &accessor), const_bigint(filter_val2)),
         ),
     );
     let verifiable_res = VerifiableQueryResult::new(&ast, &accessor, &());
41 changes: 17 additions & 24 deletions crates/proof-of-sql/src/sql/ast/column_expr_test.rs
@@ -1,30 +1,23 @@
 use crate::{
-    base::database::{RecordBatchTestAccessor, TestAccessor},
-    record_batch,
-    sql::ast::{test_expr::TestExprNode, test_utility::*},
+    base::{
+        commitment::InnerProductProof,
+        database::{owned_table_utility::*, OwnedTableTestAccessor},
+    },
+    sql::{
+        ast::test_utility::*,
+        proof::{exercise_verification, VerifiableQueryResult},
+    },
 };
-use arrow::record_batch::RecordBatch;
 
-fn create_test_bool_col_expr(
-    table_ref: &str,
-    results: &[&str],
-    filter_col: &str,
-    data: RecordBatch,
-    offset: usize,
-) -> TestExprNode {
-    let mut accessor = RecordBatchTestAccessor::new_empty();
-    let table_ref = table_ref.parse().unwrap();
-    accessor.add_table(table_ref, data, offset);
-    let col_expr = column(table_ref, filter_col, &accessor);
-    let df_filter = polars::prelude::col(filter_col);
-    TestExprNode::new(table_ref, results, col_expr, df_filter, accessor)
-}
-
 #[test]
 fn we_can_prove_a_query_with_a_single_selected_row() {
-    let data = record_batch!("a" => [true, false]);
-    let test_expr = create_test_bool_col_expr("sxt.t", &["a"], "a", data.clone(), 0);
-    let res = test_expr.verify_expr();
-    let expected = record_batch!("a" => [true]);
-    assert_eq!(res, expected);
+    let data = owned_table([boolean("a", [true, false])]);
+    let t = "sxt.t".parse().unwrap();
+    let accessor = OwnedTableTestAccessor::<InnerProductProof>::new_from_table(t, data, 0, ());
+    let ast = projection(cols_expr_plan(t, &["a"], &accessor), tab(t));
+    let verifiable_res = VerifiableQueryResult::new(&ast, &accessor, &());
+    exercise_verification(&verifiable_res, &ast, &accessor, t);
+    let res = verifiable_res.verify(&ast, &accessor, &()).unwrap().table;
+    let expected_res = owned_table([boolean("a", [true, false])]);
+    assert_eq!(res, expected_res);
 }
47 changes: 15 additions & 32 deletions crates/proof-of-sql/src/sql/ast/dense_filter_expr_test.rs
@@ -1,27 +1,15 @@
-use crate::{
-    base::{database::owned_table_utility::*, math::decimal::Precision},
-    sql::ast::{test_utility::*, ProvableExprPlan},
-};
 use crate::{
     base::{
         database::{
-            ColumnField, ColumnRef, ColumnType, LiteralValue, OwnedTable, OwnedTableTestAccessor,
-            RecordBatchTestAccessor, TableRef, TestAccessor,
+            owned_table_utility::*, ColumnField, ColumnRef, ColumnType, LiteralValue, OwnedTable,
+            OwnedTableTestAccessor, TableRef, TestAccessor,
         },
+        math::decimal::Precision,
         scalar::Curve25519Scalar,
     },
-    record_batch,
     sql::{
         ast::{
-            // Making this explicit to ensure that we don't accidentally use the
-            // sparse filter for these tests
-            test_utility::{
-                col_expr_plan, cols_expr_plan, column, const_int128, dense_filter, equal, tab,
-            },
-            ColumnExpr,
-            DenseFilterExpr,
-            LiteralExpr,
-            TableExpr,
+            test_utility::*, ColumnExpr, DenseFilterExpr, LiteralExpr, ProvableExprPlan, TableExpr,
         },
         proof::{
             exercise_verification, ProofExpr, ProverEvaluate, ResultBuilder, VerifiableQueryResult,
@@ -177,24 +165,19 @@ fn we_can_correctly_fetch_all_the_referenced_columns() {
 
 #[test]
 fn we_can_prove_and_get_the_correct_result_from_a_basic_dense_filter() {
-    let data = record_batch!(
-        "a" => [1_i64, 4_i64, 5_i64, 2_i64, 5_i64],
-        "b" => [1_i64, 2, 3, 4, 5],
-    );
+    let data = owned_table([
+        bigint("a", [1_i64, 4_i64, 5_i64, 2_i64, 5_i64]),
+        bigint("b", [1_i64, 2, 3, 4, 5]),
+    ]);
     let t = "sxt.t".parse().unwrap();
-    let mut accessor = RecordBatchTestAccessor::new_empty();
-    accessor.add_table(t, data, 0);
+    let accessor = OwnedTableTestAccessor::<InnerProductProof>::new_from_table(t, data, 0, ());
     let where_clause = equal(column(t, "a", &accessor), const_int128(5_i128));
-    let expr = dense_filter(cols_expr_plan(t, &["b"], &accessor), tab(t), where_clause);
-    let res = VerifiableQueryResult::<InnerProductProof>::new(&expr, &accessor, &());
-    let res = res
-        .verify(&expr, &accessor, &())
-        .unwrap()
-        .into_record_batch();
-    let expected = record_batch!(
-        "b" => [3_i64, 5],
-    );
-    assert_eq!(res, expected);
+    let ast = dense_filter(cols_expr_plan(t, &["b"], &accessor), tab(t), where_clause);
+    let verifiable_res = VerifiableQueryResult::new(&ast, &accessor, &());
+    exercise_verification(&verifiable_res, &ast, &accessor, t);
+    let res = verifiable_res.verify(&ast, &accessor, &()).unwrap().table;
+    let expected_res = owned_table([bigint("b", [3_i64, 5])]);
+    assert_eq!(res, expected_res);
 }
 
 #[test]