feat: make polars postprocessing optional behind feature flag #21

Closed · wants to merge 2 commits
8 changes: 8 additions & 0 deletions .github/workflows/lint-and-test.yml
@@ -48,6 +48,14 @@ jobs:
         run: cargo check -p proof-of-sql --no-default-features --features="test"
       - name: Run cargo check (proof-of-sql) (just "blitzar" feature)
         run: cargo check -p proof-of-sql --no-default-features --features="blitzar"
+      - name: Run cargo check (proof-of-sql) (just "polars" feature)
+        run: cargo check -p proof-of-sql --no-default-features --features="polars"
+      - name: Run cargo check (proof-of-sql) (no "test" feature)
+        run: cargo check -p proof-of-sql --no-default-features --features="blitzar polars"
+      - name: Run cargo check (proof-of-sql) (no "blitzar" feature)
+        run: cargo check -p proof-of-sql --no-default-features --features="test polars"
+      - name: Run cargo check (proof-of-sql) (no "polars" feature)
+        run: cargo check -p proof-of-sql --no-default-features --features="blitzar test"
 
   test:
     name: Test Suite
4 changes: 2 additions & 2 deletions Cargo.toml
@@ -25,7 +25,7 @@ bigdecimal = { version = "0.4.5", features = ["serde"] }
 blake3 = { version = "1.3.3" }
 blitzar = { version = "3.0.2" }
 bumpalo = { version = "3.11.0" }
-bytemuck = {version = "1.14.2" }
+bytemuck = {version = "1.14.2", features = ["derive"] }
 byte-slice-cast = { version = "1.2.1" }
 clap = { version = "4.5.4" }
 criterion = { version = "0.5.1" }
@@ -35,7 +35,7 @@ derive_more = { version = "0.99" }
 dyn_partial_eq = { version = "0.1.2" }
 flexbuffers = { version = "2.0.0" }
 hashbrown = { version = "0.14.0" }
-indexmap = { version = "2.1" }
+indexmap = { version = "2.1", features = ["serde"] }
 itertools = { version = "0.13.0" }
 lalrpop-util = { version = "0.20.0" }
 lazy_static = { version = "1.4.0" }
6 changes: 3 additions & 3 deletions crates/proof-of-sql/Cargo.toml
@@ -40,7 +40,7 @@ lazy_static = { workspace = true }
 merlin = { workspace = true }
 num-traits = { workspace = true }
 num-bigint = { workspace = true, default-features = false }
-polars = { workspace = true, features = ["lazy", "bigidx", "dtype-decimal", "serde-lazy"] }
+polars = { workspace = true, features = ["lazy", "bigidx", "dtype-decimal", "serde-lazy"], optional = true }
 postcard = { workspace = true, features = ["alloc"] }
 proof-of-sql-parser = { workspace = true }
 rand = { workspace = true, optional = true }
@@ -59,7 +59,7 @@ clap = { workspace = true, features = ["derive"] }
 criterion = { workspace = true, features = ["html_reports"] }
 opentelemetry = { workspace = true }
 opentelemetry-jaeger = { workspace = true }
-polars = { workspace = true, features = ["lazy"] }
+polars = { workspace = true, features = ["lazy", "dtype-decimal"] }
 rand = { workspace = true }
 rand_core = { workspace = true }
 serde_json = { workspace = true }
@@ -69,7 +69,7 @@ tracing-subscriber = { workspace = true }
 flexbuffers = { workspace = true }
 
 [features]
-default = ["blitzar"]
+default = ["blitzar", "polars"]
 test = ["dep:rand"]
 
 [lints]
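For context on what these manifest changes do: `optional = true` makes Cargo build `polars` only when the (implicit) `polars` feature is enabled, and keeping `"polars"` in `default` preserves current behavior while letting `--no-default-features` consumers drop the dependency. A minimal sketch of how crate code consumes such a flag — the function below is illustrative, not taken from this PR:

/// Illustrative only: this item exists solely in polars-enabled builds.
/// When the "polars" feature is off, the function (and the polars crate
/// itself) is compiled out.
#[cfg(feature = "polars")]
pub fn row_count(df: &polars::prelude::DataFrame) -> usize {
    df.height() // polars' row-count accessor
}

Every call site then needs a matching gate (or the broader `cfg(any(test, feature = "polars"))` used in the `mod.rs` diff below), or builds without the feature fail to resolve the symbol.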
6 changes: 0 additions & 6 deletions crates/proof-of-sql/src/base/database/column.rs
@@ -193,12 +193,6 @@ impl<'a, S: Scalar> Column<'a, S> {
     }
 }
 
-/// The precision for [ColumnType::INT128] values
-pub const INT128_PRECISION: usize = 38;
-
-/// The scale for [ColumnType::INT128] values
-pub const INT128_SCALE: usize = 0;
-
 /// Represents the supported data types of a column in an in-memory,
 /// column-oriented database.
 ///
4 changes: 3 additions & 1 deletion crates/proof-of-sql/src/base/database/mod.rs
@@ -6,7 +6,6 @@ pub use accessor::{CommitmentAccessor, DataAccessor, MetadataAccessor, SchemaAccessor};
 
 mod column;
 pub use column::{Column, ColumnField, ColumnRef, ColumnType};
-pub(crate) use column::{INT128_PRECISION, INT128_SCALE};
 
 mod literal_value;
 pub use literal_value::LiteralValue;
@@ -17,7 +16,9 @@ pub use table_ref::TableRef;
 mod arrow_array_to_column_conversion;
 pub use arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError};
 
+#[cfg(any(test, feature = "polars"))]
 mod record_batch_dataframe_conversion;
+#[cfg(any(test, feature = "polars"))]
 pub(crate) use record_batch_dataframe_conversion::{
     dataframe_to_record_batch, record_batch_to_dataframe,
 };
@@ -59,6 +60,7 @@ mod test_accessor;
 #[cfg(any(test, feature = "test"))]
 pub use test_accessor::TestAccessor;
 #[cfg(test)]
+#[allow(unused_imports)]
 pub(crate) use test_accessor::UnimplementedTestAccessor;
 
 #[cfg(any(test, feature = "test"))]
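A note on the `any(test, feature = "polars")` form used in these gates: `cfg(test)` is active when this crate compiles its own unit tests, where `polars` remains available as a dev-dependency, so the conversion helpers still build under `cargo test` while disappearing from dependents' builds unless they opt in. A small sketch of the pattern (module and item names are hypothetical placeholders, not the crate's real API):

// Compiled in two situations: this crate's own `cargo test` builds,
// or any build with the "polars" feature enabled.
#[cfg(any(test, feature = "polars"))]
mod dataframe_helpers {
    /// Stand-in for the real dataframe <-> record-batch conversions.
    pub(crate) fn roundtrip_len(rows: usize) -> usize {
        rows
    }
}

// Each use site repeats the gate so builds without "polars"
// never reference the missing module.
#[cfg(any(test, feature = "polars"))]
pub(crate) use dataframe_helpers::roundtrip_len;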
@@ -1,13 +1,15 @@
+#[cfg(any(test, feature = "polars"))]
+use super::{dataframe_to_record_batch, record_batch_to_dataframe};
 use super::{
-    dataframe_to_record_batch, record_batch_to_dataframe, ArrayRefExt, Column, ColumnRef,
-    ColumnType, CommitmentAccessor, DataAccessor, MetadataAccessor, SchemaAccessor, TableRef,
-    TestAccessor,
+    ArrayRefExt, Column, ColumnRef, ColumnType, CommitmentAccessor, DataAccessor, MetadataAccessor,
+    SchemaAccessor, TableRef, TestAccessor,
 };
 use crate::base::scalar::{compute_commitment_for_testing, Curve25519Scalar};
 use arrow::{array::ArrayRef, datatypes::DataType, record_batch::RecordBatch};
 use bumpalo::Bump;
 use curve25519_dalek::ristretto::RistrettoPoint;
 use indexmap::IndexMap;
+#[cfg(any(test, feature = "polars"))]
 use polars::prelude::DataFrame;
 use proof_of_sql_parser::Identifier;
 use std::collections::HashMap;
@@ -114,6 +116,7 @@ impl TestAccessor<RistrettoPoint> for RecordBatchTestAccessor {
 
 impl RecordBatchTestAccessor {
     /// Apply a query function to table and then convert the result to a RecordBatch
+    #[cfg(any(test, feature = "polars"))]
     pub fn query_table(
         &self,
         table_ref: TableRef,
4 changes: 2 additions & 2 deletions crates/proof-of-sql/src/sql/ast/and_expr_test.rs
@@ -104,9 +104,9 @@ fn test_random_tables_with_given_offset(offset: usize) {
         and(
             equal(
                 column(t, "b", &accessor),
-                const_scalar(filter_val1.as_str()),
+                const_varchar(filter_val1.as_str()),
             ),
-            equal(column(t, "c", &accessor), const_scalar(filter_val2)),
+            equal(column(t, "c", &accessor), const_bigint(filter_val2)),
         ),
     );
     let verifiable_res = VerifiableQueryResult::new(&ast, &accessor, &());
41 changes: 17 additions & 24 deletions crates/proof-of-sql/src/sql/ast/column_expr_test.rs
@@ -1,30 +1,23 @@
 use crate::{
-    base::database::{RecordBatchTestAccessor, TestAccessor},
-    record_batch,
-    sql::ast::{test_expr::TestExprNode, test_utility::*},
+    base::{
+        commitment::InnerProductProof,
+        database::{owned_table_utility::*, OwnedTableTestAccessor},
+    },
+    sql::{
+        ast::test_utility::*,
+        proof::{exercise_verification, VerifiableQueryResult},
+    },
 };
-use arrow::record_batch::RecordBatch;
 
-fn create_test_bool_col_expr(
-    table_ref: &str,
-    results: &[&str],
-    filter_col: &str,
-    data: RecordBatch,
-    offset: usize,
-) -> TestExprNode {
-    let mut accessor = RecordBatchTestAccessor::new_empty();
-    let table_ref = table_ref.parse().unwrap();
-    accessor.add_table(table_ref, data, offset);
-    let col_expr = column(table_ref, filter_col, &accessor);
-    let df_filter = polars::prelude::col(filter_col);
-    TestExprNode::new(table_ref, results, col_expr, df_filter, accessor)
-}
-
 #[test]
 fn we_can_prove_a_query_with_a_single_selected_row() {
-    let data = record_batch!("a" => [true, false]);
-    let test_expr = create_test_bool_col_expr("sxt.t", &["a"], "a", data.clone(), 0);
-    let res = test_expr.verify_expr();
-    let expected = record_batch!("a" => [true]);
-    assert_eq!(res, expected);
+    let data = owned_table([boolean("a", [true, false])]);
+    let t = "sxt.t".parse().unwrap();
+    let accessor = OwnedTableTestAccessor::<InnerProductProof>::new_from_table(t, data, 0, ());
+    let ast = projection(cols_expr_plan(t, &["a"], &accessor), tab(t));
+    let verifiable_res = VerifiableQueryResult::new(&ast, &accessor, &());
+    exercise_verification(&verifiable_res, &ast, &accessor, t);
+    let res = verifiable_res.verify(&ast, &accessor, &()).unwrap().table;
+    let expected_res = owned_table([boolean("a", [true, false])]);
+    assert_eq!(res, expected_res);
 }
47 changes: 15 additions & 32 deletions crates/proof-of-sql/src/sql/ast/dense_filter_expr_test.rs
@@ -1,27 +1,15 @@
-use crate::{
-    base::{database::owned_table_utility::*, math::decimal::Precision},
-    sql::ast::{test_utility::*, ProvableExprPlan},
-};
 use crate::{
     base::{
         database::{
-            ColumnField, ColumnRef, ColumnType, LiteralValue, OwnedTable, OwnedTableTestAccessor,
-            RecordBatchTestAccessor, TableRef, TestAccessor,
+            owned_table_utility::*, ColumnField, ColumnRef, ColumnType, LiteralValue, OwnedTable,
+            OwnedTableTestAccessor, TableRef, TestAccessor,
         },
+        math::decimal::Precision,
         scalar::Curve25519Scalar,
     },
-    record_batch,
     sql::{
         ast::{
-            // Making this explicit to ensure that we don't accidentally use the
-            // sparse filter for these tests
-            test_utility::{
-                col_expr_plan, cols_expr_plan, column, const_int128, dense_filter, equal, tab,
-            },
-            ColumnExpr,
-            DenseFilterExpr,
-            LiteralExpr,
-            TableExpr,
+            test_utility::*, ColumnExpr, DenseFilterExpr, LiteralExpr, ProvableExprPlan, TableExpr,
         },
         proof::{
             exercise_verification, ProofExpr, ProverEvaluate, ResultBuilder, VerifiableQueryResult,
@@ -177,24 +165,19 @@ fn we_can_correctly_fetch_all_the_referenced_columns() {
 
 #[test]
 fn we_can_prove_and_get_the_correct_result_from_a_basic_dense_filter() {
-    let data = record_batch!(
-        "a" => [1_i64, 4_i64, 5_i64, 2_i64, 5_i64],
-        "b" => [1_i64, 2, 3, 4, 5],
-    );
+    let data = owned_table([
+        bigint("a", [1_i64, 4_i64, 5_i64, 2_i64, 5_i64]),
+        bigint("b", [1_i64, 2, 3, 4, 5]),
+    ]);
     let t = "sxt.t".parse().unwrap();
-    let mut accessor = RecordBatchTestAccessor::new_empty();
-    accessor.add_table(t, data, 0);
+    let accessor = OwnedTableTestAccessor::<InnerProductProof>::new_from_table(t, data, 0, ());
     let where_clause = equal(column(t, "a", &accessor), const_int128(5_i128));
-    let expr = dense_filter(cols_expr_plan(t, &["b"], &accessor), tab(t), where_clause);
-    let res = VerifiableQueryResult::<InnerProductProof>::new(&expr, &accessor, &());
-    let res = res
-        .verify(&expr, &accessor, &())
-        .unwrap()
-        .into_record_batch();
-    let expected = record_batch!(
-        "b" => [3_i64, 5],
-    );
-    assert_eq!(res, expected);
+    let ast = dense_filter(cols_expr_plan(t, &["b"], &accessor), tab(t), where_clause);
+    let verifiable_res = VerifiableQueryResult::new(&ast, &accessor, &());
+    exercise_verification(&verifiable_res, &ast, &accessor, t);
+    let res = verifiable_res.verify(&ast, &accessor, &()).unwrap().table;
+    let expected_res = owned_table([bigint("b", [3_i64, 5])]);
+    assert_eq!(res, expected_res);
 }
 
 #[test]