Skip to content

Commit

Permalink
chore: move symbolic and boolean algebra code into new crate (#3570)
Browse files Browse the repository at this point in the history
Moving some of the code we have around symbolic/boolean algebra on
expressions into its own crate, because I anticipate that we will be
building more of this kind of thing, so it would be nicer to consolidate
it as well as make it easier to reuse. It also allows us to better test
these things in isolation of the context they are being used in.

For example, we'll be building some optimization rules that more
intelligently finds predicates for filter pushdown into joins, and that
may use both `split_conjunction` as well as some expression
simplification logic.

Also took this opportunity to fix a typo (conjuct -> conjunct) and
rename `conjunct` (which is an adjective) to `combine_conjunction`.
Otherwise everything else is pretty much a straightforward move
  • Loading branch information
kevinzwang authored Dec 17, 2024
1 parent b7ea62b commit 5165e5e
Show file tree
Hide file tree
Showing 15 changed files with 570 additions and 535 deletions.
15 changes: 15 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ common-scan-info = {path = "src/common/scan-info", default-features = false}
common-system-info = {path = "src/common/system-info", default-features = false}
common-tracing = {path = "src/common/tracing", default-features = false}
common-version = {path = "src/common/version", default-features = false}
daft-algebra = {path = "src/daft-algebra", default-features = false}
daft-catalog = {path = "src/daft-catalog", default-features = false}
daft-catalog-python-catalog = {path = "src/daft-catalog/python-catalog", optional = true}
daft-compression = {path = "src/daft-compression", default-features = false}
Expand Down Expand Up @@ -149,6 +150,7 @@ members = [
"src/common/scan-info",
"src/common/system-info",
"src/common/treenode",
"src/daft-algebra",
"src/daft-catalog",
"src/daft-core",
"src/daft-csv",
Expand Down
1 change: 1 addition & 0 deletions src/common/scan-info/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ common-daft-config = {path = "../daft-config", default-features = false}
common-display = {path = "../display", default-features = false}
common-error = {path = "../error", default-features = false}
common-file-formats = {path = "../file-formats", default-features = false}
daft-algebra = {path = "../../daft-algebra", default-features = false}
daft-dsl = {path = "../../daft-dsl", default-features = false}
daft-schema = {path = "../../daft-schema", default-features = false}
pyo3 = {workspace = true, optional = true}
Expand Down
9 changes: 4 additions & 5 deletions src/common/scan-info/src/expr_rewriter.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
use std::collections::HashMap;

use common_error::DaftResult;
use daft_algebra::boolean::split_conjunction;
use daft_dsl::{
col,
common_treenode::{Transformed, TreeNode, TreeNodeRecursion},
functions::{partitioning, FunctionExpr},
null_lit,
optimization::split_conjuction,
Expr, ExprRef, Operator,
null_lit, Expr, ExprRef, Operator,
};

use crate::{PartitionField, PartitionTransform};
Expand Down Expand Up @@ -93,7 +92,7 @@ pub fn rewrite_predicate_for_partitioning(
// Before rewriting predicate for partition filter pushdown, partition predicate clauses into groups that will need
// to be applied at the data level (i.e. any clauses that aren't pure partition predicates with identity
// transformations).
let data_split = split_conjuction(predicate);
let data_split = split_conjunction(predicate);
// Predicates that reference both partition columns and data columns.
let mut needs_filter_op_preds: Vec<ExprRef> = vec![];
// Predicates that only reference data columns (no partition column references) or only reference partition columns
Expand Down Expand Up @@ -332,7 +331,7 @@ pub fn rewrite_predicate_for_partitioning(
let with_part_cols = with_part_cols.data;

// Filter to predicate clauses that only involve partition columns.
let split = split_conjuction(&with_part_cols);
let split = split_conjunction(&with_part_cols);
let mut part_preds: Vec<ExprRef> = vec![];
for e in split {
let mut all_part_keys = true;
Expand Down
16 changes: 16 additions & 0 deletions src/daft-algebra/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
[dependencies]
common-error = {path = "../common/error", default-features = false}
common-treenode = {path = "../common/treenode", default-features = false}
daft-dsl = {path = "../daft-dsl", default-features = false}
daft-schema = {path = "../daft-schema", default-features = false}

[dev-dependencies]
rstest = {workspace = true}

[lints]
workspace = true

[package]
edition = {workspace = true}
name = "daft-algebra"
version = {workspace = true}
24 changes: 24 additions & 0 deletions src/daft-algebra/src/boolean.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use common_treenode::{TreeNode, TreeNodeRecursion};
use daft_dsl::{Expr, ExprRef, Operator};

pub fn split_conjunction(expr: &ExprRef) -> Vec<ExprRef> {
let mut splits = vec![];

expr.apply(|e| match e.as_ref() {
Expr::BinaryOp {
op: Operator::And, ..
}
| Expr::Alias(..) => Ok(TreeNodeRecursion::Continue),
_ => {
splits.push(e.clone());
Ok(TreeNodeRecursion::Jump)
}
})
.unwrap();

splits
}

pub fn combine_conjunction<T: IntoIterator<Item = ExprRef>>(exprs: T) -> Option<ExprRef> {
exprs.into_iter().reduce(|acc, e| acc.and(e))
}
4 changes: 4 additions & 0 deletions src/daft-algebra/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
pub mod boolean;
mod simplify;

pub use simplify::simplify_expr;
Loading

0 comments on commit 5165e5e

Please sign in to comment.