Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(connect): add df.filter #3346

Merged
merged 1 commit into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions src/daft-connect/src/translation/logical_plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
use tracing::warn;

use crate::translation::logical_plan::{
aggregate::aggregate, local_relation::local_relation, project::project, range::range,
read::read, to_df::to_df, with_columns::with_columns,
aggregate::aggregate, filter::filter, local_relation::local_relation, project::project,
range::range, read::read, to_df::to_df, with_columns::with_columns,
};

mod aggregate;
mod filter;
mod local_relation;
mod project;
mod range;
Expand Down Expand Up @@ -59,6 +60,9 @@
RelType::Project(p) => project(*p)
.await
.wrap_err("Failed to apply project to logical plan"),
RelType::Filter(f) => filter(*f)
.await

Check warning on line 64 in src/daft-connect/src/translation/logical_plan.rs

View check run for this annotation

Codecov / codecov/patch

src/daft-connect/src/translation/logical_plan.rs#L64

Added line #L64 was not covered by tests
.wrap_err("Failed to apply filter to logical plan"),
RelType::Aggregate(a) => aggregate(*a)
.await
.wrap_err("Failed to apply aggregate to logical plan"),
Expand Down
22 changes: 22 additions & 0 deletions src/daft-connect/src/translation/logical_plan/filter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
use eyre::bail;

use crate::translation::{to_daft_expr, to_logical_plan, Plan};

/// Translates a Spark Connect `Filter` relation into a `Plan` with the filter applied.
///
/// The relation's `input` is translated with `to_logical_plan`, its `condition`
/// is converted with `to_daft_expr`, and the resulting expression is applied to
/// the plan's builder via `filter`.
///
/// # Errors
///
/// Returns an error if `input` or `condition` is `None`, or if converting the
/// condition, translating the input relation, or applying the filter to the
/// builder fails.
pub async fn filter(filter: spark_connect::Filter) -> eyre::Result<Plan> {
let spark_connect::Filter { input, condition } = filter;

// Both fields are optional on the incoming message; reject the relation early if either is absent.
let Some(input) = input else {
bail!("input is required");

Check warning on line 9 in src/daft-connect/src/translation/logical_plan/filter.rs

View check run for this annotation

Codecov / codecov/patch

src/daft-connect/src/translation/logical_plan/filter.rs#L9

Added line #L9 was not covered by tests
};

let Some(condition) = condition else {
bail!("condition is required");

Check warning on line 13 in src/daft-connect/src/translation/logical_plan/filter.rs

View check run for this annotation

Codecov / codecov/patch

src/daft-connect/src/translation/logical_plan/filter.rs#L13

Added line #L13 was not covered by tests
};

// Convert the condition before translating the input, so a bad predicate fails first.
let condition = to_daft_expr(&condition)?;

// NOTE(review): the future is boxed presumably because `to_logical_plan` can dispatch
// back into this function (async recursion) — confirm against `to_logical_plan`.
let mut plan = Box::pin(to_logical_plan(*input)).await?;
plan.builder = plan.builder.filter(condition)?;

Ok(plan)
}
19 changes: 19 additions & 0 deletions tests/connect/test_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from __future__ import annotations

from pyspark.sql.functions import col


def test_filter(spark_session):
    """Check that filtering ``range(10)`` on ``id < 5`` keeps the schema and the right rows.

    Uses the ``spark_session`` argument to build a ten-row DataFrame, keeps
    the rows whose ``id`` is below 5, and verifies the schema, the row count,
    and every remaining value.
    """
    source = spark_session.range(10)
    below_five = source.filter(col("id") < 5)

    # Filtering must leave the schema untouched.
    assert below_five.schema == source.schema, "Schema should be unchanged after filter"

    # Materialize once, then check both the row count and the surviving values.
    rows = below_five.toPandas()
    assert len(rows) == 5, "Should have 5 rows after filtering < 5"
    assert all(rows["id"] < 5), "All values should be less than 5"
Loading