Skip to content

Commit

Permalink
[FEAT] connect: df.withColumn
Browse files (browse the repository at this point in the history)
  • Loading branch information
andrewgazelka committed Nov 21, 2024
1 parent ee2b211 commit 1ec1514
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions tests/connect/test_with_column.py
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,19 @@
from __future__ import annotations

from pyspark.sql.functions import col


def test_with_column(spark_session):
    """Check that ``withColumn`` appends a derived column and keeps the original.

    Builds a frame from ``spark_session.range(10)``, adds a column computed
    from ``col("id") > 2``, then checks both the resulting schema and the
    values after converting to pandas.

    Args:
        spark_session: session fixture exposing ``range``.
    """
    base = spark_session.range(10)

    # Despite its name, the "double_id" column holds the boolean result of
    # the ``id > 2`` comparison.
    is_above_two = col("id") > 2
    augmented = base.withColumn("double_id", is_above_two)

    # Schema check: the source column survives and the new one is present.
    column_names = augmented.schema.names
    assert "id" in column_names, "Original column should still exist"
    assert "double_id" in column_names, "New column should be added"

    # Data check: recompute the comparison on the pandas side and compare.
    result = augmented.toPandas()
    expected = result["id"] > 2
    matches = result["double_id"] == expected
    assert matches.all(), "New column should be greater than 2 comparison"

0 comments on commit 1ec1514

Please sign in to comment.