From c25eb082fc46dabdcb68a8551d7123d725efc75c Mon Sep 17 00:00:00 2001
From: Andrew Gazelka
Date: Wed, 20 Nov 2024 01:57:08 -0800
Subject: [PATCH] [FEAT] connect: `df.withColumn`

---
 tests/connect/test_with_column.py | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 tests/connect/test_with_column.py

diff --git a/tests/connect/test_with_column.py b/tests/connect/test_with_column.py
new file mode 100644
index 0000000000..ad237339b2
--- /dev/null
+++ b/tests/connect/test_with_column.py
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from pyspark.sql.functions import col
+
+
+def test_with_column(spark_session):
+    """Check that `df.withColumn` keeps `id` and adds a boolean `id > 2` column."""
+    # Create DataFrame from range(10)
+    df = spark_session.range(10)
+
+    # Add a new column that's a boolean indicating if id > 2
+    df_with_col = df.withColumn("id_gt_2", col("id") > 2)
+
+    # Verify the schema has both columns
+    assert "id" in df_with_col.schema.names, "Original column should still exist"
+    assert "id_gt_2" in df_with_col.schema.names, "New column should be added"
+
+    # Verify the data is correct
+    df_pandas = df_with_col.toPandas()
+    assert (df_pandas["id_gt_2"] == (df_pandas["id"] > 2)).all(), "New column should match the id > 2 comparison"