Skip to content

Commit

Permalink
add another test
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewgazelka committed Dec 4, 2024
1 parent 3ee3eda commit 596e61f
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 3 deletions.
21 changes: 18 additions & 3 deletions src/daft-connect/src/translation/logical_plan/local_relation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use daft_logical_plan::{
SourceInfo,
};
use daft_micropartition::{python::PyMicroPartition, MicroPartition};
use daft_schema::dtype::DaftDataType;
use daft_table::Table;
use eyre::{bail, ensure, WrapErr};
use itertools::Itertools;
Expand Down Expand Up @@ -56,13 +57,27 @@ pub fn local_relation(plan: spark_connect::LocalRelation) -> eyre::Result<Plan>
.map(|daft_field| daft_field.to_arrow())
.try_collect()?;

let mut dict_idx = 0;

let ipc_fields: Vec<_> = daft_fields
.iter()
.map(|_| {
.map(|field| {
let required_dictionary = field.dtype == DaftDataType::Utf8;

let dictionary_id = match required_dictionary {
true => {
let res = dict_idx;
dict_idx += 1;
debug!("using dictionary id {res}");
Some(res)
}
false => None,
};

// Utf8 columns are dictionary-encoded (id assigned above); all other types get no dictionary
IpcField {
fields: vec![], // No nested fields for primitive types
dictionary_id: None, // No dictionary encoding
fields: vec![], // No nested fields for primitive types
dictionary_id,
}
})
.collect();
Expand Down
4 changes: 4 additions & 0 deletions tests/connect/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@ def spark_session():
This fixture is available to all test files and creates a single
Spark session for the entire test suite run.
"""

from daft.daft import connect_start
from daft.logging import setup_debug_logger

setup_debug_logger()

# Start Daft Connect server
server = connect_start()
Expand Down
8 changes: 8 additions & 0 deletions tests/connect/test_create_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,11 @@ def test_create_df(spark_session):
assert len(df_two_pandas) == 3, "Two-column DataFrame should have 3 rows"
assert list(df_two_pandas["num1"]) == [1, 2, 3], "First number column should contain expected values"
assert list(df_two_pandas["num2"]) == [10, 20, 30], "Second number column should contain expected values"

# now do boolean
print("now testing boolean")
boolean_data = [(True,), (False,), (True,)]
df_boolean = spark_session.createDataFrame(boolean_data, ["value"])
df_boolean_pandas = df_boolean.toPandas()
assert len(df_boolean_pandas) == 3, "Boolean DataFrame should have 3 rows"
assert list(df_boolean_pandas["value"]) == [True, False, True], "Boolean DataFrame should contain expected values"

0 comments on commit 596e61f

Please sign in to comment.