From e4046ee3df623352cd4afaa604389325d1cde235 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Th=C3=A9o=20Jolivet?= <theo.jolivet@ens-paris-saclay.fr>
Date: Mon, 20 Nov 2023 19:09:55 +0100
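Subject: [PATCH] Fix error when polars isn't available

Replace the `px is pl` identity checks that guard the polars-specific
`pytest.xfail` calls with `px.__name__ == "polars"`. Comparing the
module name does not require the name `pl` to be bound in the test
module, so the guards can be evaluated when polars is not installed.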
---
 skrub/tests/test_datetime_encoder.py   |  2 +-
 skrub/tests/test_fuzzy_join.py         | 26 +++++++++++++-------------
 skrub/tests/test_gap_encoder.py        |  2 +-
 skrub/tests/test_interpolation_join.py | 22 +++++++++++-----------
 skrub/tests/test_joiner.py             |  4 ++--
 skrub/tests/test_similarity_encoder.py |  2 +-
 6 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/skrub/tests/test_datetime_encoder.py b/skrub/tests/test_datetime_encoder.py
index 1ecfa9e54..1fdba2bc3 100644
--- a/skrub/tests/test_datetime_encoder.py
+++ b/skrub/tests/test_datetime_encoder.py
@@ -352,7 +352,7 @@ def test_transform_nan(px):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_mixed_type_dataframe(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "to_datetime(X) raises polars.exceptions.ComputeError: cannot cast"
diff --git a/skrub/tests/test_fuzzy_join.py b/skrub/tests/test_fuzzy_join.py
index a14b1e2df..875ebfc13 100644
--- a/skrub/tests/test_fuzzy_join.py
+++ b/skrub/tests/test_fuzzy_join.py
@@ -31,7 +31,7 @@ def test_fuzzy_join(px, analyzer: Literal["char", "char_wb", "word"]):
     """
     Testing if fuzzy_join results are as expected.
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     df1 = px.DataFrame({"a1": ["ana", "lala", "nana et sana", np.NaN]})
     df2 = px.DataFrame({"a2": ["anna", "lala et nana", "lana", "sana", np.NaN]})
@@ -97,7 +97,7 @@ def test_fuzzy_join(px, analyzer: Literal["char", "char_wb", "word"]):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_match_score(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     left = px.DataFrame({"A": ["aa", "bb"]})
     right = px.DataFrame({"A": ["aa", "ba"], "B": [1, 2]})
@@ -109,7 +109,7 @@ def test_match_score(px):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_perfect_matches(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     # non-regression test for https://github.com/skrub-data/skrub/issues/764
     # fuzzy_join when all rows had a perfect match used to trigger a division by 0
@@ -126,7 +126,7 @@ def test_fuzzy_join_dtypes(px):
     """
     Test that the dtypes of dataframes are maintained after join
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     a = px.DataFrame({"col1": ["aaa", "bbb"], "col2": [1, 2]})
     b = px.DataFrame({"col1": ["aaa_", "bbb_"], "col3": [1, 2]})
@@ -171,7 +171,7 @@ def test_parameters_error(px, analyzer, on, how) -> None:
 
 @pytest.mark.parametrize("px", MODULES)
 def test_missing_keys(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     a = px.DataFrame({"col1": ["aaa", "bbb"], "col2": [1, 2]})
     b = px.DataFrame({"col1": ["aaa_", "bbb_"], "col3": [1, 2]})
@@ -190,7 +190,7 @@ def test_missing_keys(px):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_drop_unmatched(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     a = px.DataFrame({"col1": ["aaaa", "bbb", "ddd dd"], "col2": [1, 2, 3]})
     b = px.DataFrame({"col1": ["aaa_", "bbb_", "cc ccc"], "col3": [1, 2, 3]})
@@ -214,7 +214,7 @@ def test_how_param(px):
     Test correct shape of left and right joins.
     Also test if an error is raised when an incorrect parameter value is passed.
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     a = px.DataFrame({"col1": ["aaaa", "bbb", "ddd dd"], "col2": [1, 2, 3]})
     b = px.DataFrame(
@@ -290,7 +290,7 @@ def test_correct_encoder(px):
     """
     Test that the encoder error checking is working as intended.
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
 
     class TestVectorizer(HashingVectorizer):
@@ -337,7 +337,7 @@ def test_numerical_column(px):
     """
     Testing that fuzzy_join works with numerical columns.
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     left = px.DataFrame({"str1": ["aa", "a", "bb"], "int": [10, 2, 5]})
     right = px.DataFrame(
@@ -371,7 +371,7 @@ def test_datetime_column(px, assert_frame_equal_):
     """
     Testing that fuzzy_join works with datetime columns.
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Module 'polars' has no attribute 'to_datetime'")
     left = px.DataFrame(
         {
@@ -423,7 +423,7 @@ def test_mixed_joins(px, assert_frame_equal_):
     """
     Test fuzzy joining on mixed and multiple column types.
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Module 'polars' has no attribute 'to_datetime'")
     left = px.DataFrame(
         {
@@ -569,7 +569,7 @@ def test_iterable_input(px):
     """
     Test if iterable input: list, set, dictionary or tuple works.
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     df1 = px.DataFrame(
         {"a": ["ana", "lala", "nana"], "str2": ["Texas", "France", "Greek God"]}
@@ -594,7 +594,7 @@ def test_missing_values(px):
     """
     Test fuzzy joining on missing values.
     """
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     a = px.DataFrame({"col1": ["aaaa", "bbb", "ddd dd"], "col2": [1, 2, 3]})
     b = px.DataFrame({"col3": [np.NaN, "bbb", "ddd dd"], "col4": [1, 2, 3]})
diff --git a/skrub/tests/test_gap_encoder.py b/skrub/tests/test_gap_encoder.py
index 56b064ce7..6f6afed33 100644
--- a/skrub/tests/test_gap_encoder.py
+++ b/skrub/tests/test_gap_encoder.py
@@ -222,7 +222,7 @@ def test_score(n_samples: int = 70):
 )
 def test_missing_values(px, missing: str):
     """Test what happens when missing values are in the data"""
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "'TypeError: '<' not supported between instances of 'DataTypeClass' and"
diff --git a/skrub/tests/test_interpolation_join.py b/skrub/tests/test_interpolation_join.py
index 18f0185c7..d29a55e28 100644
--- a/skrub/tests/test_interpolation_join.py
+++ b/skrub/tests/test_interpolation_join.py
@@ -39,7 +39,7 @@ def weather():
 @pytest.mark.parametrize("key", [["latitude", "longitude"], "latitude"])
 @pytest.mark.parametrize("with_nulls", [False, True])
 def test_interpolation_join(px, buildings, weather, key, with_nulls):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -60,7 +60,7 @@ def test_interpolation_join(px, buildings, weather, key, with_nulls):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_vectorizer(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -87,7 +87,7 @@ def transform(self, X):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_no_multioutput(px, buildings, weather):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -105,7 +105,7 @@ def test_no_multioutput(px, buildings, weather):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_condition_choice(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -143,7 +143,7 @@ def test_condition_choice(px):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_suffix(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -159,7 +159,7 @@ def test_suffix(px):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_mismatched_indexes(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -179,7 +179,7 @@ def test_mismatched_indexes(px):
 @pytest.mark.parametrize("px", MODULES)
 def test_fit_on_none(px):
     # X is hardly used in fit so it should be ok to fit without a main table
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -199,7 +199,7 @@ def test_fit_on_none(px):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_join_on_date(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -231,7 +231,7 @@ def fit(self, X, y):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_fit_failures(px, buildings, weather):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -281,7 +281,7 @@ def predict(self, X):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_transform_failures(px, buildings, weather):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
@@ -329,7 +329,7 @@ def test_transform_failures(px, buildings, weather):
 
 @pytest.mark.parametrize("px", MODULES)
 def test_transform_failures_dtype(px, buildings, weather):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(
             reason=(
                 "In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
diff --git a/skrub/tests/test_joiner.py b/skrub/tests/test_joiner.py
index 7858c385a..0ce6082c8 100644
--- a/skrub/tests/test_joiner.py
+++ b/skrub/tests/test_joiner.py
@@ -19,7 +19,7 @@
 
 @pytest.mark.parametrize("px", MODULES)
 def test_joiner(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     main_table = px.DataFrame(
         {
@@ -65,7 +65,7 @@ def test_joiner(px):
 
 @pytest.mark.parametrize("px, assert_frame_equal_", ASSERT_TUPLES)
 def test_multiple_keys(px, assert_frame_equal_):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
     df = px.DataFrame(
         {"Co": ["France", "Italia", "Deutchland"], "Ca": ["Paris", "Roma", "Berlin"]}
diff --git a/skrub/tests/test_similarity_encoder.py b/skrub/tests/test_similarity_encoder.py
index 953ede93f..a1ff9478c 100644
--- a/skrub/tests/test_similarity_encoder.py
+++ b/skrub/tests/test_similarity_encoder.py
@@ -341,7 +341,7 @@ def test_check_fitted_super_vectorizer():
 
 @pytest.mark.parametrize("px", MODULES)
 def test_inverse_transform(px):
-    if px is pl:
+    if px.__name__ == "polars":
         pytest.xfail(reason="Setting output to polars is not possible yet.")
     encoder = SimilarityEncoder()
     encoder.set_output(transform="pandas")