Skip to content

Commit

Permalink
Create function to test if the polars module is available. Use it to …
Browse files Browse the repository at this point in the history
…xfail specific tests
  • Loading branch information
TheooJ committed Nov 21, 2023
1 parent e4046ee commit b579028
Show file tree
Hide file tree
Showing 7 changed files with 48 additions and 29 deletions.
13 changes: 13 additions & 0 deletions skrub/_dataframe/_namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,19 @@ def is_polars(dataframe):
return isinstance(dataframe, (pl.DataFrame, pl.LazyFrame))


def is_namespace_pandas(px):
    """Check whether ``px`` is the pandas namespace.

    Parameters
    ----------
    px : object
        Candidate dataframe namespace (typically a module object).

    Returns
    -------
    bool
        Result of the identity comparison between ``px`` and the
        module-level ``pd`` name.
    """
    # Identity, not equality: only the very same module object qualifies.
    return px is pd

Check warning on line 47 in skrub/_dataframe/_namespace.py

View check run for this annotation

Codecov / codecov/patch

skrub/_dataframe/_namespace.py#L47

Added line #L47 was not covered by tests


def is_namespace_polars(px):
    """Check whether ``px`` is the polars namespace.

    polars is only imported here when it is already present in
    ``sys.modules``; otherwise the function returns ``False`` without
    attempting the import.

    Parameters
    ----------
    px : object
        Candidate dataframe namespace (typically a module object).

    Returns
    -------
    bool
        ``True`` if ``px`` is the imported ``polars`` module object,
        ``False`` otherwise (including when polars has not been imported).
    """
    # If polars was never imported, ``px`` cannot be the polars module,
    # and skipping the import keeps this check usable without polars.
    if "polars" not in sys.modules:
        return False

    import polars as pl

    # Identity, not equality: only the very same module object qualifies.
    return px is pl


def get_df_namespace(*dfs):
"""Get the namespaces of dataframes.
Expand Down
3 changes: 2 additions & 1 deletion skrub/tests/test_datetime_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from numpy.testing import assert_allclose, assert_array_equal
from pandas.testing import assert_frame_equal

from skrub._dataframe._namespace import is_namespace_polars
from skrub._dataframe._polars import POLARS_SETUP
from skrub._datetime_encoder import (
TIME_LEVELS,
Expand Down Expand Up @@ -352,7 +353,7 @@ def test_transform_nan(px):

@pytest.mark.parametrize("px", MODULES)
def test_mixed_type_dataframe(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"to_datetime(X) raises polars.exceptions.ComputeError: cannot cast"
Expand Down
27 changes: 14 additions & 13 deletions skrub/tests/test_fuzzy_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from sklearn.feature_extraction.text import HashingVectorizer

from skrub import fuzzy_join
from skrub._dataframe._namespace import is_namespace_polars
from skrub._dataframe._polars import POLARS_SETUP

MODULES = [pd]
Expand All @@ -31,7 +32,7 @@ def test_fuzzy_join(px, analyzer: Literal["char", "char_wb", "word"]):
"""
Testing if fuzzy_join results are as expected.
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
df1 = px.DataFrame({"a1": ["ana", "lala", "nana et sana", np.NaN]})
df2 = px.DataFrame({"a2": ["anna", "lala et nana", "lana", "sana", np.NaN]})
Expand Down Expand Up @@ -97,7 +98,7 @@ def test_fuzzy_join(px, analyzer: Literal["char", "char_wb", "word"]):

@pytest.mark.parametrize("px", MODULES)
def test_match_score(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
left = px.DataFrame({"A": ["aa", "bb"]})
right = px.DataFrame({"A": ["aa", "ba"], "B": [1, 2]})
Expand All @@ -109,7 +110,7 @@ def test_match_score(px):

@pytest.mark.parametrize("px", MODULES)
def test_perfect_matches(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
# non-regression test for https://github.com/skrub-data/skrub/issues/764
# fuzzy_join when all rows had a perfect match used to trigger a division by 0
Expand All @@ -126,7 +127,7 @@ def test_fuzzy_join_dtypes(px):
"""
Test that the dtypes of dataframes are maintained after join
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
a = px.DataFrame({"col1": ["aaa", "bbb"], "col2": [1, 2]})
b = px.DataFrame({"col1": ["aaa_", "bbb_"], "col3": [1, 2]})
Expand Down Expand Up @@ -171,7 +172,7 @@ def test_parameters_error(px, analyzer, on, how) -> None:

@pytest.mark.parametrize("px", MODULES)
def test_missing_keys(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
a = px.DataFrame({"col1": ["aaa", "bbb"], "col2": [1, 2]})
b = px.DataFrame({"col1": ["aaa_", "bbb_"], "col3": [1, 2]})
Expand All @@ -190,7 +191,7 @@ def test_missing_keys(px):

@pytest.mark.parametrize("px", MODULES)
def test_drop_unmatched(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
a = px.DataFrame({"col1": ["aaaa", "bbb", "ddd dd"], "col2": [1, 2, 3]})
b = px.DataFrame({"col1": ["aaa_", "bbb_", "cc ccc"], "col3": [1, 2, 3]})
Expand All @@ -214,7 +215,7 @@ def test_how_param(px):
Test correct shape of left and right joins.
Also test if an error is raised when an incorrect parameter value is passed.
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
a = px.DataFrame({"col1": ["aaaa", "bbb", "ddd dd"], "col2": [1, 2, 3]})
b = px.DataFrame(
Expand Down Expand Up @@ -290,7 +291,7 @@ def test_correct_encoder(px):
"""
Test that the encoder error checking is working as intended.
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")

class TestVectorizer(HashingVectorizer):
Expand Down Expand Up @@ -337,7 +338,7 @@ def test_numerical_column(px):
"""
Testing that fuzzy_join works with numerical columns.
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
left = px.DataFrame({"str1": ["aa", "a", "bb"], "int": [10, 2, 5]})
right = px.DataFrame(
Expand Down Expand Up @@ -371,7 +372,7 @@ def test_datetime_column(px, assert_frame_equal_):
"""
Testing that fuzzy_join works with datetime columns.
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Module 'polars' has no attribute 'to_datetime'")
left = px.DataFrame(
{
Expand Down Expand Up @@ -423,7 +424,7 @@ def test_mixed_joins(px, assert_frame_equal_):
"""
Test fuzzy joining on mixed and multiple column types.
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Module 'polars' has no attribute 'to_datetime'")
left = px.DataFrame(
{
Expand Down Expand Up @@ -569,7 +570,7 @@ def test_iterable_input(px):
"""
Test if iterable input: list, set, dictionary or tuple works.
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
df1 = px.DataFrame(
{"a": ["ana", "lala", "nana"], "str2": ["Texas", "France", "Greek God"]}
Expand All @@ -594,7 +595,7 @@ def test_missing_values(px):
"""
Test fuzzy joining on missing values.
"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
a = px.DataFrame({"col1": ["aaaa", "bbb", "ddd dd"], "col2": [1, 2, 3]})
b = px.DataFrame({"col3": [np.NaN, "bbb", "ddd dd"], "col4": [1, 2, 3]})
Expand Down
3 changes: 2 additions & 1 deletion skrub/tests/test_gap_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from sklearn.model_selection import train_test_split

from skrub import GapEncoder
from skrub._dataframe._namespace import is_namespace_polars
from skrub._dataframe._polars import POLARS_SETUP
from skrub.datasets import fetch_midwest_survey
from skrub.tests.utils import generate_data
Expand Down Expand Up @@ -222,7 +223,7 @@ def test_score(n_samples: int = 70):
)
def test_missing_values(px, missing: str):
"""Test what happens when missing values are in the data"""
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"'TypeError: '<' not supported between instances of 'DataTypeClass' and"
Expand Down
23 changes: 12 additions & 11 deletions skrub/tests/test_interpolation_join.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

from skrub import InterpolationJoiner
from skrub._dataframe._namespace import is_namespace_polars
from skrub._dataframe._polars import POLARS_SETUP

MODULES = [pd]
Expand Down Expand Up @@ -39,7 +40,7 @@ def weather():
@pytest.mark.parametrize("key", [["latitude", "longitude"], "latitude"])
@pytest.mark.parametrize("with_nulls", [False, True])
def test_interpolation_join(px, buildings, weather, key, with_nulls):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand All @@ -60,7 +61,7 @@ def test_interpolation_join(px, buildings, weather, key, with_nulls):

@pytest.mark.parametrize("px", MODULES)
def test_vectorizer(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand All @@ -87,7 +88,7 @@ def transform(self, X):

@pytest.mark.parametrize("px", MODULES)
def test_no_multioutput(px, buildings, weather):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand All @@ -105,7 +106,7 @@ def test_no_multioutput(px, buildings, weather):

@pytest.mark.parametrize("px", MODULES)
def test_condition_choice(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand Down Expand Up @@ -143,7 +144,7 @@ def test_condition_choice(px):

@pytest.mark.parametrize("px", MODULES)
def test_suffix(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand All @@ -159,7 +160,7 @@ def test_suffix(px):

@pytest.mark.parametrize("px", MODULES)
def test_mismatched_indexes(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand All @@ -179,7 +180,7 @@ def test_mismatched_indexes(px):
@pytest.mark.parametrize("px", MODULES)
def test_fit_on_none(px):
# X is hardly used in fit so it should be ok to fit without a main table
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand All @@ -199,7 +200,7 @@ def test_fit_on_none(px):

@pytest.mark.parametrize("px", MODULES)
def test_join_on_date(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand Down Expand Up @@ -231,7 +232,7 @@ def fit(self, X, y):

@pytest.mark.parametrize("px", MODULES)
def test_fit_failures(px, buildings, weather):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand Down Expand Up @@ -281,7 +282,7 @@ def predict(self, X):

@pytest.mark.parametrize("px", MODULES)
def test_transform_failures(px, buildings, weather):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand Down Expand Up @@ -329,7 +330,7 @@ def test_transform_failures(px, buildings, weather):

@pytest.mark.parametrize("px", MODULES)
def test_transform_failures_dtype(px, buildings, weather):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(
reason=(
"In polars, DataFrame.drop() got an unexpected keyword argument 'axis'"
Expand Down
5 changes: 3 additions & 2 deletions skrub/tests/test_joiner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from pandas.testing import assert_frame_equal

from skrub import Joiner
from skrub._dataframe._namespace import is_namespace_polars
from skrub._dataframe._polars import POLARS_SETUP

MODULES = [pd]
Expand All @@ -19,7 +20,7 @@

@pytest.mark.parametrize("px", MODULES)
def test_joiner(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
main_table = px.DataFrame(
{
Expand Down Expand Up @@ -65,7 +66,7 @@ def test_joiner(px):

@pytest.mark.parametrize("px, assert_frame_equal_", ASSERT_TUPLES)
def test_multiple_keys(px, assert_frame_equal_):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Polars DataFrame object has no attribute 'reset_index'")
df = px.DataFrame(
{"Co": ["France", "Italia", "Deutchland"], "Ca": ["Paris", "Roma", "Berlin"]}
Expand Down
3 changes: 2 additions & 1 deletion skrub/tests/test_similarity_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from sklearn.exceptions import NotFittedError

from skrub import SimilarityEncoder
from skrub._dataframe._namespace import is_namespace_polars
from skrub._dataframe._polars import POLARS_SETUP
from skrub._similarity_encoder import ngram_similarity_matrix
from skrub._string_distances import ngram_similarity
Expand Down Expand Up @@ -341,7 +342,7 @@ def test_check_fitted_super_vectorizer():

@pytest.mark.parametrize("px", MODULES)
def test_inverse_transform(px):
if px.__name__ == "polars":
if is_namespace_polars(px):
pytest.xfail(reason="Setting output to polars is not possible yet.")
encoder = SimilarityEncoder()
encoder.set_output(transform="pandas")
Expand Down

0 comments on commit b579028

Please sign in to comment.