Skip to content

Commit

Permalink
make tests work with all pandas versions
Browse files Browse the repository at this point in the history
  • Loading branch information
LeoGrin committed Sep 20, 2023
1 parent 4e42e98 commit 22b3cea
Showing 1 changed file with 39 additions and 28 deletions.
67 changes: 39 additions & 28 deletions skrub/tests/test_datetime_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,36 +36,61 @@ def get_datetime_array() -> np.array:
"2020-01-01 10:12:01",
"2020-01-02 10:23:00",
"2020-01-03 10:00:00",
"2020-08-24 15:55:30.123456789",
],
format="mixed",
),
pd.to_datetime(
[
"2021-02-03 12:45:23",
"2020-02-04 22:12:00",
"2021-02-05 12:00:00",
"2021-07-20 14:56:31.987654321",
],
format="mixed",
),
pd.to_datetime(
[
"2022-01-01 23:23:43",
"2020-12-25 11:12:00",
"2022-01-03 11:00:00",
"2023-09-20 14:57:32.123987654",
],
format="mixed",
),
pd.to_datetime(
[
"2023-02-03 11:12:12",
"2020-02-04 08:32:00",
"2023-02-05 23:00:00",
],
),
]
)


def get_datetime_array_nanoseconds() -> np.ndarray:
    """Build a small array of nanosecond-precision timestamps.

    Each inner list below is one row, so the result has four rows and
    two columns. The first column keeps the year (2020) and the month
    (08) constant across all rows; the second column varies them.

    No ``format`` argument is passed to ``pd.to_datetime``: both strings
    of every row share the same layout, so no hint is needed, and
    ``format="mixed"`` is only accepted by pandas >= 2.0.

    Returns
    -------
    np.ndarray
        Array built by stacking the four parsed rows with ``np.array``.
    """
    rows = [
        [
            # constant year and month
            # for the first feature
            "2020-08-24 15:55:30.123456789",
            "2020-08-24 15:55:30.123456789",
        ],
        [
            "2020-08-20 14:56:31.987654321",
            "2021-07-20 14:56:31.987654321",
        ],
        [
            "2020-08-20 14:57:32.123987654",
            "2023-09-20 14:57:32.123987654",
        ],
        [
            "2020-08-20 14:58:33.987123456",
            "2023-09-20 14:58:33.987123456",
        ],
    ]
    # Parse every row the same way so the helper behaves identically
    # regardless of the installed pandas version.
    return np.array([pd.to_datetime(row) for row in rows])
Expand Down Expand Up @@ -150,7 +175,6 @@ def test_fit() -> None:
0: ["year", "month", "day", "hour", "dayofweek", "total_time"],
1: ["month", "day", "hour", "dayofweek", "total_time"],
2: ["year", "month", "day", "hour", "dayofweek"],
3: ["year", "month", "day", "hour", "dayofweek", "total_time"],
}
enc.fit(X)
assert enc._to_extract == expected_to_extract
Expand All @@ -166,14 +190,13 @@ def test_fit() -> None:
0: ["year", "month", "day", "hour", "minute", "total_time"],
1: ["month", "day", "hour", "minute"],
2: ["year", "month", "day", "hour"],
3: ["year", "month", "day", "hour", "minute", "total_time"],
}
enc.fit(X)
assert enc._to_extract == expected_to_extract
assert enc.features_per_column_ == expected_features_per_column_

# extract_until="nanosecond"
X = get_datetime_array()
X = get_datetime_array_nanoseconds()
enc = DatetimeEncoder(extract_until="nanosecond")
expected_to_extract = [
"year",
Expand All @@ -186,17 +209,17 @@ def test_fit() -> None:
"nanosecond",
]
expected_features_per_column_ = {
# constant year and month
# for first feature
0: [
"year",
"month",
"day",
"hour",
"minute",
"second",
"microsecond",
"nanosecond",
],
1: ["month", "day", "hour", "minute"],
2: ["year", "month", "day", "hour"],
3: [
1: [
"year",
"month",
"day",
Expand Down Expand Up @@ -254,20 +277,14 @@ def test_fit() -> None:
"2_day",
"2_hour",
"2_dayofweek",
"3_year",
"3_month",
"3_day",
"3_hour",
"3_dayofweek",
"3_total_time",
]
enc.fit(X)
assert enc.get_feature_names_out() == expected_feature_names

# With column names
X = get_datetime_array()
X = pd.DataFrame(X)
X.columns = ["col1", "col2", "col3", "col4"]
X.columns = ["col1", "col2", "col3"]
enc = DatetimeEncoder(add_day_of_the_week=True)
expected_feature_names = [
"col1_year",
Expand All @@ -286,12 +303,6 @@ def test_fit() -> None:
"col3_day",
"col3_hour",
"col3_dayofweek",
"col4_year",
"col4_month",
"col4_day",
"col4_hour",
"col4_dayofweek",
"col4_total_time",
]
enc.fit(X)
assert enc.get_feature_names_out() == expected_feature_names
Expand Down

0 comments on commit 22b3cea

Please sign in to comment.