-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1d40084
commit 60dc32f
Showing 6 changed files with 212 additions and 17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import pytest | ||
from sklearn.datasets import make_classification | ||
from sklearn.model_selection import train_test_split | ||
|
||
from bluecast.blueprints.custom_model_recipes import LogisticRegressionModel | ||
|
||
|
||
@pytest.fixture
def data():
    """Build a small synthetic binary-classification train/test split.

    Generates 100 samples with 20 features (2 informative, 10 redundant)
    and holds out 20% of them as the test partition. Both the generator
    and the split use ``random_state=42`` so the fixture is reproducible.

    Returns:
        A 4-tuple ``(X_train, X_test, y_train, y_test)`` where the feature
        parts are ``pd.DataFrame`` objects and the target parts are
        ``pd.Series`` objects.
    """
    features, labels = make_classification(
        n_samples=100,
        n_features=20,
        n_informative=2,
        n_redundant=10,
        random_state=42,
    )
    train_x, test_x, train_y, test_y = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Wrap the raw arrays in pandas containers, which is what the tests
    # hand over to the model under test.
    frames = tuple(pd.DataFrame(part) for part in (train_x, test_x))
    targets = tuple(pd.Series(part) for part in (train_y, test_y))
    return frames + targets
|
||
|
||
@pytest.fixture | ||
def model(): | ||
return LogisticRegressionModel(random_state=42) | ||
|
||
|
||
def test_autotune(data, model):
    """``autotune`` should leave ``model.model`` exposing a ``coef_`` attribute."""
    train_features, test_features, train_target, test_target = data
    model.autotune(train_features, test_features, train_target, test_target)

    # The presence of ``coef_`` on the wrapped estimator is used as the
    # signal that fitting took place.
    has_coefficients = hasattr(model.model, "coef_")
    assert has_coefficients, "Model should have been fitted and have coefficients."
|
||
|
||
def test_fit(data, model):
    """``fit`` should leave ``model.model`` exposing a ``coef_`` attribute."""
    train_features, test_features, train_target, test_target = data
    model.fit(train_features, test_features, train_target, test_target)

    # Same fitted-ness signal as the autotune test: ``coef_`` must exist.
    has_coefficients = hasattr(model.model, "coef_")
    assert has_coefficients, "Model should have been fitted after calling fit method."
|
||
|
||
def test_predict(data, model):
    """Check the type, shape and value range of what ``predict`` returns.

    After fitting, ``predict`` is unpacked as ``(probas, classes)``; both
    must be 1-D numpy arrays with one entry per test row, probabilities
    must lie in [0, 1] and classes must be 0 or 1.
    """
    X_train, X_test, y_train, y_test = data
    model.fit(X_train, X_test, y_train, y_test)

    probas, classes = model.predict(X_test)

    # Types of the returned values.
    probas_is_array = isinstance(probas, np.ndarray)
    assert probas_is_array, "Predicted probabilities should be a numpy array."
    classes_is_array = isinstance(classes, np.ndarray)
    assert classes_is_array, "Predicted classes should be a numpy array."

    # Shapes: one prediction per row of the test frame.
    one_per_row = (X_test.shape[0],)
    assert (
        probas.shape == one_per_row
    ), "Predicted probabilities should have the correct shape."
    assert (
        classes.shape == one_per_row
    ), "Predicted classes should have the correct shape."

    # Value ranges.
    within_unit_interval = (probas >= 0) & (probas <= 1)
    assert np.all(
        within_unit_interval
    ), "Predicted probabilities should be between 0 and 1."
    is_binary_label = (classes == 0) | (classes == 1)
    assert np.all(is_binary_label), "Predicted classes should be either 0 or 1."
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import numpy as np | ||
import pandas as pd | ||
import pytest | ||
from sklearn.datasets import make_classification | ||
|
||
from bluecast.blueprints.preprocessing_recipes import PreprocessingForLinearModels | ||
from bluecast.preprocessing.custom import CustomPreprocessing | ||
|
||
|
||
# Mocking remove_correlated_columns for testing purposes
def mock_remove_correlated_columns(df, threshold):
    """Stand-in for ``remove_correlated_columns`` that drops the last column.

    Args:
        df: Input ``pd.DataFrame``.
        threshold: Accepted only to mirror the signature this mock is
            substituted for; it is ignored.

    Returns:
        A new DataFrame holding every column of ``df`` except the last one.
    """
    # Select by position rather than by label: with duplicated column
    # labels a label-based lookup would re-select every matching column
    # and therefore fail to drop exactly one. ``copy()`` keeps the result
    # independent of the caller's frame.
    return df.iloc[:, :-1].copy()
|
||
|
||
@pytest.fixture
def sample_data():
    """Return a ``(features, target)`` pair of synthetic numerical data.

    The features are a 100-row DataFrame with columns ``feature_0`` ..
    ``feature_4``; the target is a Series named ``"target"``. Generation
    uses ``random_state=42``.
    """
    column_names = [f"feature_{i}" for i in range(5)]
    X, y = make_classification(n_samples=100, n_features=5, random_state=42)
    features = pd.DataFrame(X, columns=column_names)
    return features, pd.Series(y, name="target")
|
||
|
||
@pytest.fixture
def preprocessing_instance():
    """Provide a ``PreprocessingForLinearModels`` configured for five columns."""
    numeric_columns = [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
        "feature_4",
    ]
    return PreprocessingForLinearModels(num_columns=numeric_columns)
|
||
|
||
def test_initialization(preprocessing_instance):
    """A fresh instance keeps its column list and starts with no selection.

    Verifies the instance is a ``CustomPreprocessing``, that ``num_columns``
    equals the list passed at construction, and that
    ``non_correlated_columns`` is an empty list.
    """
    expected_columns = [
        "feature_0",
        "feature_1",
        "feature_2",
        "feature_3",
        "feature_4",
    ]

    assert isinstance(preprocessing_instance, CustomPreprocessing)
    assert preprocessing_instance.num_columns == expected_columns
    assert preprocessing_instance.non_correlated_columns == []
|
||
|
||
def test_fit_transform(sample_data, preprocessing_instance, monkeypatch):
    """``fit_transform`` returns a clean 4-column frame and the target Series."""
    df, target = sample_data

    # Swap in the mock for the collinearity filter.
    # NOTE(review): this patches the attribute on the defining module. If
    # the code under test imported the function by name into its own
    # module, the mock would not be the one called — confirm the target.
    patch_target = "bluecast.preprocessing.remove_collinearity.remove_correlated_columns"
    monkeypatch.setattr(patch_target, mock_remove_correlated_columns)

    result = preprocessing_instance.fit_transform(df, target)
    transformed_df, transformed_target = result

    # Types and shapes of the outputs.
    assert isinstance(transformed_df, pd.DataFrame)
    assert isinstance(transformed_target, pd.Series)
    assert transformed_df.shape == (100, 4)  # Since one column is removed by mock
    assert transformed_target.shape == (100,)

    # Neither missing nor infinite values may remain.
    has_missing = transformed_df.isnull().any().any()
    assert not has_missing
    has_infinite = np.isinf(transformed_df).any().any()
    assert not has_infinite
|
||
|
||
def test_transform(sample_data, preprocessing_instance, monkeypatch):
    """``transform`` on data with an injected NaN yields a clean 4-column frame."""
    df, target = sample_data

    # Fit first so that ``transform`` runs against a fitted instance.
    # NOTE(review): this patches the attribute on the defining module. If
    # the code under test imported the function by name into its own
    # module, the mock would not be the one called — confirm the target.
    patch_target = "bluecast.preprocessing.remove_collinearity.remove_correlated_columns"
    monkeypatch.setattr(patch_target, mock_remove_correlated_columns)
    preprocessing_instance.fit_transform(df, target)

    # New data: a copy of the training frame with one missing value.
    new_df = df.copy()
    new_df.loc[0, "feature_0"] = np.nan

    result = preprocessing_instance.transform(new_df, target)
    transformed_df, transformed_target = result

    # Types and shapes of the outputs.
    assert isinstance(transformed_df, pd.DataFrame)
    assert isinstance(transformed_target, pd.Series)
    assert transformed_df.shape == (100, 4)
    assert transformed_target.shape == (100,)

    # Neither missing nor infinite values may remain.
    has_missing = transformed_df.isnull().any().any()
    assert not has_missing
    has_infinite = np.isinf(transformed_df).any().any()
    assert not has_infinite
|
||
|
||
def test_no_numerical_columns():
    """With an empty ``num_columns`` list, ``fit_transform`` is a pass-through.

    The test expects both the returned frame and the returned target to be
    equal to the inputs.
    """
    target = pd.Series([1, 0, 1])
    df = pd.DataFrame({"category": ["A", "B", "C"], "binary": [1, 0, 1]})

    preprocessing = PreprocessingForLinearModels(num_columns=[])
    result = preprocessing.fit_transform(df, target)
    transformed_df, transformed_target = result

    # No numerical columns were declared, so nothing should have changed.
    pd.testing.assert_frame_equal(transformed_df, df)
    pd.testing.assert_series_equal(transformed_target, target)
Binary file not shown.
Binary file not shown.