Fixes for unit and integration tests (#1238)
* API tests refactored

* CGRU test temporarily disabled

* TSF example fix
nicl-nno authored Dec 29, 2023
1 parent fdfdde8 commit a35f602
Showing 9 changed files with 335 additions and 295 deletions.
12 changes: 6 additions & 6 deletions examples/simple/time_series_forecasting/api_forecasting.py
@@ -37,12 +37,10 @@ def run_ts_forecasting_example(dataset='australia', horizon: int = 30, timeout:
    train_data, test_data = get_ts_data(dataset, horizon, validation_blocks)
    # init model for the time series forecasting
    model = Fedot(problem='ts_forecasting',
-
                  timeout=timeout,
                  n_jobs=-1,
-                  metric=['mase', 'mae', 'mape', 'rmse'],
                  with_tuning=with_tuning,
-                  cv_folds=2, preset='fast_train')
+                  task_params=TsForecastingParams(forecast_length=horizon))

    # run AutoML model design in the same way
    pipeline = model.fit(train_data)
@@ -51,7 +49,8 @@ def run_ts_forecasting_example(dataset='australia', horizon: int = 30, timeout:
    in_sample_forecast = model.predict(test_data, validation_blocks=validation_blocks)
    print('Metrics for two-step in-sample forecast: ',
          model.get_metrics(metric_names=['mase', 'mae', 'mape'],
-                            validation_blocks=validation_blocks))
+                            validation_blocks=validation_blocks,
+                            target=test_data.target))

    # plot forecasting result
    if visualization:
@@ -63,7 +62,8 @@ def run_ts_forecasting_example(dataset='australia', horizon: int = 30, timeout:
    simple_forecast = model.forecast(test_data)
    print('Metrics for one-step forecast: ',
          model.get_metrics(metric_names=['rmse', 'mae', 'mape'],
-                            validation_blocks=validation_blocks))
+                            validation_blocks=validation_blocks,
+                            target=test_data.target))
    if visualization:
        model.plot_prediction()

@@ -77,4 +77,4 @@ def run_ts_forecasting_example(dataset='australia', horizon: int = 30, timeout:


if __name__ == '__main__':
-    run_ts_forecasting_example(dataset='beer', horizon=14, timeout=2., visualization=True)
+    run_ts_forecasting_example(dataset='beer', horizon=2, timeout=0.1, visualization=True)
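
Taken together, the example's new call pattern looks roughly like the sketch below. This is a restatement assembled from the diff above, not a verbatim copy of the file; train_data, test_data, horizon, timeout, with_tuning and validation_blocks are the example's own variables.

from fedot import Fedot
from fedot.core.repository.tasks import TsForecastingParams

# The model is now configured via task_params; the example no longer passes
# a metric list, cv_folds or preset.
model = Fedot(problem='ts_forecasting',
              timeout=timeout,
              n_jobs=-1,
              with_tuning=with_tuning,
              task_params=TsForecastingParams(forecast_length=horizon))
pipeline = model.fit(train_data)

# get_metrics now receives the ground-truth target explicitly.
in_sample_forecast = model.predict(test_data, validation_blocks=validation_blocks)
print(model.get_metrics(metric_names=['mase', 'mae', 'mape'],
                        validation_blocks=validation_blocks,
                        target=test_data.target))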
4 changes: 2 additions & 2 deletions fedot/preprocessing/data_types.py
@@ -5,7 +5,6 @@

import numpy as np
import pandas as pd
-
from golem.core.log import LoggerAdapter, default_log

from fedot.core.repository.tasks import Task, TaskTypesEnum
@@ -108,7 +107,8 @@ def convert_data_for_predict(self, data: InputData):
        data.features = data.features.astype(object)
        data.features = self.remove_incorrect_features(data.features, self.features_converted_columns)
        data.features = apply_type_transformation(data.features, self.feature_type_ids, self.log)
-        data.target = apply_type_transformation(data.target, self.target_type_ids, self.log)
+        if data.target is not None:
+            data.target = apply_type_transformation(data.target, self.target_type_ids, self.log)
        data.supplementary_data.col_type_ids = self.prepare_column_types_info(predictors=data.features,
                                                                              target=data.target,
                                                                              task=data.task)
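
The guard matters because at predict time an InputData instance may carry no ground truth, so data.target is None. A minimal sketch of the fixed behaviour, written as a hypothetical standalone function rather than the library's method:

# Hypothetical restatement of the guarded branch above: skip target type
# conversion entirely when predict-time data carries no target.
def convert_target_for_predict(target, target_type_ids, apply_type_transformation, log):
    if target is None:
        return None  # unlabeled predict-time data: nothing to convert
    return apply_type_transformation(target, target_type_ids, log)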
1 change: 0 additions & 1 deletion requirements.txt
@@ -30,7 +30,6 @@ seaborn>=0.9.0
func_timeout==4.3.5
joblib>=0.17.0
requests>=2.0
-tqdm
typing>=3.7.0
psutil>=5.9.2

130 changes: 130 additions & 0 deletions test/data/datasets.py
@@ -0,0 +1,130 @@
import os
from typing import Optional

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from cases.metocean_forecasting_problem import prepare_input_data
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.data.supplementary_data import SupplementaryData
from fedot.core.repository.dataset_types import DataTypesEnum
from fedot.core.repository.tasks import Task, TaskTypesEnum
from fedot.core.utils import fedot_project_root
from test.integration.models.test_split_train_test import get_synthetic_input_data
from test.unit.tasks.test_classification import get_iris_data, get_synthetic_classification_data
from test.unit.tasks.test_forecasting import get_ts_data
from test.unit.tasks.test_regression import get_synthetic_regression_data


def get_split_data_paths():
    file_path_train = 'test/data/simple_regression_train.csv'
    file_path_test = 'test/data/simple_regression_test.csv'
    full_path_train = os.path.join(str(fedot_project_root()), file_path_train)
    full_path_test = os.path.join(str(fedot_project_root()), file_path_test)

    return full_path_train, full_path_test


def get_split_data():
    task_type = 'regression'
    train_full, test = get_split_data_paths()
    train_file = pd.read_csv(train_full)
    x, y = train_file.loc[:, ~train_file.columns.isin(['target'])].values, train_file['target'].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.15, random_state=24)
    return task_type, x_train, x_test, y_train, y_test


def get_cholesterol_dataset():
    data_path = f'{fedot_project_root()}/cases/data/cholesterol/cholesterol.csv'
    data = InputData.from_csv(data_path, task=Task(TaskTypesEnum.regression))
    train, test = train_test_data_setup(data)
    return train, test


def get_dataset(task_type: str, validation_blocks: Optional[int] = None, n_samples: int = 200,
                n_features: int = 8, forecast_length: int = 5, iris_dataset=True):
    if task_type == 'regression':
        data = get_synthetic_regression_data(n_samples=n_samples, n_features=n_features, random_state=42)
        train_data, test_data = train_test_data_setup(data)
        threshold = np.std(test_data.target) * 0.05
    elif task_type == 'classification':
        if iris_dataset:
            data = get_iris_data()
        else:
            data = get_synthetic_classification_data(n_samples=n_samples, n_features=n_features, random_state=42)
        train_data, test_data = train_test_data_setup(data, shuffle=True)
        threshold = 0.95
    elif task_type == 'clustering':
        data = get_synthetic_input_data(n_samples=100)
        train_data, test_data = train_test_data_setup(data)
        threshold = 0.5
    elif task_type == 'ts_forecasting':
        train_data, test_data = get_ts_data(forecast_length=forecast_length, validation_blocks=validation_blocks)
        threshold = np.std(test_data.target)
    else:
        raise ValueError('Incorrect type of machine learning task')
    return train_data, test_data, threshold


def get_multimodal_ts_data(size=500):
    file_path_train = 'cases/data/metocean/metocean_data_train.csv'
    full_path_train = os.path.join(str(fedot_project_root()), file_path_train)

    # a dataset for a final validation of the composed model
    file_path_test = 'cases/data/metocean/metocean_data_test.csv'
    full_path_test = os.path.join(str(fedot_project_root()), file_path_test)

    target_history, add_history, _ = prepare_input_data(full_path_train, full_path_test,
                                                        history_size=size)
    historical_data = {
        'ws': add_history,  # additional variable
        'ssh': target_history,  # target variable
    }
    return historical_data, target_history


def load_categorical_unimodal():
    dataset_path = 'test/data/classification_with_categorical.csv'
    full_path = os.path.join(str(fedot_project_root()), dataset_path)
    data = InputData.from_csv(full_path)
    train_data, test_data = train_test_data_setup(data, shuffle=True)

    return train_data, test_data


def load_categorical_multidata():
    # Create features table
    features_first = np.array([[0, ' a'], [1, ' a '], [2, ' b'], [3, np.nan], [4, ' a'],
                               [5, ' b'], [6, 'b '], [7, ' c'], [8, ' c ']], dtype=object)
    features_second = np.array([[10, ' a'], [11, ' a '], [12, ' b'], [13, ' a '], [14, ' a'],
                                [15, ' b'], [16, 'b '], [17, ' c'], [18, ' c ']], dtype=object)
    # TODO @andreygetmanov (fails if target = ['true', 'false', ...])
    target = np.array([1, 0, 1, 0, 0, 0, 0, 1, 1])

    fit_data = {'first': features_first,
                'second': features_second}

    return fit_data, target


def data_with_binary_features_and_categorical_target():
    """
    Generates a dataset in which both features and target require transformations:
    the categorical binary features and the categorical target must be converted to int
    """
    task = Task(TaskTypesEnum.classification)
    features = np.array([['red', 'blue'],
                         ['red', 'blue'],
                         ['red', 'blue'],
                         [np.nan, 'blue'],
                         ['green', 'blue'],
                         ['green', 'orange'],
                         ['red', 'orange']])
    target = np.array(['red-blue', 'red-blue', 'red-blue', 'red-blue', 'green-blue', 'green-orange', 'red-orange'])
    train_input = InputData(idx=[0, 1, 2, 3, 4, 5, 6], features=features, target=target,
                            task=task, data_type=DataTypesEnum.table,
                            supplementary_data=SupplementaryData())

    return train_input
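
A short usage sketch of the new shared module, assuming it is imported from a FEDOT checkout the way the tests below do; the argument values are illustrative:

from test.data.datasets import get_cholesterol_dataset, get_dataset

# Synthetic regression split plus a task-specific quality threshold.
train_data, test_data, threshold = get_dataset('regression', n_samples=200, n_features=8)

# Real regression case bundled with the repository.
train, test = get_cholesterol_dataset()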
5 changes: 3 additions & 2 deletions test/integration/api/test_api_utils.py
@@ -5,14 +5,15 @@

from examples.simple.classification.classification_pipelines import (classification_pipeline_with_balancing,
                                                                      classification_pipeline_without_balancing)
-from fedot.api.api_utils.assumptions.assumptions_builder import AssumptionsBuilder
from fedot import Fedot
+from fedot.api.api_utils.assumptions.assumptions_builder import AssumptionsBuilder
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.pipelines.node import PipelineNode
from fedot.core.pipelines.pipeline import Pipeline
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
from fedot.preprocessing.preprocessing import DataPreprocessor
-from test.integration.api.test_main_api import get_dataset, get_cholesterol_dataset
+from test.data.datasets import get_cholesterol_dataset
+from test.integration.api.test_main_api import get_dataset
from test.unit.tasks.test_classification import get_binary_classification_data
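
The net effect for test modules, sketched below: the shared dataset helper now comes from test.data.datasets, while get_dataset keeps its old import path:

# before: from test.integration.api.test_main_api import get_dataset, get_cholesterol_dataset
from test.data.datasets import get_cholesterol_dataset
from test.integration.api.test_main_api import get_dataset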


