diff --git a/README.rst b/README.rst index f6f790327b..cd0e7fa24d 100644 --- a/README.rst +++ b/README.rst @@ -130,9 +130,9 @@ Jupyter ноутбуки с примерами находятся в репоз Расширенные примеры: -- Задача с кредитным скорингом `binary classification task `__ -- Прогнозирование временных рядов `random process regression `__ -- Обнаружение спама `natural language preprocessing `__ +- Задача с кредитным скорингом `binary classification task `__ +- Прогнозирование временных рядов `random process regression `__ +- Обнаружение спама `natural language preprocessing `__ - Предсказание сорта вина `multi-modal data `__ diff --git a/README_en.rst b/README_en.rst index 8c3442f671..0598df8250 100644 --- a/README_en.rst +++ b/README_en.rst @@ -131,9 +131,9 @@ Also, external examples are available: Extended examples: -- Credit scoring problem, i.e. `binary classification task `__ -- Time series forecasting, i.e. `random process regression `__ -- Spam detection, i.e. `natural language preprocessing `__ +- Credit scoring problem, i.e. `binary classification task `__ +- Time series forecasting, i.e. `random process regression `__ +- Spam detection, i.e. `natural language preprocessing `__ - Wine variety prediction with `multi-modal data `__ diff --git a/cases/evo_operators_comparison/req.txt b/cases/evo_operators_comparison/req.txt deleted file mode 100644 index eccb2357d1..0000000000 --- a/cases/evo_operators_comparison/req.txt +++ /dev/null @@ -1 +0,0 @@ -fedot==0.6.1 \ No newline at end of file diff --git a/docs/source/advanced/automated_pipelines_design.rst b/docs/source/advanced/automated_pipelines_design.rst index 16b340efa6..05287d6941 100644 --- a/docs/source/advanced/automated_pipelines_design.rst +++ b/docs/source/advanced/automated_pipelines_design.rst @@ -9,4 +9,4 @@ Example of running optimization through the API can be found in the `api classif If instead users need to customize the optimization algorithm (e.g. 
with custom genetic operators like mutations or crossover or custom verification rules) then it's possible by directly using `ComposerBuilder` class or one of the optimizers from GOLEM. -Example of a customized usage can be found in `credit scoring case problem `_. +Example of a customized usage can be found in `credit scoring case problem `_. diff --git a/docs/source/advanced/hyperparameters_tuning.rst b/docs/source/advanced/hyperparameters_tuning.rst index 3617901dc0..d396f3589e 100644 --- a/docs/source/advanced/hyperparameters_tuning.rst +++ b/docs/source/advanced/hyperparameters_tuning.rst @@ -721,8 +721,8 @@ Another examples can be found here: * `Tuning pipelines with sparse_lagged / lagged node `_ * `Topaz multi time series forecasting `_ * `Custom model tuning `_ -* `Case: river level forecasting with composer `_ -* `Case: river level forecasting (manual) `_ +* `Case: river level forecasting with composer `_ +* `Case: river level forecasting (manual) `_ **Multitask** diff --git a/docs/source/basics/pipeline_save_load.rst b/docs/source/basics/pipeline_save_load.rst index 3e2d95adfb..48892cb6ca 100644 --- a/docs/source/basics/pipeline_save_load.rst +++ b/docs/source/basics/pipeline_save_load.rst @@ -19,7 +19,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas .. code-block:: python problem = 'classification' - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' baseline_model = Fedot(problem=problem, timeout=1, seed=42) baseline_model.fit(features=train_data_path, target='target', predefined_model='rf') @@ -62,7 +62,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas .. 
code-block:: python problem = 'classification' - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' baseline_model = Fedot(problem=problem, timeout=1, seed=42) baseline_model.fit(features=train_data_path, target='target', predefined_model='rf') @@ -99,7 +99,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas .. code-block:: python problem = 'classification' - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' baseline_model = Fedot(problem=problem, timeout=1, seed=42) baseline_model.fit(features=train_data_path, target='target', predefined_model='rf') @@ -137,7 +137,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas .. code-block:: python problem = 'classification' - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' baseline_model = Fedot(problem=problem, timeout=1, seed=42) baseline_model.fit(features=train_data_path, target='target', predefined_model='rf') @@ -175,7 +175,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas .. 
code-block:: python problem = 'classification' - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' baseline_model = Fedot(problem=problem, timeout=1, seed=42) baseline_model.fit(features=train_data_path, target='target', predefined_model='rf') diff --git a/docs/source/basics/tabular_data.rst b/docs/source/basics/tabular_data.rst index 7a4d792bde..6c324d5b84 100644 --- a/docs/source/basics/tabular_data.rst +++ b/docs/source/basics/tabular_data.rst @@ -113,4 +113,4 @@ More details you can find in the follow links: **Cases** -* `Case: Credit scoring problem `_ \ No newline at end of file +* `Case: Credit scoring problem `_ \ No newline at end of file diff --git a/docs/source/basics/ts_forecasting.rst b/docs/source/basics/ts_forecasting.rst index 306aa35f66..db1c39cbf2 100644 --- a/docs/source/basics/ts_forecasting.rst +++ b/docs/source/basics/ts_forecasting.rst @@ -624,5 +624,5 @@ Examples **Cases** -* `Case: river level forecasting with composer `_ -* `Case: river level forecasting (manual) `_ +* `Case: river level forecasting with composer `_ +* `Case: river level forecasting (manual) `_ diff --git a/docs/source/faq/abstract.rst b/docs/source/faq/abstract.rst index 40b2f3e392..6ee8d1da81 100644 --- a/docs/source/faq/abstract.rst +++ b/docs/source/faq/abstract.rst @@ -1,13 +1,22 @@ Abstract ======== -.. topic:: What is Fedot? +.. topic:: What is FEDOT? - *Fedot is the AutoML-like framework for the automated generation of the + *FEDOT is the AutoML-like framework for the automated generation of the data-driven composite models. It can solve classification, regression, clustering, and forecasting problems.* -.. topic:: Why should I use Fedot instead of existing state-of-the-art solutions (H2O/TPOT/etc)? +.. topic:: Why is FEDOT considered a framework?
+ + *While the exact difference between 'library' and 'framework' is a bit ambiguous and + context-dependent in many cases, we still consider FEDOT as a framework.* + + *The reason is that it can be used not only to solve pre-defined AutoML tasks, + but also can be used to build new derivative solutions.* + *As examples:* `FEDOT.NAS`_, `FEDOT.Industrial`_. + +.. topic:: Why should I use FEDOT instead of existing state-of-the-art solutions (H2O/TPOT/etc)? *In practice, the existing AutoML solutions are really effective for the limited set of problems only. During the model learning, modern AutoML @@ -21,16 +30,16 @@ Abstract efficient way. Also, we are aimed to outperform the existing solutions even for well-known benchmarks (e.g. PMLB datasets).* -.. topic:: Can I install Fedot using pip/conda? +.. topic:: Can I install FEDOT using pip/conda? - *Yes, follow the* `link`_ + *Yes, follow the* `link`_. -.. topic:: Can I use Fedot in my project/research/etc? +.. topic:: Can I use FEDOT in my project/research/etc? *Yes, you can. The Fedot is published under the BSD-3 license. Also, we will be happy to help the users to adopt Fedot to their needs.* -.. topic:: Why it is named Fedot? +.. topic:: Why it is named FEDOT? *We decided to use this archaic Russian first name to add a bit of fantasy spirit into the development process.* @@ -40,3 +49,9 @@ Abstract .. _link: https://pypi.org/project/fedot .. `link` replace:: *link* + +.. _FEDOT.NAS: https://github.com/ITMO-NSS-team/nas-fedot +.. `FEDOT.NAS` replace:: *FEDOT.NAS* + +.. _FEDOT.Industrial: https://github.com/aimclub/Fedot.Industrial +..
`FEDOT.Industrial` replace:: *FEDOT.Industrial* diff --git a/examples/advanced/additional_learning.py b/examples/advanced/additional_learning.py index 0d40a56a23..1239978f68 100644 --- a/examples/advanced/additional_learning.py +++ b/examples/advanced/additional_learning.py @@ -13,8 +13,8 @@ def run_additional_learning_example(): - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' - test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' + test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv' train_data = pd.read_csv(train_data_path) test_data = pd.read_csv(test_data_path) diff --git a/examples/advanced/automl/pipeline_from_automl.py b/examples/advanced/automl/pipeline_from_automl.py index 15e7d6edd0..b0d10f4c09 100644 --- a/examples/advanced/automl/pipeline_from_automl.py +++ b/examples/advanced/automl/pipeline_from_automl.py @@ -1,12 +1,12 @@ from datetime import timedelta -from fedot.core.repository.operation_types_repository import OperationTypesRepository from sklearn.metrics import roc_auc_score as roc_auc -from cases.data.data_utils import get_scoring_case_data_paths +from examples.real_cases.data.data_utils import get_scoring_case_data_paths from fedot.core.data.data import InputData from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline +from fedot.core.repository.operation_types_repository import OperationTypesRepository # TODO not working now - add switch to other repository.json diff --git a/examples/advanced/automl/tpot_vs_fedot.py b/examples/advanced/automl/tpot_vs_fedot.py index 51491f6fd3..0e6842d55f 100644 --- a/examples/advanced/automl/tpot_vs_fedot.py +++ b/examples/advanced/automl/tpot_vs_fedot.py @@ -55,7 +55,7 @@ def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str): if __name__ == 
'__main__': - train_file_path = "../../../cases/data/scoring/scoring_train.csv" - test_file_path = "../../../cases/data/scoring/scoring_test.csv" + train_file_path = "../../real_examples/real_cases/data/scoring/scoring_train.csv" + test_file_path = "../../real_examples/real_cases/data/scoring/scoring_test.csv" run_tpot_vs_fedot_example(train_file_path, test_file_path) diff --git a/examples/advanced/decompose/classification_refinement_example.py b/examples/advanced/decompose/classification_refinement_example.py index 1806571eb3..5a4c714deb 100644 --- a/examples/advanced/decompose/classification_refinement_example.py +++ b/examples/advanced/decompose/classification_refinement_example.py @@ -1,6 +1,6 @@ from golem.core.tuning.simultaneous import SimultaneousTuner -from cases.credit_scoring.credit_scoring_problem import get_scoring_data, calculate_validation_metric +from examples.real_cases.credit_scoring.credit_scoring_problem import get_scoring_data, calculate_validation_metric from fedot.core.data.data import InputData from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline diff --git a/examples/advanced/decompose/refinement_forecast_example.py b/examples/advanced/decompose/refinement_forecast_example.py index 52ea9d361d..17502970d8 100644 --- a/examples/advanced/decompose/refinement_forecast_example.py +++ b/examples/advanced/decompose/refinement_forecast_example.py @@ -14,7 +14,6 @@ from fedot.core.repository.tasks import TaskTypesEnum, Task, TsForecastingParams from fedot.core.utils import set_random_seed - warnings.filterwarnings('ignore') @@ -163,6 +162,6 @@ def run_refinement_forecast(path_to_file, len_forecast=100, lagged=150, if __name__ == '__main__': set_random_seed(2020) - path = '../../../cases/data/time_series/economic_data.csv' + path = '../../real_examples/real_cases/data/time_series/economic_data.csv' run_refinement_forecast(path, len_forecast=50, validation_blocks=5, lagged=50, vis_with_decompose=True) 
diff --git a/examples/advanced/decompose/regression_refinement_example.py b/examples/advanced/decompose/regression_refinement_example.py index e371375937..509e0f7b9f 100644 --- a/examples/advanced/decompose/regression_refinement_example.py +++ b/examples/advanced/decompose/regression_refinement_example.py @@ -130,5 +130,5 @@ def run_river_experiment(file_path, with_tuning=False): if __name__ == '__main__': - run_river_experiment(file_path=f'{fedot_project_root()}/cases/data/river_levels/station_levels.csv', + run_river_experiment(file_path=f'{fedot_project_root()}/examples/real_cases/data/river_levels/station_levels.csv', with_tuning=True) diff --git a/cases/evo_operators_comparison/evo_operators_comparison.py b/examples/advanced/evo_operators_comparison.py similarity index 97% rename from cases/evo_operators_comparison/evo_operators_comparison.py rename to examples/advanced/evo_operators_comparison.py index 1a89fe81be..6f75500f0d 100644 --- a/cases/evo_operators_comparison/evo_operators_comparison.py +++ b/examples/advanced/evo_operators_comparison.py @@ -3,20 +3,19 @@ from typing import Sequence, Optional import numpy as np -from matplotlib import pyplot as plt -from sklearn.metrics import roc_auc_score as roc_auc - -from fedot.core.composer.composer_builder import ComposerBuilder -from fedot.core.data.data import InputData from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.crossover import CrossoverTypesEnum from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum from golem.core.optimisers.opt_history_objects.opt_history import OptHistory +from matplotlib import pyplot as plt +from sklearn.metrics import roc_auc_score as roc_auc +from fedot.core.composer.composer_builder import ComposerBuilder +from fedot.core.data.data import InputData from 
fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements -from fedot.core.repository.operation_types_repository import get_operations_for_task from fedot.core.repository.metrics_repository import ClassificationMetricsEnum +from fedot.core.repository.operation_types_repository import get_operations_for_task from fedot.core.repository.tasks import Task, TaskTypesEnum from fedot.core.utils import fedot_project_root @@ -178,8 +177,8 @@ def run_experiment_with_saved_histories(save_dir): if __name__ == '__main__': - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' - test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' + test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv' run_experiment(train_data_path, test_data_path, diff --git a/examples/advanced/fedot_based_solutions/external_optimizer.py b/examples/advanced/fedot_based_solutions/external_optimizer.py index 498d461b09..206200fac1 100644 --- a/examples/advanced/fedot_based_solutions/external_optimizer.py +++ b/examples/advanced/fedot_based_solutions/external_optimizer.py @@ -7,8 +7,8 @@ def run_with_random_search_composer(): - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' - test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' + test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv' composer_params = {'available_operations': ['class_decompose', 'rf', 'linear', 'xgboost', 'dt'], 'optimizer': RandomMutationSearchOptimizer} diff --git a/examples/advanced/gpu_example.py b/examples/advanced/gpu_example.py index abd78bd92a..2a05565f1f 100644 --- a/examples/advanced/gpu_example.py +++ 
b/examples/advanced/gpu_example.py @@ -69,8 +69,8 @@ def run_pipeline_with_specific_evaluation_mode(train_data: InputData, test_data: def get_scoring_data() -> Tuple[InputData, InputData]: - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' - test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' + test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv' train_data = InputData.from_csv(train_data_path) test_data = InputData.from_csv(test_data_path) diff --git a/examples/advanced/multi_modal_pipeline.py b/examples/advanced/multi_modal_pipeline.py index 0c79ca9a12..427e2d316a 100644 --- a/examples/advanced/multi_modal_pipeline.py +++ b/examples/advanced/multi_modal_pipeline.py @@ -3,7 +3,7 @@ from sklearn.metrics import f1_score as f1 -from cases.dataset_preparation import unpack_archived_data +from examples.real_cases.dataset_preparation import unpack_archived_data from fedot import Fedot from fedot.core.data.data import InputData, OutputData from fedot.core.data.data_split import train_test_data_setup diff --git a/examples/advanced/parallelization_comparison.py b/examples/advanced/parallelization_comparison.py index 2776213ef3..d2e36e9b42 100644 --- a/examples/advanced/parallelization_comparison.py +++ b/examples/advanced/parallelization_comparison.py @@ -28,7 +28,7 @@ def run_experiments(timeout: float = None, partitions_n=10, n_jobs=-1): :param n_jobs: how many processors you want to use in a multiprocessing mode """ - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' problem = 'classification' diff --git a/examples/advanced/profiler_example.py b/examples/advanced/profiler_example.py index c26d23d2fa..a0871fa60a 100644 --- 
a/examples/advanced/profiler_example.py +++ b/examples/advanced/profiler_example.py @@ -3,10 +3,9 @@ from golem.utilities.profiler.memory_profiler import MemoryProfiler from golem.utilities.profiler.time_profiler import TimeProfiler -from cases.credit_scoring.credit_scoring_problem import run_credit_scoring_problem, get_scoring_data +from examples.real_cases.credit_scoring.credit_scoring_problem import run_credit_scoring_problem, get_scoring_data from fedot.core.utils import set_random_seed - if __name__ == '__main__': set_random_seed(1) # JUST UNCOMMENT WHAT TYPE OF PROFILER DO YOU NEED diff --git a/examples/advanced/remote_execution/remote_fit_example.py b/examples/advanced/remote_execution/remote_fit_example.py index e8a4ee42f4..bd1aabbc23 100644 --- a/examples/advanced/remote_execution/remote_fit_example.py +++ b/examples/advanced/remote_execution/remote_fit_example.py @@ -16,7 +16,7 @@ def run_experiment(): # WARNING - THIS SCRIPT CAN BE EVALUATED ONLY WITH THE ACCESS TO DATAMALL SYSTEM # LOCAL RUN - folder = fedot_project_root().joinpath('cases', 'data', 'scoring') + folder = fedot_project_root().joinpath('examples', 'real_cases', 'data', 'scoring') path = folder.joinpath('scoring_train.csv') start = datetime.now() diff --git a/examples/advanced/remote_execution/ts_composer_with_integration.py b/examples/advanced/remote_execution/ts_composer_with_integration.py index c43f6b0fb8..c3c556cff3 100644 --- a/examples/advanced/remote_execution/ts_composer_with_integration.py +++ b/examples/advanced/remote_execution/ts_composer_with_integration.py @@ -20,7 +20,7 @@ def run_automl(data: MultiModalData, features_to_use, timeout: int = 1): """ Launch AutoML FEDOT algorithm for time series forecasting task """ - metocean_folder = fedot_project_root().joinpath('cases', 'data', 'metocean') + metocean_folder = fedot_project_root().joinpath('examples', 'real_cases', 'data', 'metocean') connect_params = {} exec_params = { @@ -73,7 +73,7 @@ def run_automl(data: MultiModalData, 
features_to_use, features_to_use = ['wind_speed', 'sea_height'] data = MultiModalData.from_csv_time_series( - file_path=fedot_project_root().joinpath('cases/data/metocean/metocean_data_train.csv'), + file_path=fedot_project_root().joinpath('examples/real_cases/data/metocean/metocean_data_train.csv'), columns_to_use=features_to_use, target_column='sea_height', index_col='datetime') diff --git a/examples/advanced/structural_analysis/dataset_access.py b/examples/advanced/structural_analysis/dataset_access.py index 25411f24e1..c09dd071b3 100644 --- a/examples/advanced/structural_analysis/dataset_access.py +++ b/examples/advanced/structural_analysis/dataset_access.py @@ -7,11 +7,11 @@ def get_scoring_data(): - file_path_train = join('cases', 'data', 'scoring', 'scoring_train.csv') + file_path_train = join('examples', 'real_cases', 'data', 'scoring', 'scoring_train.csv') full_path_train = join(str(fedot_project_root()), file_path_train) # a dataset for a final validation of the composed model - file_path_test = join('cases', 'data', 'scoring', 'scoring_test.csv') + file_path_test = join('examples', 'real_cases', 'data', 'scoring', 'scoring_test.csv') full_path_test = join(str(fedot_project_root()), file_path_test) task = Task(TaskTypesEnum.classification) train = InputData.from_csv(full_path_train, task=task) @@ -21,7 +21,7 @@ def get_scoring_data(): def get_kc2_data(): - file_path = join('cases', 'data', 'kc2', 'kc2.csv') + file_path = join('examples', 'real_cases', 'data', 'kc2', 'kc2.csv') full_path = join(str(fedot_project_root()), file_path) task = Task(TaskTypesEnum.classification) data = InputData.from_csv(full_path, task=task) @@ -31,7 +31,7 @@ def get_kc2_data(): def get_cholesterol_data(): - file_path = join('cases', 'data', 'cholesterol', 'cholesterol.csv') + file_path = join('examples', 'real_cases', 'data', 'cholesterol', 'cholesterol.csv') full_path = join(str(fedot_project_root()), file_path) task = Task(TaskTypesEnum.regression) data = 
InputData.from_csv(full_path, task=task) diff --git a/examples/advanced/time_series_forecasting/custom_model_tuning.py b/examples/advanced/time_series_forecasting/custom_model_tuning.py index 32f0c5d078..e5f516e9b1 100644 --- a/examples/advanced/time_series_forecasting/custom_model_tuning.py +++ b/examples/advanced/time_series_forecasting/custom_model_tuning.py @@ -142,7 +142,7 @@ def run_pipeline_tuning(time_series, len_forecast, pipeline_type): if __name__ == '__main__': - df = pd.read_csv('../../../cases/data/time_series/metocean.csv') + df = pd.read_csv('../../real_examples/real_cases/data/time_series/metocean.csv') time_series = np.array(df['value']) run_pipeline_tuning(time_series=time_series, len_forecast=50, diff --git a/examples/advanced/time_series_forecasting/multi_ts_arctic_forecasting.py b/examples/advanced/time_series_forecasting/multi_ts_arctic_forecasting.py index 9c4f309cab..4855679815 100644 --- a/examples/advanced/time_series_forecasting/multi_ts_arctic_forecasting.py +++ b/examples/advanced/time_series_forecasting/multi_ts_arctic_forecasting.py @@ -5,15 +5,14 @@ from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum from golem.core.tuning.simultaneous import SimultaneousTuner - -from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from matplotlib import pyplot as plt from sklearn.metrics import mean_squared_error, mean_absolute_error -from cases.multi_ts_level_forecasting import prepare_data +from examples.real_cases.multi_ts_level_forecasting import prepare_data from examples.simple.time_series_forecasting.ts_pipelines import ts_complex_ridge_smoothing_pipeline from fedot.core.composer.composer_builder import ComposerBuilder from fedot.core.composer.gp_composer.specific_operators import parameter_change_mutation +from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from 
fedot.core.pipelines.tuning.tuner_builder import TunerBuilder from fedot.core.repository.metrics_repository import \ RegressionMetricsEnum diff --git a/examples/advanced/time_series_forecasting/nemo.py b/examples/advanced/time_series_forecasting/nemo.py index daeb1913f9..6cba4e6f51 100644 --- a/examples/advanced/time_series_forecasting/nemo.py +++ b/examples/advanced/time_series_forecasting/nemo.py @@ -1,17 +1,17 @@ import warnings +from copy import deepcopy import numpy as np import pandas as pd from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error -from fedot.core.pipelines.pipeline import Pipeline -from fedot.core.pipelines.node import PipelineNode from fedot.core.data.data import InputData from fedot.core.data.multi_modal import MultiModalData - +from fedot.core.pipelines.node import PipelineNode +from fedot.core.pipelines.pipeline import Pipeline from fedot.core.repository.dataset_types import DataTypesEnum from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams -from copy import deepcopy + warnings.filterwarnings('ignore') @@ -86,15 +86,14 @@ def return_working_pipeline(): len_forecast = 40 ts_name = 'sea_level' -path_to_file = '../../cases/data/nemo/sea_surface_height.csv' -path_to_exog_file = '../../cases/data/nemo/sea_surface_height_nemo.csv' +path_to_file = '../../examples/real_cases/data/nemo/sea_surface_height.csv' +path_to_exog_file = '../../examples/real_cases/data/nemo/sea_surface_height_nemo.csv' df = pd.read_csv(path_to_file) time_series = np.array(df[ts_name]) df = pd.read_csv(path_to_exog_file) exog_variable = np.array(df[ts_name]) - # Let's divide our data on train and test samples train_data = time_series[:-len_forecast] test_data = time_series[-len_forecast:] diff --git a/examples/advanced/time_series_forecasting/nemo_multiple.py b/examples/advanced/time_series_forecasting/nemo_multiple.py index 8c72ad2ccf..383c7280d5 100644 --- 
a/examples/advanced/time_series_forecasting/nemo_multiple.py +++ b/examples/advanced/time_series_forecasting/nemo_multiple.py @@ -248,8 +248,8 @@ def boxplot_visualize(df, label): def run_single_example(len_forecast=40, visualization=False): ts_name = 'sea_level' - path_to_file = '../../cases/data/nemo/sea_surface_height.csv' - path_to_exog_file = '../../cases/data/nemo/sea_surface_height_nemo.csv' + path_to_file = '../../examples/real_cases/data/nemo/sea_surface_height.csv' + path_to_exog_file = '../../examples/real_cases/data/nemo/sea_surface_height_nemo.csv' df = pd.read_csv(path_to_file) time_series = df[ts_name] @@ -323,8 +323,8 @@ def run_prediction_examples(mode='single', visualization=False): if mode == 'single': run_single_example(len_forecast=40, visualization=visualization) if mode == 'multiple': - run_multiple_example(path_to_file='../../cases/data/nemo/SSH_points_grid.csv', - path_to_exog_file='../../cases/data/nemo/SSH_nemo_points_grid.csv', + run_multiple_example(path_to_file='../../examples/real_cases/data/nemo/SSH_points_grid.csv', + path_to_exog_file='../../examples/real_cases/data/nemo/SSH_nemo_points_grid.csv', out_path=None, len_forecast=30, visualization=visualization) diff --git a/examples/advanced/time_series_forecasting/sparse_lagged_tuning.py b/examples/advanced/time_series_forecasting/sparse_lagged_tuning.py index ddbf36e46e..9d37588fdb 100644 --- a/examples/advanced/time_series_forecasting/sparse_lagged_tuning.py +++ b/examples/advanced/time_series_forecasting/sparse_lagged_tuning.py @@ -129,7 +129,7 @@ def visualize(tuned, no_tuned, time, method_name): def run_tuning_comparison(n_repits=10, ts_size=1000, forecast_length=50, visualization=True): - file_path = os.path.join(str(fedot_project_root()), 'cases/data/time_series/temperature.csv') + file_path = os.path.join(str(fedot_project_root()), 'examples/real_cases/data/time_series/temperature.csv') df = pd.read_csv(file_path) time_series = np.array(df['value'])[:ts_size] diff --git 
a/examples/project_import_export.py b/examples/project_import_export.py index 92cb60dd63..03b27abd69 100644 --- a/examples/project_import_export.py +++ b/examples/project_import_export.py @@ -1,6 +1,6 @@ from sklearn.metrics import roc_auc_score as roc_auc -from cases.data.data_utils import get_scoring_case_data_paths +from examples.real_cases.data.data_utils import get_scoring_case_data_paths from fedot.core.data.data import InputData from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline diff --git a/cases/__init__.py b/examples/real_cases/__init__.py similarity index 100% rename from cases/__init__.py rename to examples/real_cases/__init__.py diff --git a/examples/real_cases/credit_scoring/__init__.py b/examples/real_cases/credit_scoring/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cases/credit_scoring/credit_scoring_problem.py b/examples/real_cases/credit_scoring/credit_scoring_problem.py similarity index 93% rename from cases/credit_scoring/credit_scoring_problem.py rename to examples/real_cases/credit_scoring/credit_scoring_problem.py index e6818f866a..a312bafd6d 100644 --- a/cases/credit_scoring/credit_scoring_problem.py +++ b/examples/real_cases/credit_scoring/credit_scoring_problem.py @@ -48,11 +48,11 @@ def get_scoring_data(): # a dataset that will be used as a train and test set during composition - file_path_train = 'cases/data/scoring/scoring_train.csv' + file_path_train = 'examples/real_cases/data/scoring/scoring_train.csv' full_path_train = fedot_project_root().joinpath(file_path_train) # a dataset for a final validation of the composed model - file_path_test = 'cases/data/scoring/scoring_test.csv' + file_path_test = 'examples/real_cases/data/scoring/scoring_test.csv' full_path_test = fedot_project_root().joinpath(file_path_test) return full_path_train, full_path_test diff --git a/cases/credit_scoring/credit_scoring_problem_multiobj.py 
b/examples/real_cases/credit_scoring/credit_scoring_problem_multiobj.py similarity index 98% rename from cases/credit_scoring/credit_scoring_problem_multiobj.py rename to examples/real_cases/credit_scoring/credit_scoring_problem_multiobj.py index 4a104531cb..919c2b60da 100644 --- a/cases/credit_scoring/credit_scoring_problem_multiobj.py +++ b/examples/real_cases/credit_scoring/credit_scoring_problem_multiobj.py @@ -7,15 +7,15 @@ from golem.visualisation.opt_viz_extra import OptHistoryExtraVisualizer from sklearn.metrics import roc_auc_score as roc_auc -from cases.credit_scoring.credit_scoring_problem import get_scoring_data +from examples.real_cases.credit_scoring.credit_scoring_problem import get_scoring_data from fedot.core.composer.composer_builder import ComposerBuilder from fedot.core.data.data import InputData from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder -from fedot.core.repository.operation_types_repository import get_operations_for_task from fedot.core.repository.metrics_repository import ClassificationMetricsEnum, ComplexityMetricsEnum +from fedot.core.repository.operation_types_repository import get_operations_for_task from fedot.core.repository.tasks import Task, TaskTypesEnum from fedot.core.utils import set_random_seed diff --git a/examples/real_cases/data/__init__.py b/examples/real_cases/data/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cases/data/arctic/topaz_multi_ts.csv b/examples/real_cases/data/arctic/topaz_multi_ts.csv similarity index 100% rename from cases/data/arctic/topaz_multi_ts.csv rename to examples/real_cases/data/arctic/topaz_multi_ts.csv diff --git a/cases/data/cancer/cancer_test.csv b/examples/real_cases/data/cancer/cancer_test.csv similarity index 100% rename from 
cases/data/cancer/cancer_test.csv rename to examples/real_cases/data/cancer/cancer_test.csv diff --git a/cases/data/cancer/cancer_train.csv b/examples/real_cases/data/cancer/cancer_train.csv similarity index 100% rename from cases/data/cancer/cancer_train.csv rename to examples/real_cases/data/cancer/cancer_train.csv diff --git a/cases/data/cholesterol/cholesterol.csv b/examples/real_cases/data/cholesterol/cholesterol.csv similarity index 100% rename from cases/data/cholesterol/cholesterol.csv rename to examples/real_cases/data/cholesterol/cholesterol.csv diff --git a/cases/data/data_utils.py b/examples/real_cases/data/data_utils.py similarity index 64% rename from cases/data/data_utils.py rename to examples/real_cases/data/data_utils.py index e405b34118..b489fad7e7 100644 --- a/cases/data/data_utils.py +++ b/examples/real_cases/data/data_utils.py @@ -5,8 +5,8 @@ def get_scoring_case_data_paths() -> Tuple[str, str]: - train_file_path = os.path.join('cases', 'data', 'scoring', 'scoring_train.csv') - test_file_path = os.path.join('cases', 'data', 'scoring', 'scoring_test.csv') + train_file_path = os.path.join('examples', 'real_cases', 'data', 'scoring', 'scoring_train.csv') + test_file_path = os.path.join('examples', 'real_cases', 'data', 'scoring', 'scoring_test.csv') full_train_file_path = os.path.join(str(fedot_project_root()), train_file_path) full_test_file_path = os.path.join(str(fedot_project_root()), test_file_path) diff --git a/cases/data/gapfilling/ts_temperature_gapfilling.csv b/examples/real_cases/data/gapfilling/ts_temperature_gapfilling.csv similarity index 100% rename from cases/data/gapfilling/ts_temperature_gapfilling.csv rename to examples/real_cases/data/gapfilling/ts_temperature_gapfilling.csv diff --git a/cases/data/kc2/kc2.csv b/examples/real_cases/data/kc2/kc2.csv similarity index 100% rename from cases/data/kc2/kc2.csv rename to examples/real_cases/data/kc2/kc2.csv diff --git a/cases/data/lena_levels/multi_sample.csv 
b/examples/real_cases/data/lena_levels/multi_sample.csv similarity index 100% rename from cases/data/lena_levels/multi_sample.csv rename to examples/real_cases/data/lena_levels/multi_sample.csv diff --git a/cases/data/metocean/metocean_data_test.csv b/examples/real_cases/data/metocean/metocean_data_test.csv similarity index 100% rename from cases/data/metocean/metocean_data_test.csv rename to examples/real_cases/data/metocean/metocean_data_test.csv diff --git a/cases/data/metocean/metocean_data_train.csv b/examples/real_cases/data/metocean/metocean_data_train.csv similarity index 100% rename from cases/data/metocean/metocean_data_train.csv rename to examples/real_cases/data/metocean/metocean_data_train.csv diff --git a/cases/data/mm_imdb.tar.gz b/examples/real_cases/data/mm_imdb.tar.gz similarity index 100% rename from cases/data/mm_imdb.tar.gz rename to examples/real_cases/data/mm_imdb.tar.gz diff --git a/cases/data/multivariate_ssh.csv b/examples/real_cases/data/multivariate_ssh.csv similarity index 100% rename from cases/data/multivariate_ssh.csv rename to examples/real_cases/data/multivariate_ssh.csv diff --git a/cases/data/nemo/SSH_nemo_points_grid.csv b/examples/real_cases/data/nemo/SSH_nemo_points_grid.csv similarity index 100% rename from cases/data/nemo/SSH_nemo_points_grid.csv rename to examples/real_cases/data/nemo/SSH_nemo_points_grid.csv diff --git a/cases/data/nemo/SSH_points_grid.csv b/examples/real_cases/data/nemo/SSH_points_grid.csv similarity index 100% rename from cases/data/nemo/SSH_points_grid.csv rename to examples/real_cases/data/nemo/SSH_points_grid.csv diff --git a/cases/data/nemo/sea_surface_height.csv b/examples/real_cases/data/nemo/sea_surface_height.csv similarity index 100% rename from cases/data/nemo/sea_surface_height.csv rename to examples/real_cases/data/nemo/sea_surface_height.csv diff --git a/cases/data/nemo/sea_surface_height_nemo.csv b/examples/real_cases/data/nemo/sea_surface_height_nemo.csv similarity index 100% rename from 
cases/data/nemo/sea_surface_height_nemo.csv rename to examples/real_cases/data/nemo/sea_surface_height_nemo.csv diff --git a/cases/data/nemo/test_nemo.csv b/examples/real_cases/data/nemo/test_nemo.csv similarity index 100% rename from cases/data/nemo/test_nemo.csv rename to examples/real_cases/data/nemo/test_nemo.csv diff --git a/cases/data/river_levels/station_levels.csv b/examples/real_cases/data/river_levels/station_levels.csv similarity index 100% rename from cases/data/river_levels/station_levels.csv rename to examples/real_cases/data/river_levels/station_levels.csv diff --git a/cases/data/scoring/scoring_test.csv b/examples/real_cases/data/scoring/scoring_test.csv similarity index 100% rename from cases/data/scoring/scoring_test.csv rename to examples/real_cases/data/scoring/scoring_test.csv diff --git a/cases/data/scoring/scoring_train.csv b/examples/real_cases/data/scoring/scoring_train.csv similarity index 100% rename from cases/data/scoring/scoring_train.csv rename to examples/real_cases/data/scoring/scoring_train.csv diff --git a/cases/data/spam/spamham.csv b/examples/real_cases/data/spam/spamham.csv similarity index 100% rename from cases/data/spam/spamham.csv rename to examples/real_cases/data/spam/spamham.csv diff --git a/cases/data/spamham.tar.gz b/examples/real_cases/data/spamham.tar.gz similarity index 100% rename from cases/data/spamham.tar.gz rename to examples/real_cases/data/spamham.tar.gz diff --git a/cases/data/time_series/economic_data.csv b/examples/real_cases/data/time_series/economic_data.csv similarity index 100% rename from cases/data/time_series/economic_data.csv rename to examples/real_cases/data/time_series/economic_data.csv diff --git a/cases/data/time_series/metocean.csv b/examples/real_cases/data/time_series/metocean.csv similarity index 100% rename from cases/data/time_series/metocean.csv rename to examples/real_cases/data/time_series/metocean.csv diff --git a/cases/data/time_series/temperature.csv 
b/examples/real_cases/data/time_series/temperature.csv similarity index 100% rename from cases/data/time_series/temperature.csv rename to examples/real_cases/data/time_series/temperature.csv diff --git a/cases/data/time_series/traffic.csv b/examples/real_cases/data/time_series/traffic.csv similarity index 100% rename from cases/data/time_series/traffic.csv rename to examples/real_cases/data/time_series/traffic.csv diff --git a/cases/dataset_preparation.py b/examples/real_cases/dataset_preparation.py similarity index 100% rename from cases/dataset_preparation.py rename to examples/real_cases/dataset_preparation.py diff --git a/cases/kc2_sourcecode_defects_classification.py b/examples/real_cases/kc2_sourcecode_defects_classification.py similarity index 97% rename from cases/kc2_sourcecode_defects_classification.py rename to examples/real_cases/kc2_sourcecode_defects_classification.py index 5ec2f20df7..d2de03e884 100644 --- a/cases/kc2_sourcecode_defects_classification.py +++ b/examples/real_cases/kc2_sourcecode_defects_classification.py @@ -8,7 +8,7 @@ def get_kc2_data(): - file_path = 'cases/data/kc2/kc2.csv' + file_path = 'examples/real_cases/data/kc2/kc2.csv' full_path = join(str(fedot_project_root()), file_path) task = Task(TaskTypesEnum.classification) data = InputData.from_csv(full_path, task=task, target_columns='problems') diff --git a/cases/metocean_forecasting_problem.py b/examples/real_cases/metocean_forecasting_problem.py similarity index 94% rename from cases/metocean_forecasting_problem.py rename to examples/real_cases/metocean_forecasting_problem.py index 87bb65c5bc..e4f9d1d0e9 100644 --- a/cases/metocean_forecasting_problem.py +++ b/examples/real_cases/metocean_forecasting_problem.py @@ -63,10 +63,10 @@ def run_metocean_forecasting_problem(train_file_path, test_file_path, # the dataset was obtained from NEMO model simulation for sea surface height # a dataset that will be used as a train and test set during composition - file_path_train = 
'cases/data/metocean/metocean_data_train.csv' + file_path_train = 'examples/real_cases/data/metocean/metocean_data_train.csv' # a dataset for a final validation of the composed model - file_path_test = 'cases/data/metocean/metocean_data_test.csv' + file_path_test = 'examples/real_cases/data/metocean/metocean_data_test.csv' run_metocean_forecasting_problem(file_path_train, file_path_test, forecast_length=6, timeout=5, diff --git a/cases/multi_target_levels_forecasting.py b/examples/real_cases/multi_target_levels_forecasting.py similarity index 97% rename from cases/multi_target_levels_forecasting.py rename to examples/real_cases/multi_target_levels_forecasting.py index 24aa988c4f..3e5b775e4c 100644 --- a/cases/multi_target_levels_forecasting.py +++ b/examples/real_cases/multi_target_levels_forecasting.py @@ -100,5 +100,5 @@ def run_multi_output_case(path, vis=False): if __name__ == '__main__': - path_file = fedot_project_root() / 'cases/data/lena_levels/multi_sample.csv' + path_file = fedot_project_root() / 'examples/real_cases/data/lena_levels/multi_sample.csv' run_multi_output_case(path_file, vis=True) diff --git a/cases/multi_ts_level_forecasting.py b/examples/real_cases/multi_ts_level_forecasting.py similarity index 96% rename from cases/multi_ts_level_forecasting.py rename to examples/real_cases/multi_ts_level_forecasting.py index 2d7ddb46c7..a20251a1f4 100644 --- a/cases/multi_ts_level_forecasting.py +++ b/examples/real_cases/multi_ts_level_forecasting.py @@ -19,7 +19,7 @@ def prepare_data(forecast_length, is_multi_ts): target_column = '61_91' task = Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=forecast_length)) - file_path = os.path.join(str(fedot_project_root()), 'cases/data/arctic/topaz_multi_ts.csv') + file_path = os.path.join(str(fedot_project_root()), 'examples/real_cases/data/arctic/topaz_multi_ts.csv') if is_multi_ts: data = InputData.from_csv_multi_time_series( file_path=file_path, diff --git 
a/cases/multivariate_ts_forecasting.py b/examples/real_cases/multivariate_ts_forecasting.py similarity index 96% rename from cases/multivariate_ts_forecasting.py rename to examples/real_cases/multivariate_ts_forecasting.py index 7893a4b705..2f6bbeafd8 100644 --- a/cases/multivariate_ts_forecasting.py +++ b/examples/real_cases/multivariate_ts_forecasting.py @@ -42,7 +42,7 @@ def plot_results(full_df: pd.DataFrame, target_column: int, forecast: np.array, def launch_fedot_forecasting(target_column: int = 1, forecast_horizon: int = 50, number_of_series_to_use: int = 25): """ Example how to launch FEDOT AutmoML for multivariate forecasting """ - path_to_file = fedot_project_root() / 'cases' / 'data' / 'multivariate_ssh.csv' + path_to_file = fedot_project_root() / 'examples' / 'real_cases' / 'data' / 'multivariate_ssh.csv' df = pd.read_csv(path_to_file, parse_dates=['datetime']) train_df, test_df = train_test_split(df, forecast_horizon) diff --git a/examples/real_cases/river_levels_prediction/__init__.py b/examples/real_cases/river_levels_prediction/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/cases/river_levels_prediction/river_level_case_composer.py b/examples/real_cases/river_levels_prediction/river_level_case_composer.py similarity index 99% rename from cases/river_levels_prediction/river_level_case_composer.py rename to examples/real_cases/river_levels_prediction/river_level_case_composer.py index b60f51e3cc..457a92301f 100644 --- a/cases/river_levels_prediction/river_level_case_composer.py +++ b/examples/real_cases/river_levels_prediction/river_level_case_composer.py @@ -3,12 +3,10 @@ import numpy as np import pandas as pd +from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters from golem.core.tuning.simultaneous import SimultaneousTuner - from sklearn.metrics import mean_absolute_error, mean_squared_error -from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters - from 
fedot.core.composer.composer_builder import ComposerBuilder from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup diff --git a/cases/river_levels_prediction/river_level_case_manual.py b/examples/real_cases/river_levels_prediction/river_level_case_manual.py similarity index 100% rename from cases/river_levels_prediction/river_level_case_manual.py rename to examples/real_cases/river_levels_prediction/river_level_case_manual.py diff --git a/cases/spam_detection.py b/examples/real_cases/spam_detection.py similarity index 87% rename from cases/spam_detection.py rename to examples/real_cases/spam_detection.py index 5ec26a2ee2..e4e58ec523 100644 --- a/cases/spam_detection.py +++ b/examples/real_cases/spam_detection.py @@ -2,7 +2,7 @@ from sklearn.metrics import roc_auc_score as roc_auc -from cases.dataset_preparation import unpack_archived_data +from examples.real_cases.dataset_preparation import unpack_archived_data from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.pipelines.node import PipelineNode @@ -24,7 +24,7 @@ def execute_pipeline_for_text_problem(train_data, test_data): def run_text_problem_from_meta_file(): - data_file_abspath = os.path.abspath(os.path.join('data', 'spam', 'spamham.csv')) + data_file_abspath = os.path.abspath(os.path.join('../../examples/real_cases/data', 'spam', 'spamham.csv')) data = InputData.from_text_meta_file(meta_file_path=data_file_abspath) @@ -36,7 +36,7 @@ def run_text_problem_from_meta_file(): def run_text_problem_from_files(): - data_abspath = os.path.abspath(os.path.join('data', 'spamham')) + data_abspath = os.path.abspath(os.path.join('../../examples/real_cases/data', 'spamham')) unpack_archived_data(data_abspath) diff --git a/cases/time_series_gapfilling_case.py b/examples/real_cases/time_series_gapfilling_case.py similarity index 97% rename from cases/time_series_gapfilling_case.py rename to 
examples/real_cases/time_series_gapfilling_case.py index 67c48cea36..c08811341a 100644 --- a/cases/time_series_gapfilling_case.py +++ b/examples/real_cases/time_series_gapfilling_case.py @@ -129,7 +129,7 @@ def run_gapfilling_case(file_path): # Example of using the algorithm to fill in gaps in a time series # The data is daily air temperature values from the weather station if __name__ == '__main__': - dataframe = run_gapfilling_case('cases/data/gapfilling/ts_temperature_gapfilling.csv') + dataframe = run_gapfilling_case('examples/real_cases/data/gapfilling/ts_temperature_gapfilling.csv') # Display metrics print_metrics(dataframe) diff --git a/examples/simple/api_builder/classification_with_api_builder.py b/examples/simple/api_builder/classification_with_api_builder.py index f3af6f09c9..9c90702135 100644 --- a/examples/simple/api_builder/classification_with_api_builder.py +++ b/examples/simple/api_builder/classification_with_api_builder.py @@ -3,8 +3,8 @@ if __name__ == '__main__': - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' - test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' + test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv' fedot = (FedotBuilder(problem='classification') .setup_composition(timeout=10, with_tuning=True, preset='best_quality') diff --git a/examples/simple/classification/api_classification.py b/examples/simple/classification/api_classification.py index a75dafbb18..3c60824f3c 100644 --- a/examples/simple/classification/api_classification.py +++ b/examples/simple/classification/api_classification.py @@ -4,8 +4,8 @@ def run_classification_example(timeout: float = None, visualization=False, with_tuning=True): problem = 'classification' - train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv' - test_data_path = 
f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv' + train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv' + test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv' baseline_model = Fedot(problem=problem, timeout=timeout) baseline_model.fit(features=train_data_path, target='target', predefined_model='rf') diff --git a/examples/simple/interpretable/api_explain.py b/examples/simple/interpretable/api_explain.py index ee3b97acb9..1e5b61271e 100644 --- a/examples/simple/interpretable/api_explain.py +++ b/examples/simple/interpretable/api_explain.py @@ -5,7 +5,7 @@ def run_api_explain_example(visualization=False, timeout=None, with_tuning=True): - train_data = pd.read_csv(f'{fedot_project_root()}/cases/data/cancer/cancer_train.csv', index_col=0) + train_data = pd.read_csv(f'{fedot_project_root()}/examples/real_cases/data/cancer/cancer_train.csv', index_col=0) figure_path = 'api_explain_example.png' # Feature and class names for visualization diff --git a/examples/simple/interpretable/pipeline_explain.py b/examples/simple/interpretable/pipeline_explain.py index 1467cb0acb..024125867b 100644 --- a/examples/simple/interpretable/pipeline_explain.py +++ b/examples/simple/interpretable/pipeline_explain.py @@ -10,7 +10,7 @@ def run_pipeline_explain(): # Specifying paths - train_data_path = os.path.join(fedot_project_root(), 'cases', 'data', 'cancer', 'cancer_train.csv') + train_data_path = os.path.join(fedot_project_root(), 'examples', 'real_cases', 'data', 'cancer', 'cancer_train.csv') figure_path = 'pipeline_explain_example.png' # Feature and class names for visualization diff --git a/examples/simple/pipeline_tune.py b/examples/simple/pipeline_tune.py index 66ce2608d8..9c98e5bab4 100644 --- a/examples/simple/pipeline_tune.py +++ b/examples/simple/pipeline_tune.py @@ -1,10 +1,8 @@ -from copy import deepcopy - import numpy as np from golem.core.tuning.simultaneous import SimultaneousTuner 
from sklearn.metrics import roc_auc_score as roc_auc -from cases.data.data_utils import get_scoring_case_data_paths +from examples.real_cases.data.data_utils import get_scoring_case_data_paths from examples.simple.classification.classification_pipelines import classification_complex_pipeline from fedot.core.data.data import InputData from fedot.core.pipelines.pipeline import Pipeline diff --git a/examples/simple/pipeline_tuning_with_iopt.py b/examples/simple/pipeline_tuning_with_iopt.py index dee9153183..553060d1f0 100644 --- a/examples/simple/pipeline_tuning_with_iopt.py +++ b/examples/simple/pipeline_tuning_with_iopt.py @@ -54,7 +54,7 @@ def tune_pipeline(pipeline: Pipeline, .add_node('knnreg', 1) .join_branches('rfr') .build()) - data_path = f'{fedot_project_root()}/cases/data/cholesterol/cholesterol.csv' + data_path = f'{fedot_project_root()}/examples/real_cases/data/cholesterol/cholesterol.csv' data = InputData.from_csv(data_path, task=Task(TaskTypesEnum.regression)) diff --git a/examples/simple/regression/api_regression.py b/examples/simple/regression/api_regression.py index b1a905982a..44127bd3c7 100644 --- a/examples/simple/regression/api_regression.py +++ b/examples/simple/regression/api_regression.py @@ -9,7 +9,7 @@ def run_regression_example(visualise: bool = False, with_tuning: bool = True, timeout: float = 2., preset: str = 'auto'): - data_path = f'{fedot_project_root()}/cases/data/cholesterol/cholesterol.csv' + data_path = f'{fedot_project_root()}/examples/real_cases/data/cholesterol/cholesterol.csv' data = InputData.from_csv(data_path, task=Task(TaskTypesEnum.regression)) diff --git a/examples/simple/time_series_forecasting/fitted_values.py b/examples/simple/time_series_forecasting/fitted_values.py index 7eb4a94119..8afe680051 100644 --- a/examples/simple/time_series_forecasting/fitted_values.py +++ b/examples/simple/time_series_forecasting/fitted_values.py @@ -18,7 +18,7 @@ def show_fitted_time_series(len_forecast=24): task = 
Task(TaskTypesEnum.ts_forecasting, TsForecastingParams(forecast_length=len_forecast)) - ts_input = InputData.from_csv_time_series(file_path='../../../cases/data/time_series/metocean.csv', + ts_input = InputData.from_csv_time_series(file_path='../../../examples/real_cases/data/time_series/metocean.csv', task=task, target_column='value') pipeline = get_simple_short_lagged_pipeline() diff --git a/test/data/datasets.py b/test/data/datasets.py index 122b8d0f92..be1cdf4e74 100644 --- a/test/data/datasets.py +++ b/test/data/datasets.py @@ -5,7 +5,7 @@ import pandas as pd from sklearn.model_selection import train_test_split -from cases.metocean_forecasting_problem import prepare_input_data +from examples.real_cases.metocean_forecasting_problem import prepare_input_data from fedot.core.data.data import InputData from fedot.core.data.data_split import train_test_data_setup from fedot.core.data.supplementary_data import SupplementaryData @@ -37,7 +37,7 @@ def get_split_data(): def get_cholesterol_dataset(): - data_path = f'{fedot_project_root()}/cases/data/cholesterol/cholesterol.csv' + data_path = f'{fedot_project_root()}/examples/real_cases/data/cholesterol/cholesterol.csv' data = InputData.from_csv(data_path, task=Task(TaskTypesEnum.regression)) train, test = train_test_data_setup(data) return train, test @@ -69,11 +69,11 @@ def get_dataset(task_type: str, validation_blocks: Optional[int] = None, n_sampl def get_multimodal_ts_data(size=500): - file_path_train = 'cases/data/metocean/metocean_data_train.csv' + file_path_train = 'examples/real_cases/data/metocean/metocean_data_train.csv' full_path_train = os.path.join(str(fedot_project_root()), file_path_train) # a dataset for a final validation of the composed model - file_path_test = 'cases/data/metocean/metocean_data_test.csv' + file_path_test = 'examples/real_cases/data/metocean/metocean_data_test.csv' full_path_test = os.path.join(str(fedot_project_root()), file_path_test) target_history, add_history, _ = 
prepare_input_data(full_path_train, full_path_test, diff --git a/test/integration/quality/test_quality_improvement.py b/test/integration/quality/test_quality_improvement.py index 58c14d63cb..0d25480868 100644 --- a/test/integration/quality/test_quality_improvement.py +++ b/test/integration/quality/test_quality_improvement.py @@ -12,8 +12,8 @@ def test_classification_quality_improvement(): # input data initialization - train_data_path = fedot_project_root().joinpath('cases/data/scoring/scoring_train.csv') - test_data_path = fedot_project_root().joinpath('cases/data/scoring/scoring_test.csv') + train_data_path = fedot_project_root().joinpath('examples/real_cases/data/scoring/scoring_train.csv') + test_data_path = fedot_project_root().joinpath('examples/real_cases/data/scoring/scoring_test.csv') seed = 50 problem = 'classification' @@ -45,8 +45,8 @@ def test_classification_quality_improvement(): def test_multiobjective_improvement(): # input data initialization - train_data_path = fedot_project_root().joinpath('cases/data/scoring/scoring_train.csv') - test_data_path = fedot_project_root().joinpath('cases/data/scoring/scoring_test.csv') + train_data_path = fedot_project_root().joinpath('examples/real_cases/data/scoring/scoring_train.csv') + test_data_path = fedot_project_root().joinpath('examples/real_cases/data/scoring/scoring_test.csv') problem = 'classification' seed = 50 diff --git a/test/integration/real_applications/test_real_cases.py b/test/integration/real_applications/test_real_cases.py index 57e7e3f343..3816023e89 100644 --- a/test/integration/real_applications/test_real_cases.py +++ b/test/integration/real_applications/test_real_cases.py @@ -2,11 +2,11 @@ from golem.core.tuning.simultaneous import SimultaneousTuner from sklearn.metrics import mean_squared_error -from cases.credit_scoring.credit_scoring_problem import run_credit_scoring_problem -from cases.metocean_forecasting_problem import run_metocean_forecasting_problem -from 
cases.river_levels_prediction.river_level_case_manual import run_river_experiment -from cases.spam_detection import run_text_problem_from_saved_meta_file -from cases.time_series_gapfilling_case import run_gapfilling_case +from examples.real_cases.credit_scoring.credit_scoring_problem import run_credit_scoring_problem +from examples.real_cases.metocean_forecasting_problem import run_metocean_forecasting_problem +from examples.real_cases.river_levels_prediction.river_level_case_manual import run_river_experiment +from examples.real_cases.spam_detection import run_text_problem_from_saved_meta_file +from examples.real_cases.time_series_gapfilling_case import run_gapfilling_case from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline from fedot.core.utils import fedot_project_root diff --git a/test/integration/test_profiler.py b/test/integration/test_profiler.py index 5cb55fe344..6a01cdb910 100644 --- a/test/integration/test_profiler.py +++ b/test/integration/test_profiler.py @@ -5,7 +5,7 @@ from golem.utilities.profiler.memory_profiler import MemoryProfiler from golem.utilities.profiler.time_profiler import TimeProfiler -from cases.credit_scoring.credit_scoring_problem import get_scoring_data, run_credit_scoring_problem +from examples.real_cases.credit_scoring.credit_scoring_problem import get_scoring_data, run_credit_scoring_problem @pytest.fixture(scope='session', autouse=True) diff --git a/test/sensitivity/test_sensitivity.py b/test/sensitivity/test_sensitivity.py index 8688c3896b..db27a35e6f 100644 --- a/test/sensitivity/test_sensitivity.py +++ b/test/sensitivity/test_sensitivity.py @@ -1,7 +1,7 @@ import os from unittest.mock import patch -from cases.data.data_utils import get_scoring_case_data_paths +from examples.real_cases.data.data_utils import get_scoring_case_data_paths from fedot.core.data.data import InputData from fedot.core.pipelines.node import PipelineNode from fedot.core.pipelines.pipeline import Pipeline 
diff --git a/test/unit/data/test_data_merge_text.py b/test/unit/data/test_data_merge_text.py index 6080e7f0d8..3743ed2607 100644 --- a/test/unit/data/test_data_merge_text.py +++ b/test/unit/data/test_data_merge_text.py @@ -10,7 +10,7 @@ def load_sample_text(file_path=None, label_col='label'): - file_path = file_path or fedot_project_root().joinpath('cases', 'data', 'spam', 'spamham.csv') + file_path = file_path or fedot_project_root().joinpath('examples', 'real_cases', 'data', 'spam', 'spamham.csv') df_text = pd.read_csv(file_path) df_text = df_text.sample(frac=1).reset_index(drop=True) diff --git a/test/unit/data_operations/test_data_operation_params.py b/test/unit/data_operations/test_data_operation_params.py index 5d9d69397c..96c6ce4def 100644 --- a/test/unit/data_operations/test_data_operation_params.py +++ b/test/unit/data_operations/test_data_operation_params.py @@ -69,7 +69,7 @@ def test_ransac_with_invalid_params_fit_correctly(): than the number of objects """ - data_path = f'{fedot_project_root()}/cases/data/cholesterol/cholesterol.csv' + data_path = f'{fedot_project_root()}/examples/real_cases/data/cholesterol/cholesterol.csv' data = InputData.from_csv(data_path) train, test = train_test_data_setup(data)