Examples and docs updated (#1331)

* Examples and docs updated
aimclub · Sep 4, 2024 · 3998d22 · 3998d22
1 parent 5d87668
commit 3998d22
Show file tree

Hide file tree

Showing 87 changed files with 128 additions and 123 deletions.
diff --git a/README.rst b/README.rst
@@ -131,9 +131,9 @@ Jupyter ноутбуки с примерами находятся в репоз
 
 Расширенные примеры:
 
-- Задача с кредитным скорингом `binary classification task <https://github.com/aimclub/FEDOT/blob/master/cases/credit_scoring/credit_scoring_problem.py>`__
-- Прогнозирование временных рядов `random process regression <https://github.com/aimclub/FEDOT/blob/master/cases/metocean_forecasting_problem.py>`__
-- Обнаружение спама `natural language preprocessing <https://github.com/aimclub/FEDOT/blob/master/cases/spam_detection.py>`__
+- Задача с кредитным скорингом `binary classification task <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/credit_scoring/credit_scoring_problem.py>`__
+- Прогнозирование временных рядов `random process regression <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/metocean_forecasting_problem.py>`__
+- Обнаружение спама `natural language preprocessing <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/spam_detection.py>`__
 - Предсказание сорта вина `multi-modal data <https://github.com/aimclub/FEDOT/blob/master/examples/advanced/multimodal_text_num_example.py>`__
 
 

diff --git a/README_en.rst b/README_en.rst
@@ -131,9 +131,9 @@ Also, external examples are available:
 
 Extended examples:
 
-- Credit scoring problem, i.e. `binary classification task <https://github.com/aimclub/FEDOT/blob/master/cases/credit_scoring/credit_scoring_problem.py>`__
-- Time series forecasting, i.e. `random process regression <https://github.com/aimclub/FEDOT/blob/master/cases/metocean_forecasting_problem.py>`__
-- Spam detection, i.e. `natural language preprocessing <https://github.com/aimclub/FEDOT/blob/master/cases/spam_detection.py>`__
+- Credit scoring problem, i.e. `binary classification task <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/credit_scoring/credit_scoring_problem.py>`__
+- Time series forecasting, i.e. `random process regression <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/metocean_forecasting_problem.py>`__
+- Spam detection, i.e. `natural language preprocessing <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/spam_detection.py>`__
 - Wine variety prediction with `multi-modal data <https://github.com/aimclub/FEDOT/blob/master/examples/advanced/multimodal_text_num_example.py>`__
 
 

diff --git a/cases/evo_operators_comparison/req.txt b/cases/evo_operators_comparison/req.txt
diff --git a/docs/source/advanced/automated_pipelines_design.rst b/docs/source/advanced/automated_pipelines_design.rst
@@ -9,4 +9,4 @@ Example of running optimization through the API can be found in the `api classif
 
 If instead users need to customize the optimization algorithm (e.g. with custom genetic operators like mutations or crossover or custom verification rules) then it's possible by directly using `ComposerBuilder` class or one of the optimizers from GOLEM.
 
-Example of a customized usage can be found in `credit scoring case problem <https://github.com/aimclub/FEDOT/blob/master/cases/credit_scoring/credit_scoring_problem_multiobj.py>`_.
+Example of a customized usage can be found in `credit scoring case problem <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/credit_scoring/credit_scoring_problem_multiobj.py>`_.
diff --git a/docs/source/advanced/hyperparameters_tuning.rst b/docs/source/advanced/hyperparameters_tuning.rst
@@ -721,8 +721,8 @@ Another examples can be found here:
 * `Tuning pipelines with sparse_lagged / lagged node  <https://github.com/aimclub/FEDOT/blob/master/examples/advanced/time_series_forecasting/sparse_lagged_tuning.py>`_
 * `Topaz multi time series forecasting <https://github.com/aimclub/FEDOT/blob/master/examples/advanced/time_series_forecasting/multi_ts_arctic_forecasting.py>`_
 * `Custom model tuning <https://github.com/aimclub/FEDOT/blob/master/examples/advanced/time_series_forecasting/custom_model_tuning.py>`_
-* `Case: river level forecasting with composer <https://github.com/aimclub/FEDOT/blob/master/cases/river_levels_prediction/river_level_case_composer.py>`_
-* `Case: river level forecasting (manual) <https://github.com/aimclub/FEDOT/blob/master/cases/river_levels_prediction/river_level_case_manual.py>`_
+* `Case: river level forecasting with composer <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/river_levels_prediction/river_level_case_composer.py>`_
+* `Case: river level forecasting (manual) <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/river_levels_prediction/river_level_case_manual.py>`_
 
 **Multitask**
 

diff --git a/docs/source/basics/pipeline_save_load.rst b/docs/source/basics/pipeline_save_load.rst
@@ -19,7 +19,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas
             .. code-block:: python
 
                 problem = 'classification'
-                train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
+                train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
 
                 baseline_model = Fedot(problem=problem, timeout=1, seed=42)
                 baseline_model.fit(features=train_data_path, target='target', predefined_model='rf')
@@ -62,7 +62,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas
             .. code-block:: python
 
                 problem = 'classification'
-                train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
+                train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
 
                 baseline_model = Fedot(problem=problem, timeout=1, seed=42)
                 baseline_model.fit(features=train_data_path, target='target', predefined_model='rf')
@@ -99,7 +99,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas
             .. code-block:: python
 
                 problem = 'classification'
-                train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
+                train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
 
                 baseline_model = Fedot(problem=problem, timeout=1, seed=42)
                 baseline_model.fit(features=train_data_path, target='target', predefined_model='rf')
@@ -137,7 +137,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas
             .. code-block:: python
 
                 problem = 'classification'
-                train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
+                train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
 
                 baseline_model = Fedot(problem=problem, timeout=1, seed=42)
                 baseline_model.fit(features=train_data_path, target='target', predefined_model='rf')
@@ -175,7 +175,7 @@ FEDOT provides methods for saving and loading pipelines in the ``Pipeline`` clas
             .. code-block:: python
 
                 problem = 'classification'
-                train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
+                train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
 
                 baseline_model = Fedot(problem=problem, timeout=1, seed=42)
                 baseline_model.fit(features=train_data_path, target='target', predefined_model='rf')

diff --git a/docs/source/basics/tabular_data.rst b/docs/source/basics/tabular_data.rst
@@ -113,4 +113,4 @@ More details you can find in the follow links:
 
 **Cases**
 
-* `Case: Credit scoring problem <https://github.com/aimclub/FEDOT/blob/master/cases/credit_scoring/credit_scoring_problem.py>`_
+* `Case: Credit scoring problem <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/credit_scoring/credit_scoring_problem.py>`_
diff --git a/docs/source/basics/ts_forecasting.rst b/docs/source/basics/ts_forecasting.rst
@@ -624,5 +624,5 @@ Examples
 
 **Cases**
 
-* `Case: river level forecasting with composer <https://github.com/aimclub/FEDOT/blob/master/cases/river_levels_prediction/river_level_case_composer.py>`_
-* `Case: river level forecasting (manual) <https://github.com/aimclub/FEDOT/blob/master/cases/river_levels_prediction/river_level_case_manual.py>`_
+* `Case: river level forecasting with composer <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/river_levels_prediction/river_level_case_composer.py>`_
+* `Case: river level forecasting (manual) <https://github.com/aimclub/FEDOT/blob/master/examples/real_cases/river_levels_prediction/river_level_case_manual.py>`_
diff --git a/docs/source/faq/abstract.rst b/docs/source/faq/abstract.rst
@@ -1,13 +1,22 @@
 Abstract
 ========
 
-.. topic:: What is Fedot?
+.. topic:: What is FEDOT?
 
-    *Fedot is the AutoML-like framework for the automated generation of the
+    *FEDOT is the AutoML-like framework for the automated generation of the
     data-driven composite models. It can solve classification, regression,
     clustering, and forecasting problems.*
 
-.. topic:: Why should I use Fedot instead of existing state-of-the-art solutions (H2O/TPOT/etc)?
+.. topic:: Why is FEDOT considered a framework?
+
+    *While the exact difference between 'library' and 'framework' is a bit ambiguous and
+    context-dependent in many cases, we still consider FEDOT as a framework.*
+
+    *The reason is that it can be used not only to solve pre-defined AutoML tasks,
+    but also to build new derivative solutions.*
+    *Examples:* `FEDOT.NAS`_, `FEDOT.Industrial`_.
+
+.. topic:: Why should I use FEDOT instead of existing state-of-the-art solutions (H2O/TPOT/etc)?
 
     *In practice, the existing AutoML solutions are really effective for the
     limited set of problems only. During the model learning, modern AutoML
@@ -21,16 +30,16 @@ Abstract
     efficient way. Also, we are aimed to outperform the existing solutions
     even for well-known benchmarks (e.g. PMLB datasets).*
 
-.. topic:: Can I install Fedot using pip/conda?
+.. topic:: Can I install FEDOT using pip/conda?
 
-    *Yes, follow the* `link`_
+    *Yes, follow the* `link`_.
 
-.. topic:: Can I use Fedot in my project/research/etc?
+.. topic:: Can I use FEDOT in my project/research/etc?
 
     *Yes, you can. The Fedot is published under the BSD-3 license. Also, we
     will be happy to help the users to adopt Fedot to their needs.*
 
-.. topic:: Why it is named Fedot?
+.. topic:: Why is it named FEDOT?
 
     *We decided to use this archaic Russian first name to add a bit of
     fantasy spirit into the development process.*
@@ -40,3 +49,9 @@ Abstract
 
 .. _link: https://pypi.org/project/fedot
 .. `link` replace:: *link*
+
+.. _FEDOT.NAS: https://github.com/ITMO-NSS-team/nas-fedot
+.. `FEDOT.NAS` replace:: *FEDOT.NAS*
+
+.. _FEDOT.Industrial: https://github.com/aimclub/Fedot.Industrial
+.. `FEDOT.Industrial` replace:: *FEDOT.Industrial*
diff --git a/examples/advanced/additional_learning.py b/examples/advanced/additional_learning.py
@@ -13,8 +13,8 @@
 
 
 def run_additional_learning_example():
-    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
-    test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'
+    train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
+    test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv'
 
     train_data = pd.read_csv(train_data_path)
     test_data = pd.read_csv(test_data_path)

diff --git a/examples/advanced/automl/pipeline_from_automl.py b/examples/advanced/automl/pipeline_from_automl.py
@@ -1,12 +1,12 @@
 from datetime import timedelta
 
-from fedot.core.repository.operation_types_repository import OperationTypesRepository
 from sklearn.metrics import roc_auc_score as roc_auc
 
-from cases.data.data_utils import get_scoring_case_data_paths
+from examples.real_cases.data.data_utils import get_scoring_case_data_paths
 from fedot.core.data.data import InputData
 from fedot.core.pipelines.node import PipelineNode
 from fedot.core.pipelines.pipeline import Pipeline
+from fedot.core.repository.operation_types_repository import OperationTypesRepository
 
 
 # TODO not working now - add switch to other repository.json

diff --git a/examples/advanced/automl/tpot_vs_fedot.py b/examples/advanced/automl/tpot_vs_fedot.py
@@ -55,7 +55,7 @@ def run_tpot_vs_fedot_example(train_file_path: str, test_file_path: str):
 
 
 if __name__ == '__main__':
-    train_file_path = "../../../cases/data/scoring/scoring_train.csv"
-    test_file_path = "../../../cases/data/scoring/scoring_test.csv"
+    train_file_path = "../../real_examples/real_cases/data/scoring/scoring_train.csv"
+    test_file_path = "../../real_examples/real_cases/data/scoring/scoring_test.csv"
 
     run_tpot_vs_fedot_example(train_file_path, test_file_path)
diff --git a/examples/advanced/decompose/classification_refinement_example.py b/examples/advanced/decompose/classification_refinement_example.py
@@ -1,6 +1,6 @@
 from golem.core.tuning.simultaneous import SimultaneousTuner
 
-from cases.credit_scoring.credit_scoring_problem import get_scoring_data, calculate_validation_metric
+from examples.real_cases.credit_scoring.credit_scoring_problem import get_scoring_data, calculate_validation_metric
 from fedot.core.data.data import InputData
 from fedot.core.pipelines.node import PipelineNode
 from fedot.core.pipelines.pipeline import Pipeline

diff --git a/examples/advanced/decompose/refinement_forecast_example.py b/examples/advanced/decompose/refinement_forecast_example.py
@@ -14,7 +14,6 @@
 from fedot.core.repository.tasks import TaskTypesEnum, Task, TsForecastingParams
 from fedot.core.utils import set_random_seed
 
-
 warnings.filterwarnings('ignore')
 
 
@@ -163,6 +162,6 @@ def run_refinement_forecast(path_to_file, len_forecast=100, lagged=150,
 if __name__ == '__main__':
     set_random_seed(2020)
 
-    path = '../../../cases/data/time_series/economic_data.csv'
+    path = '../../real_examples/real_cases/data/time_series/economic_data.csv'
     run_refinement_forecast(path, len_forecast=50, validation_blocks=5,
                             lagged=50, vis_with_decompose=True)
diff --git a/examples/advanced/decompose/regression_refinement_example.py b/examples/advanced/decompose/regression_refinement_example.py
@@ -130,5 +130,5 @@ def run_river_experiment(file_path, with_tuning=False):
 
 
 if __name__ == '__main__':
-    run_river_experiment(file_path=f'{fedot_project_root()}/cases/data/river_levels/station_levels.csv',
+    run_river_experiment(file_path=f'{fedot_project_root()}/examples/real_cases/data/river_levels/station_levels.csv',
                          with_tuning=True)
diff --git a/...rs_comparison/evo_operators_comparison.py → ...ples/advanced/evo_operators_comparison.py b/...rs_comparison/evo_operators_comparison.py → ...ples/advanced/evo_operators_comparison.py
@@ -3,20 +3,19 @@
 from typing import Sequence, Optional
 
 import numpy as np
-from matplotlib import pyplot as plt
-from sklearn.metrics import roc_auc_score as roc_auc
-
-from fedot.core.composer.composer_builder import ComposerBuilder
-from fedot.core.data.data import InputData
 from golem.core.optimisers.genetic.gp_params import GPAlgorithmParameters
 from golem.core.optimisers.genetic.operators.crossover import CrossoverTypesEnum
 from golem.core.optimisers.genetic.operators.inheritance import GeneticSchemeTypesEnum
 from golem.core.optimisers.genetic.operators.mutation import MutationTypesEnum
 from golem.core.optimisers.opt_history_objects.opt_history import OptHistory
+from matplotlib import pyplot as plt
+from sklearn.metrics import roc_auc_score as roc_auc
 
+from fedot.core.composer.composer_builder import ComposerBuilder
+from fedot.core.data.data import InputData
 from fedot.core.pipelines.pipeline_composer_requirements import PipelineComposerRequirements
-from fedot.core.repository.operation_types_repository import get_operations_for_task
 from fedot.core.repository.metrics_repository import ClassificationMetricsEnum
+from fedot.core.repository.operation_types_repository import get_operations_for_task
 from fedot.core.repository.tasks import Task, TaskTypesEnum
 from fedot.core.utils import fedot_project_root
 
@@ -178,8 +177,8 @@ def run_experiment_with_saved_histories(save_dir):
 
 
 if __name__ == '__main__':
-    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
-    test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'
+    train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
+    test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv'
 
     run_experiment(train_data_path,
                    test_data_path,

diff --git a/examples/advanced/fedot_based_solutions/external_optimizer.py b/examples/advanced/fedot_based_solutions/external_optimizer.py
@@ -7,8 +7,8 @@
 
 
 def run_with_random_search_composer():
-    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
-    test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'
+    train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
+    test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv'
 
     composer_params = {'available_operations': ['class_decompose', 'rf', 'linear', 'xgboost', 'dt'],
                        'optimizer': RandomMutationSearchOptimizer}

diff --git a/examples/advanced/gpu_example.py b/examples/advanced/gpu_example.py
@@ -69,8 +69,8 @@ def run_pipeline_with_specific_evaluation_mode(train_data: InputData, test_data:
 
 
 def get_scoring_data() -> Tuple[InputData, InputData]:
-    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
-    test_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_test.csv'
+    train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
+    test_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_test.csv'
 
     train_data = InputData.from_csv(train_data_path)
     test_data = InputData.from_csv(test_data_path)

diff --git a/examples/advanced/multi_modal_pipeline.py b/examples/advanced/multi_modal_pipeline.py
@@ -3,7 +3,7 @@
 
 from sklearn.metrics import f1_score as f1
 
-from cases.dataset_preparation import unpack_archived_data
+from examples.real_cases.dataset_preparation import unpack_archived_data
 from fedot import Fedot
 from fedot.core.data.data import InputData, OutputData
 from fedot.core.data.data_split import train_test_data_setup

diff --git a/examples/advanced/parallelization_comparison.py b/examples/advanced/parallelization_comparison.py
@@ -28,7 +28,7 @@ def run_experiments(timeout: float = None, partitions_n=10, n_jobs=-1):
     :param n_jobs: how many processors you want to use in a multiprocessing mode
 
     """
-    train_data_path = f'{fedot_project_root()}/cases/data/scoring/scoring_train.csv'
+    train_data_path = f'{fedot_project_root()}/examples/real_cases/data/scoring/scoring_train.csv'
 
     problem = 'classification'
 

diff --git a/examples/advanced/profiler_example.py b/examples/advanced/profiler_example.py
@@ -3,10 +3,9 @@
 from golem.utilities.profiler.memory_profiler import MemoryProfiler
 from golem.utilities.profiler.time_profiler import TimeProfiler
 
-from cases.credit_scoring.credit_scoring_problem import run_credit_scoring_problem, get_scoring_data
+from examples.real_cases.credit_scoring.credit_scoring_problem import run_credit_scoring_problem, get_scoring_data
 from fedot.core.utils import set_random_seed
 
-
 if __name__ == '__main__':
     set_random_seed(1)
     # JUST UNCOMMENT WHAT TYPE OF PROFILER DO YOU NEED

diff --git a/examples/advanced/remote_execution/remote_fit_example.py b/examples/advanced/remote_execution/remote_fit_example.py
@@ -16,7 +16,7 @@ def run_experiment():
     # WARNING - THIS SCRIPT CAN BE EVALUATED ONLY WITH THE ACCESS TO DATAMALL SYSTEM
 
     # LOCAL RUN
-    folder = fedot_project_root().joinpath('cases', 'data', 'scoring')
+    folder = fedot_project_root().joinpath('examples', 'real_cases', 'data', 'scoring')
     path = folder.joinpath('scoring_train.csv')
 
     start = datetime.now()

diff --git a/examples/advanced/remote_execution/ts_composer_with_integration.py b/examples/advanced/remote_execution/ts_composer_with_integration.py
@@ -20,7 +20,7 @@ def run_automl(data: MultiModalData, features_to_use,
                timeout: int = 1):
     """ Launch AutoML FEDOT algorithm for time series forecasting task """
 
-    metocean_folder = fedot_project_root().joinpath('cases', 'data', 'metocean')
+    metocean_folder = fedot_project_root().joinpath('examples', 'real_cases', 'data', 'metocean')
 
     connect_params = {}
     exec_params = {
@@ -73,7 +73,7 @@ def run_automl(data: MultiModalData, features_to_use,
     features_to_use = ['wind_speed', 'sea_height']
 
     data = MultiModalData.from_csv_time_series(
-        file_path=fedot_project_root().joinpath('cases/data/metocean/metocean_data_train.csv'),
+        file_path=fedot_project_root().joinpath('examples/real_cases/data/metocean/metocean_data_train.csv'),
         columns_to_use=features_to_use,
         target_column='sea_height',
         index_col='datetime')