aimclub · Lopa10ko · Dec 25, 2023 · Dec 6, 2023 · Dec 7, 2023 · Dec 7, 2023
diff --git a/docs/source/introduction/fedot_features/automation_features.rst b/docs/source/introduction/fedot_features/automation_features.rst
@@ -36,61 +36,74 @@ Dimensional operations
 
 FEDOT supports bunch of dimensionality preprocessing operations that can be be added to the pipeline as a node.
 
-Feature selection
-"""""""""""""""""
-
-There are different linear and non-linear algorithms for regression and classification tasks
-which uses scikit-learn's Recursive Feature Elimination (RFE).
-
-.. list-table:: Feature selection operations
-   :header-rows: 1
-
-   * - API name
-     - Definition
-   * - rfe_lin_reg
-     - RFE via Linear Regression regressor
-   * - rfe_non_lin_reg
-     - RFE via Decision tree regressor
-   * - rfe_lin_class
-     - RFE via Logistic Regression classifier
-   * - rfe_non_lin_class
-     - RFE via Decision tree classifier
-
-Feature extraction
-""""""""""""""""""
-
-These algorithms are used for generating new features.
-
-.. list-table:: Feature extraction operations
-   :header-rows: 1
-
-   * - API name
-     - Definition
-   * - pca
-     - Principal Component Analysis (PCA)
-   * - kernel_pca
-     - Principal Component Analysis (PCA) with kernel methods
-   * - fast_ica
-     - Fast Independent Component Analysis (FastICA)
-   * - poly_features
-     - Polynomial Features transformations
-   * - lagged
-     - Time-series to table transformation
-   * - sparse_lagged
-     - Time-series to sparse table transformation
-
-Feature expansion
-"""""""""""""""""
-
-These methods expands specific features to a bigger amount
-
-.. list-table:: Feature expansion operations
-   :header-rows: 1
-
-   * - API name
-     - Definition
-   * - one_hot_encoding
-     - One-hot encoding
+.. csv-table:: Feature transformation operations definitions
+   :header: "API name","Definition", "Problem"
+
+   `rfe_lin_reg`,Linear Regression Recursive Feature Elimination, Feature selection
+   `rfe_non_lin_reg`,Decision Tree Recursive Feature Elimination, Feature selection
+   `rfe_lin_class`,Logistic Regression Recursive Feature Elimination, Feature selection
+   `rfe_non_lin_class`,Decision Tree Recursive Feature Elimination, Feature selection
+   `isolation_forest_reg`,Regression Isolation Forest, Regression anomaly detection
+   `isolation_forest_class`,Classification Isolation Forest, Classification anomaly detection
+   `ransac_lin_reg`,Regression Random Sample Consensus, Outlier detection
+   `ransac_non_lin_reg`,Decision Tree Random Sample Consensus, Outlier detection
+   `pca`,Principal Component Analysis, Dimensionality reduction
+   `kernel_pca`,Kernel Principal Component Analysis, Dimensionality reduction
+   `fast_ica`,Independent Component Analysis, Feature extraction
+   `poly_features`,Polynomial Features, Feature engineering
+   `decompose`,Difference between regression prediction and target as a new target, Feature extraction
+   `class_decompose`,Difference between classification prediction and target as a new target, Feature extraction
+   `cntvect`,Count Vectorizer, Text feature extraction
+   `text_clean`,Lemmatization and Stemming, Text data processing
+   `tfidf`,TF-IDF Vectorizer, Text feature extraction
+   `word2vec_pretrained`,Text vectorization, Text feature extraction
+   `lagged`,Time series to the Hankel matrix transformation, Time series transformation
+   `sparse_lagged`,As `lagged` but with sparsing, Time series transformation
+   `smoothing`,Moving average, Time series transformation
+   `gaussian_filter`,Gaussian Filter, Time series transformation
+   `diff_filter`,Derivative Filter, Time series transformation
+   `cut`,Cut time series, Time series transformation
+   `scaling`,Scaling, Feature scaling
+   `normalization`,Normalization, Feature normalization
+   `simple_imputation`,Imputation, Data imputation
+   `one_hot_encoding`,One-Hot Encoder, Feature encoding
+   `label_encoding`,Label Encoder, Feature encoding
+   `resample`,Imbalanced binary class transformation in classification, Data transformation
+
+
+.. csv-table:: Feature transformation operations implementations
+   :header: "API name","Model used","Presets"
+
+   `rfe_lin_reg`,`sklearn.feature_selection.RFE`, 
+   `rfe_non_lin_reg`,`sklearn.feature_selection.RFE`,
+   `rfe_lin_class`,`sklearn.feature_selection.RFE`,
+   `rfe_non_lin_class`,`sklearn.feature_selection.RFE`,
+   `isolation_forest_reg`,`sklearn.ensemble.IsolationForest`,
+   `isolation_forest_class`,`sklearn.ensemble.IsolationForest`,
+   `ransac_lin_reg`,`sklearn.linear_model.RANSACRegressor`,`fast_train` `*tree`
+   `ransac_non_lin_reg`,`sklearn.linear_model.RANSACRegressor`, `*tree`
+   `pca`,`sklearn.decomposition.PCA`,`fast_train` `ts` `*tree`
+   `kernel_pca`,`sklearn.decomposition.KernelPCA`,`ts` `*tree`
+   `fast_ica`,`sklearn.decomposition.FastICA`,`ts` `*tree`
+   `poly_features`,`sklearn.preprocessing.PolynomialFeatures`,
+   `decompose`,`FEDOT model`,`fast_train` `ts` `*tree`
+   `class_decompose`,`FEDOT model`,`fast_train` `*tree`
+   `cntvect`,`sklearn.feature_extraction.text.CountVectorizer`,
+   `text_clean`,`nltk.stem.WordNetLemmatizer nltk.stem.SnowballStemmer`,
+   `tfidf`,`sklearn.feature_extraction.text.TfidfVectorizer`,
+   `word2vec_pretrained`,`Gensim-data model <https://github.com/piskvorky/gensim-data>`_,
+   `lagged`,`FEDOT model`,`fast_train` `ts`
+   `sparse_lagged`,`FEDOT model`,`fast_train` `ts`
+   `smoothing`,`FEDOT model`,`fast_train` `ts`
+   `gaussian_filter`,`FEDOT model`,`fast_train` `ts`
+   `diff_filter`,`FEDOT model`,`ts`
+   `cut`,`FEDOT model`,`fast_train` `ts`
+   `scaling`,`sklearn.preprocessing.StandardScaler`,`fast_train` `ts` `*tree`
+   `normalization`,`sklearn.preprocessing.MinMaxScaler`,`fast_train` `ts` `*tree`
+   `simple_imputation`,`sklearn.impute.SimpleImputer`,`fast_train` `*tree`
+   `one_hot_encoding`,`sklearn.preprocessing.OneHotEncoder`,
+   `label_encoding`,`sklearn.preprocessing.LabelEncoder`,`fast_train` `*tree`
+   `resample`,`FEDOT model using sklearn.utils.resample`,
 
 
 Models used
@@ -112,4 +125,90 @@ Apart from that there are other options whose names speak for themselves: ``'sta
 ``'automl'`` (the latter uses only AutoML models as pipeline nodes).
 
 .. note::
-    To make it simple, FEDOT uses ``auto`` by default to identify the best choice for you.
+    To make it simple, FEDOT uses ``auto`` by default to identify the best choice for you.
+
+
+.. csv-table:: Available models definitions
+   :header: "API name","Definition","Problem"
+
+   `adareg`,AdaBoost regressor,Regression
+   `catboostreg`,Catboost regressor,Regression
+   `dtreg`,Decision Tree regressor,Regression
+   `gbr`,Gradient Boosting regressor,Regression
+   `knnreg`,K-nearest neighbors regressor,Regression
+   `lasso`,Lasso Linear regressor,Regression
+   `lgbmreg`,Light Gradient Boosting Machine regressor,Regression
+   `linear`,Linear Regression regressor,Regression
+   `rfr`,Random Forest regressor,Regression
+   `ridge`,Ridge Linear regressor,Regression
+   `sgdr`,Stochastic Gradient Descent regressor,Regression
+   `svr`,Linear Support Vector regressor,Regression
+   `treg`,Extra Trees regressor,Regression
+   `xgbreg`,Extreme Gradient Boosting regressor,Regression
+   `bernb`,Naive Bayes classifier (multivariate Bernoulli),Classification
+   `catboost`,Catboost classifier,Classification
+   `cnn`,Convolutional Neural Network,Classification
+   `dt`,Decision Tree classifier,Classification
+   `knn`,K-nearest neighbors classifier,Classification
+   `lda`,Linear Discriminant Analysis,Classification
+   `lgbm`,Light Gradient Boosting Machine classifier,Classification
+   `logit`,Logistic Regression classifier,Classification
+   `mlp`,Multi-layer Perceptron classifier,Classification
+   `multinb`,Naive Bayes classifier (multinomial),Classification
+   `qda`,Quadratic Discriminant Analysis,Classification
+   `rf`,Random Forest classifier,Classification
+   `svc`,Support Vector classifier,Classification
+   `xgboost`,Extreme Gradient Boosting classifier,Classification
+   `kmeans`,K-Means clustering,Clustering
+   `ar`,AutoRegression,Forecasting
+   `arima`,ARIMA,Forecasting
+   `cgru`,Convolutional Gated Recurrent Unit,Forecasting
+   `ets`,Exponential Smoothing,Forecasting
+   `glm`,Generalized Linear Models,Forecasting
+   `locf`,Last Observation Carried Forward,Forecasting
+   `polyfit`,Polynomial approximation,Forecasting
+   `stl_arima`,STL Decomposition with ARIMA,Forecasting
+   `ts_naive_average`,Naive Average,Forecasting
+
+
+.. csv-table:: Available models implementations
+   :header: "API name","Model used","Presets"
+
+   `adareg`,`sklearn.ensemble.AdaBoostRegressor`,`fast_train` `ts` `*tree`
+   `catboostreg`,`catboost.CatBoostRegressor`,`*tree`
+   `dtreg`,`sklearn.tree.DecisionTreeRegressor`,`fast_train` `ts` `*tree`
+   `gbr`,`sklearn.ensemble.GradientBoostingRegressor`,`*tree`
+   `knnreg`,`sklearn.neighbors.KNeighborsRegressor`,`fast_train` `ts`
+   `lasso`,`sklearn.linear_model.Lasso`,`fast_train` `ts`
+   `lgbmreg`,`lightgbm.sklearn.LGBMRegressor`,`*tree`
+   `linear`,`sklearn.linear_model.LinearRegression`,`fast_train` `ts`
+   `rfr`,`sklearn.ensemble.RandomForestRegressor`,`fast_train` `*tree`
+   `ridge`,`sklearn.linear_model.Ridge`,`fast_train` `ts`
+   `sgdr`,`sklearn.linear_model.SGDRegressor`,`fast_train` `ts`
+   `svr`,`sklearn.svm.LinearSVR`,
+   `treg`,`sklearn.ensemble.ExtraTreesRegressor`,`*tree`
+   `xgbreg`,`xgboost.XGBRegressor`,`*tree`
+   `bernb`,`sklearn.naive_bayes.BernoulliNB`,`fast_train`
+   `catboost`,`catboost.CatBoostClassifier`,`*tree`
+   `cnn`,`FEDOT model`,
+   `dt`,`sklearn.tree.DecisionTreeClassifier`,`fast_train` `*tree`
+   `knn`,`sklearn.neighbors.KNeighborsClassifier`,`fast_train`
+   `lda`,`sklearn.discriminant_analysis.LinearDiscriminantAnalysis`,`fast_train`
+   `lgbm`,`lightgbm.sklearn.LGBMClassifier`,
+   `logit`,`sklearn.linear_model.LogisticRegression`,`fast_train`
+   `mlp`,`sklearn.neural_network.MLPClassifier`,
+   `multinb`,`sklearn.naive_bayes.MultinomialNB`,`fast_train`
+   `qda`,`sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`,`fast_train`
+   `rf`,`sklearn.ensemble.RandomForestClassifier`,`fast_train` `*tree`
+   `svc`,`sklearn.svm.SVC`,
+   `xgboost`,`xgboost.XGBClassifier`,`*tree`
+   `kmeans`,`sklearn.cluster.KMeans`,`fast_train`
+   `ar`,`statsmodels.tsa.ar_model.AutoReg`,`fast_train` `ts`
+   `arima`,`statsmodels.tsa.arima.model.ARIMA`,`ts`
+   `cgru`,`FEDOT model`,`ts`
+   `ets`,`statsmodels.tsa.exponential_smoothing.ets.ETSModel`,`fast_train` `ts`
+   `glm`,`statsmodels.genmod.generalized_linear_model.GLM`,`fast_train` `ts`
+   `locf`,`FEDOT model`,`fast_train` `ts`
+   `polyfit`,`FEDOT model`,`fast_train` `ts`
+   `stl_arima`,`statsmodels.tsa.api.STLForecast`,`ts`
+   `ts_naive_average`,`FEDOT model`,`fast_train` `ts`
diff --git a/fedot/api/builder.py b/fedot/api/builder.py
@@ -258,6 +258,78 @@ def setup_pipeline_structure(
         Args:
             available_operations: list of model names to use. Pick the names according to operations repository.
 
+                .. details:: Possible options:
+
+                    - ``adareg`` -> AdaBoost Regressor
+                    - ``ar`` -> AutoRegression
+                    - ``arima`` -> ARIMA
+                    - ``cgru`` -> Convolutional Gated Recurrent Unit
+                    - ``bernb`` -> Naive Bayes Classifier (multivariate Bernoulli)
+                    - ``catboost`` -> Catboost Classifier
+                    - ``catboostreg`` -> Catboost Regressor
+                    - ``dt`` -> Decision Tree Classifier
+                    - ``dtreg`` -> Decision Tree Regressor
+                    - ``gbr`` -> Gradient Boosting Regressor
+                    - ``kmeans`` -> K-Means clustering
+                    - ``knn`` -> K-nearest neighbors Classifier
+                    - ``knnreg`` -> K-nearest neighbors Regressor
+                    - ``lasso`` -> Lasso Linear Regressor
+                    - ``lda`` -> Linear Discriminant Analysis
+                    - ``lgbm`` -> Light Gradient Boosting Machine Classifier
+                    - ``lgbmreg`` -> Light Gradient Boosting Machine Regressor
+                    - ``linear`` -> Linear Regression Regressor
+                    - ``logit`` -> Logistic Regression Classifier
+                    - ``mlp`` -> Multi-layer Perceptron Classifier
+                    - ``multinb`` -> Naive Bayes Classifier (multinomial)
+                    - ``qda`` -> Quadratic Discriminant Analysis
+                    - ``rf`` -> Random Forest Classifier
+                    - ``rfr`` -> Random Forest Regressor
+                    - ``ridge`` -> Ridge Linear Regressor
+                    - ``polyfit`` -> Polynomial fitter
+                    - ``sgdr`` -> Stochastic Gradient Descent Regressor
+                    - ``stl_arima`` -> STL Decomposition with ARIMA
+                    - ``glm`` -> Generalized Linear Models
+                    - ``ets`` -> Exponential Smoothing
+                    - ``locf`` -> Last Observation Carried Forward
+                    - ``ts_naive_average`` -> Naive Average
+                    - ``svc`` -> Support Vector Classifier
+                    - ``svr`` -> Linear Support Vector Regressor
+                    - ``treg`` -> Extra Trees Regressor
+                    - ``xgboost`` -> Extreme Gradient Boosting Classifier
+                    - ``xgbreg`` -> Extreme Gradient Boosting Regressor
+                    - ``cnn`` -> Convolutional Neural Network
+                    - ``scaling`` -> Scaling
+                    - ``normalization`` -> Normalization
+                    - ``simple_imputation`` -> Imputation
+                    - ``pca`` -> Principal Component Analysis
+                    - ``kernel_pca`` -> Kernel Principal Component Analysis
+                    - ``fast_ica`` -> Independent Component Analysis
+                    - ``poly_features`` -> Polynomial Features
+                    - ``one_hot_encoding`` -> One-Hot Encoder
+                    - ``label_encoding`` -> Label Encoder
+                    - ``rfe_lin_reg`` -> Linear Regression Recursive Feature Elimination
+                    - ``rfe_non_lin_reg`` -> Decision Tree Recursive Feature Elimination
+                    - ``rfe_lin_class`` -> Logistic Regression Recursive Feature Elimination
+                    - ``rfe_non_lin_class`` -> Decision Tree Recursive Feature Elimination
+                    - ``isolation_forest_reg`` -> Regression Isolation Forest
+                    - ``isolation_forest_class`` -> Classification Isolation Forest
+                    - ``decompose`` -> Regression Decomposition
+                    - ``class_decompose`` -> Classification Decomposition
+                    - ``resample`` -> Resample features
+                    - ``ransac_lin_reg`` -> Regression Random Sample Consensus
+                    - ``ransac_non_lin_reg`` -> Decision Tree Random Sample Consensus
+                    - ``cntvect`` -> Count Vectorizer
+                    - ``text_clean`` -> Lemmatization and Stemming
+                    - ``tfidf`` -> TF-IDF Vectorizer
+                    - ``word2vec_pretrained`` -> Word2Vec
+                    - ``lagged`` -> Lagged Transformation
+                    - ``sparse_lagged`` -> Sparse Lagged Transformation
+                    - ``smoothing`` -> Smoothing Transformation
+                    - ``gaussian_filter`` -> Gaussian Filter Transformation
+                    - ``diff_filter`` -> Derivative Filter Transformation
+                    - ``cut`` -> Cut Transformation
+                    - ``exog_ts`` -> Exogenous Transformation
+
             max_depth: max depth of a pipeline. Defaults to ``6``.
 
             max_arity: max arity of a pipeline nodes. Defaults to ``3``.

diff --git a/test/integration/models/test_model.py b/test/integration/models/test_model.py
@@ -15,6 +15,7 @@
 from fedot.core.data.data import InputData, OutputData
 from fedot.core.data.data_split import train_test_data_setup
 from fedot.core.data.supplementary_data import SupplementaryData
+from fedot.core.utils import fedot_project_root
 from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \
     PCAImplementation
 from fedot.core.operations.evaluation.operation_implementations.models.discriminant_analysis import \
@@ -540,3 +541,22 @@ def test_operations_are_fast():
                     break
             else:
                 raise Exception(f"Operation {operation.id} cannot have ``fast-train`` tag")
+
+
+def test_all_operations_are_documented():
+    """Every repository operation must have a docs line holding its back-quoted id and all of its presets."""
+    to_skip = {'custom', 'data_source_img', 'data_source_text', 'data_source_table', 'data_source_ts', 'exog_ts'}
+    path_to_docs = fedot_project_root() / 'docs/source/introduction/fedot_features/automation_features.rst'
+
+    with open(path_to_docs, 'r', encoding='utf-8') as docs_:
+        docs_lines = docs_.readlines()
+    # An empty file must fail the test instead of silently skipping every check
+    assert docs_lines, f"Documentation file {path_to_docs} is empty"
+    for operation in OperationTypesRepository('all')._repo:
+        if operation.id in to_skip:
+            continue
+        # The id is matched together with its back-quotes so that e.g. `dt` is not satisfied by the `dtreg` row
+        if not any(f'`{operation.id}`' in line and all(preset in line for preset in operation.presets)
+                   for line in docs_lines):
+            raise Exception(f"Operation {operation.id} with presets {operation.presets} "
+                            f"is not documented in {path_to_docs}")