diff --git a/.github/workflows/unit-build.yml b/.github/workflows/unit-build.yml
index b5983dbe2d..a7ca045881 100644
--- a/.github/workflows/unit-build.yml
+++ b/.github/workflows/unit-build.yml
@@ -30,6 +30,7 @@ jobs:
         python -m pip install --upgrade pip
         pip install pytest
         pip install .
+        pip install .[extra]
         pip install pytest-cov
     - name: Test with pytest
       run: |
diff --git a/docs/source/introduction/fedot_features/automation_features.rst b/docs/source/introduction/fedot_features/automation_features.rst
index 88489b0028..06841a049a 100644
--- a/docs/source/introduction/fedot_features/automation_features.rst
+++ b/docs/source/introduction/fedot_features/automation_features.rst
@@ -70,7 +70,6 @@ FEDOT supports bunch of dimensionality preprocessing operations that can be be a
     `label_encoding`,Label Encoder, Feature encoding
     `resample`,Imbalanced binary class transformation in classification, Data transformation
     `topological_features`,Calculation of topological features,Time series transformation
-    `fast_topological_features`,Fast calculation of part of topological features,Time series transformation


 .. csv-table:: Feature transformation operations implementations
diff --git a/fedot/core/operations/evaluation/common_preprocessing.py b/fedot/core/operations/evaluation/common_preprocessing.py
index 5f9ba45906..1fb6c1b56e 100644
--- a/fedot/core/operations/evaluation/common_preprocessing.py
+++ b/fedot/core/operations/evaluation/common_preprocessing.py
@@ -8,11 +8,9 @@
 from fedot.core.operations.evaluation.operation_implementations.data_operations.sklearn_transformations import \
     ImputationImplementation, KernelPCAImplementation, NormalizationImplementation, PCAImplementation, \
     PolyFeaturesImplementation, ScalingImplementation, FastICAImplementation
-from fedot.core.operations.evaluation.operation_implementations.\
+from fedot.core.operations.evaluation.operation_implementations. \
     data_operations.topological.fast_topological_extractor import \
-    FastTopologicalFeaturesImplementation
-from fedot.core.operations.evaluation.operation_implementations.data_operations.topological. \
-    topological_extractor import TopologicalFeaturesImplementation
+    TopologicalFeaturesImplementation
 from fedot.core.operations.operation_parameters import OperationParameters
 from fedot.utilities.random import ImplementationRandomStateHandler
@@ -50,8 +48,7 @@ class FedotPreprocessingStrategy(EvaluationStrategy):
         'one_hot_encoding': OneHotEncodingImplementation,
         'label_encoding': LabelEncodingImplementation,
         'fast_ica': FastICAImplementation,
-        'topological_features': TopologicalFeaturesImplementation,
-        'fast_topological_features': FastTopologicalFeaturesImplementation,
+        'topological_features': TopologicalFeaturesImplementation
     }

     def __init__(self, operation_type: str, params: Optional[OperationParameters] = None):
diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py
index bf8b8e4f05..1fffb676b9 100644
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py
+++ b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/fast_topological_extractor.py
@@ -1,8 +1,16 @@
+import logging
 from itertools import chain
 from typing import Optional

 import numpy as np
-from gph import ripser_parallel as ripser
+
+try:
+    from gph import ripser_parallel as ripser
+except ModuleNotFoundError:
+    logging.log(100,
+                "Topological features operation requires extra dependencies for time series forecasting, which are"
+                " not installed. This may affect performance. Please install them with 'pip install fedot[extra]'")
+
 from joblib import Parallel, delayed

 from fedot.core.data.data import InputData, OutputData
@@ -11,7 +19,7 @@
 from fedot.core.operations.operation_parameters import OperationParameters


-class FastTopologicalFeaturesImplementation(DataOperationImplementation):
+class TopologicalFeaturesImplementation(DataOperationImplementation):
     def __init__(self, params: Optional[OperationParameters] = None):
         super().__init__(params)
         self.window_size_as_share = params.get('window_size_as_share')
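A note on the guard above: if `gph` is missing, the name `ripser` stays undefined and the first use raises a `NameError` rather than a helpful error. A minimal sketch of the same pattern with an explicit sentinel and a use-time check (`compute_persistence` is a hypothetical helper, not FEDOT API):

```python
import logging

try:
    from gph import ripser_parallel as ripser
except ModuleNotFoundError:
    ripser = None  # sentinel: the optional dependency is absent
    logging.getLogger(__name__).warning(
        "Topological features need the 'extra' dependencies; "
        "install them with 'pip install fedot[extra]'")


def compute_persistence(point_cloud):
    # Fail at call time with an actionable message instead of a NameError.
    if ripser is None:
        raise ImportError("giotto-ph is required: pip install fedot[extra]")
    return ripser(point_cloud, maxdim=1)
```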
- """ - - def __init__(self, - time_series: Union[pd.DataFrame, pd.Series, np.ndarray, list], - window_size: int = None, - strides: int = 1): - self.__time_series = time_series - self.__convert_ts_to_array() - self.__window_length = window_size - self.__strides = strides - if len(self.__time_series.shape) > 1: - self.__ts_length = self.__time_series[0].size - else: - self.__ts_length = self.__time_series.size - - if self.__window_length is None: - self.__window_length = round(self.__ts_length * 0.35) - else: - self.__window_length = round(self.__window_length) - self.__subseq_length = self.__ts_length - self.__window_length + 1 - - self.__check_windows_length() - if len(self.__time_series.shape) > 1: - self.__trajectory_matrix = self.__get_2d_trajectory_matrix() - else: - self.__trajectory_matrix = self.__get_1d_trajectory_matrix() - - def __check_windows_length(self): - if not 2 <= self.__window_length <= self.__ts_length / 2: - self.__window_length = int(self.__ts_length / 3) - - def __convert_ts_to_array(self): - if isinstance(self.__time_series, pd.DataFrame): - self.__time_series = self.__time_series.values.reshape(-1, 1) - elif isinstance(self.__time_series, list): - self.__time_series = np.array(self.__time_series) - else: - self.__time_series = self.__time_series - - def __get_1d_trajectory_matrix(self): - if self.__strides > 1: - return self.__strided_trajectory_matrix(self.__time_series) - else: - return hankel(self.__time_series[:self.__window_length + 1], self.__time_series[self.__window_length:]) - - def __get_2d_trajectory_matrix(self): - if self.__strides > 1: - return [self.__strided_trajectory_matrix(time_series) for time_series - in self.__time_series] - else: - return [hankel(time_series[:self.__window_length + 1], time_series[self.__window_length:]) for time_series - in self.__time_series] - - def __strided_trajectory_matrix(self, time_series): - shape = (time_series.shape[0] - self.__window_length + 1, self.__window_length) - strides = (time_series.strides[0],) + time_series.strides - rolled = np.lib.stride_tricks.as_strided(time_series, shape=shape, strides=strides) - return rolled[np.arange(0, shape[0], self.__strides)].T - - @property - def window_length(self): - return self.__window_length - - @property - def time_series(self): - return self.__time_series - - @property - def sub_seq_length(self): - return self.__subseq_length - - @window_length.setter - def window_length(self, window_length): - self.__window_length = window_length - - @property - def trajectory_matrix(self): - return self.__trajectory_matrix - - @property - def ts_length(self): - return self.__ts_length - - @trajectory_matrix.setter - def trajectory_matrix(self, trajectory_matrix: np.ndarray): - self.__trajectory_matrix = trajectory_matrix - - -def get_x_y_pairs(train, train_periods, prediction_periods): - """ - train_scaled - training sequence - train_periods - How many data points to use as inputs - prediction_periods - How many periods to ouput as predictions - """ - train_scaled = train.T - r = train_scaled.shape[0] - train_periods - prediction_periods - x_train = [train_scaled[i:i + train_periods] for i in range(r)] - y_train = [train_scaled[i + train_periods:i + train_periods + prediction_periods] for i in range(r)] - - # -- use the stack function to convert the list of 1D tensors - # into a 2D tensor where each element of the list is now a row - x_train = np.concatenate(x_train, axis=0) - y_train = np.concatenate(y_train, axis=0) - - return x_train, y_train diff --git 
diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/point_cloud.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/point_cloud.py
deleted file mode 100644
index 71ada89ede..0000000000
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/point_cloud.py
+++ /dev/null
@@ -1,149 +0,0 @@
-import numpy as np
-import pandas as pd
-from ripser import Rips
-
-from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.hankel_matrix import \
-    HankelMatrix
-
-
-class TopologicalTransformation:
-    """Decomposes the given time series with a singular-spectrum analysis. Assumes the values of the time series are
-    recorded at equal intervals.
-
-    Args:
-        time_series: Time series to be decomposed.
-        max_simplex_dim: Maximum dimension of the simplices to be used in the Rips filtration.
-        epsilon: Maximum distance between two points to be considered connected by an edge in the Rips filtration.
-        persistence_params: ...
-        window_length: Length of the window to be used in the rolling window function.
-
-    Attributes:
-        epsilon_range (np.ndarray): Range of epsilon values to be used in the Rips filtration.
-
-    """
-
-    def __init__(self,
-                 time_series: np.ndarray = None,
-                 max_simplex_dim: int = None,
-                 epsilon: int = 10,
-                 persistence_params: dict = None,
-                 window_length: int = None,
-                 stride: int = 1):
-        self.time_series = time_series
-        self.stride = stride
-        self.max_simplex_dim = max_simplex_dim
-        self.epsilon_range = self.__create_epsilon_range(epsilon)
-        self.persistence_params = persistence_params
-
-        if self.persistence_params is None:
-            self.persistence_params = {
-                'coeff': 2,
-                'do_cocycles': False,
-                'verbose': False}
-
-        self.__window_length = window_length
-
-    @staticmethod
-    def __create_epsilon_range(epsilon):
-        return np.array([y * float(1 / epsilon) for y in range(epsilon)])
-
-    def time_series_to_point_cloud(self,
-                                   input_data: np.array = None,
-                                   dimension_embed=2) -> np.array:
-        """Convert a time series into a point cloud in the dimension specified by dimension_embed.
-
-        Args:
-            input_data: Time series to be converted.
-            dimension_embed: dimension of Euclidean space in which to embed the time series into by taking
-                windows of dimension_embed length, e.g. if the time series is ``[t_1,...,t_n]`` and dimension_embed
-                is ``2``, then the point cloud would be ``[(t_0, t_1), (t_1, t_2),...,(t_(n-1), t_n)]``
-
-        Returns:
-            Collection of points embedded into Euclidean space of dimension = dimension_embed, constructed
-            in the manner explained above.
-
-        """
-
-        if self.__window_length is None:
-            self.__window_length = dimension_embed
-
-        trajectory_transformer = HankelMatrix(time_series=input_data,
-                                              window_size=self.__window_length,
-                                              strides=self.stride)
-        return trajectory_transformer.trajectory_matrix
-
-    def point_cloud_to_persistent_cohomology_ripser(self,
-                                                    point_cloud: np.array = None,
-                                                    max_simplex_dim: int = 1):
-
-        # ensure epsilon_range is a numpy array
-        epsilon_range = self.epsilon_range
-
-        # build filtration
-        self.persistence_params['maxdim'] = max_simplex_dim
-        filtration = Rips(**self.persistence_params)
-
-        if point_cloud is None:
-            point_cloud = self.time_series_to_point_cloud()
-
-        # initialize persistence diagrams
-        diagrams = filtration.fit_transform(point_cloud)
-        # Instantiate persistence landscape transformer
-        # plot_diagrams(diagrams)
-
-        # normalize epsilon distance in diagrams so max is 1
-        diagrams = [np.array([dg for dg in diag if np.isfinite(dg).all()]) for diag in diagrams]
-        diagrams = diagrams / max(
-            [np.array([dg for dg in diag if np.isfinite(dg).all()]).max() for diag in diagrams if diag.shape[0] > 0])
-
-        ep_ran_len = len(epsilon_range)
-
-        homology = {dimension: np.zeros(ep_ran_len).tolist() for dimension in range(max_simplex_dim + 1)}
-
-        for dimension, diagram in enumerate(diagrams):
-            if dimension <= max_simplex_dim and len(diagram) > 0:
-                homology[dimension] = np.array(
-                    [np.array(((epsilon_range >= point[0]) & (epsilon_range <= point[1])).astype(int))
-                     for point in diagram
-                     ]).sum(axis=0).tolist()
-
-        return homology
-
-    def time_series_to_persistent_cohomology_ripser(self,
-                                                    time_series: np.array,
-                                                    max_simplex_dim: int) -> dict:
-        """Wrapper function that takes in a time series and outputs the persistent homology object, along with other
-        auxiliary objects.
-
-        Args:
-            time_series: Time series to be converted.
-            max_simplex_dim: Maximum dimension of the simplicial complex to be constructed.
-
-        Returns:
-            Persistent homology object. Dictionary with keys in ``range(max_simplex_dim)`` and, the value ``hom[i]``
-            is an array of length equal to ``len(epsilon_range)`` containing the betti numbers of the ``i-th`` homology
-            groups for the Rips filtration.
-
-        """
-
-        homology = self.point_cloud_to_persistent_cohomology_ripser(point_cloud=time_series,
-                                                                    max_simplex_dim=max_simplex_dim)
-        return homology
-
-    def time_series_rolling_betti_ripser(self, ts):
-
-        point_cloud = self.rolling_window(array=ts, window=self.__window_length)
-        homology = self.time_series_to_persistent_cohomology_ripser(point_cloud,
-                                                                    max_simplex_dim=self.max_simplex_dim)
-        df_features = pd.DataFrame(data=homology)
-        cols = ["Betti_{}".format(i) for i in range(df_features.shape[1])]
-        df_features.columns = cols
-        df_features['Betti_sum'] = df_features.sum(axis=1)
-        return df_features
-
-    def rolling_window(self, array, window):
-        if window <= 0:
-            raise ValueError("Window size must be a positive integer.")
-        if window > len(array):
-            raise ValueError("Window size cannot exceed the length of the array.")
-        return np.array([array[i:i + window] for i in range(len(array) - window + 1)])
- - """ - - homology = self.point_cloud_to_persistent_cohomology_ripser(point_cloud=time_series, - max_simplex_dim=max_simplex_dim) - return homology - - def time_series_rolling_betti_ripser(self, ts): - - point_cloud = self.rolling_window(array=ts, window=self.__window_length) - homology = self.time_series_to_persistent_cohomology_ripser(point_cloud, - max_simplex_dim=self.max_simplex_dim) - df_features = pd.DataFrame(data=homology) - cols = ["Betti_{}".format(i) for i in range(df_features.shape[1])] - df_features.columns = cols - df_features['Betti_sum'] = df_features.sum(axis=1) - return df_features - - def rolling_window(self, array, window): - if window <= 0: - raise ValueError("Window size must be a positive integer.") - if window > len(array): - raise ValueError("Window size cannot exceed the length of the array.") - return np.array([array[i:i + window] for i in range(len(array) - window + 1)]) diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/topological.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/topological.py deleted file mode 100644 index 1a14e33db7..0000000000 --- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/topological.py +++ /dev/null @@ -1,245 +0,0 @@ -from abc import ABC - -import numpy as np -import pandas as pd -from gtda.diagrams import Scaler, Filtering, PersistenceEntropy, PersistenceLandscape, BettiCurve -from gtda.homology import VietorisRipsPersistence - - -class PersistenceDiagramFeatureExtractor(ABC): - """Abstract class persistence diagrams features extractor. - - """ - - def extract_feature_(self, persistence_diagram): - pass - - def fit_transform(self, x_pd): - return self.extract_feature_(x_pd) - - -class PersistenceDiagramsExtractor: - """Class to extract persistence diagrams from time series. - - Args: - takens_embedding_dim: Dimension of the Takens embedding. - takens_embedding_delay: Delay of the Takens embedding. - homology_dimensions: Homology dimensions to compute. - filtering: Whether to filter the persistence diagrams. - filtering_dimensions: Homology dimensions to filter. - parallel: Whether to parallelize the computation. 
- - """ - - def __init__(self, - takens_embedding_dim: int, - takens_embedding_delay: int, - homology_dimensions: tuple, - filtering: bool = False, - filtering_dimensions: tuple = (1, 2)): - self.takens_embedding_dim_ = takens_embedding_dim - self.takens_embedding_delay_ = takens_embedding_delay - self.homology_dimensions_ = homology_dimensions - self.filtering_ = filtering - self.filtering_dimensions_ = filtering_dimensions - - def transform(self, x_embeddings): - vr = VietorisRipsPersistence(metric='euclidean', homology_dimensions=self.homology_dimensions_, n_jobs=1) - diagram_scaler = Scaler(n_jobs=1) - persistence_diagrams = diagram_scaler.fit_transform(vr.fit_transform([x_embeddings])) - if self.filtering_: - diagram_filter = Filtering(epsilon=0.1, homology_dimensions=self.filtering_dimensions_) - persistence_diagrams = diagram_filter.fit_transform(persistence_diagrams) - return persistence_diagrams[0] - - -class TopologicalFeaturesExtractor: - def __init__(self, persistence_diagram_extractor, persistence_diagram_features): - self.persistence_diagram_extractor_ = persistence_diagram_extractor - self.persistence_diagram_features_ = persistence_diagram_features - - def transform(self, x): - - x_pers_diag = self.persistence_diagram_extractor_.transform(x) - n = self.persistence_diagram_extractor_.homology_dimensions_[-1] + 1 - feature_list = [] - column_list = [] - for feature_name, feature_model in self.persistence_diagram_features_.items(): - try: - x_features = feature_model.fit_transform(x_pers_diag) - feature_list.append(x_features) - for dim in range(len(x_features)): - column_list.append('{}_{}'.format(feature_name, dim)) - except BaseException: - feature_list.append(np.array([0 for i in range(n)])) - for dim in range(n): - column_list.append('{}_{}'.format(feature_name, dim)) - continue - x_transformed = pd.DataFrame(data=np.hstack(feature_list)).T - x_transformed.columns = column_list - return x_transformed - - -class HolesNumberFeature(PersistenceDiagramFeatureExtractor): - def __init__(self): - super(HolesNumberFeature).__init__() - - def extract_feature_(self, persistence_diagram): - feature = np.zeros(int(np.max(persistence_diagram[:, 2])) + 1) - for hole in persistence_diagram: - if hole[1] - hole[0] > 0: - feature[int(hole[2])] += 1.0 - return feature - - -class MaxHoleLifeTimeFeature(PersistenceDiagramFeatureExtractor): - def __init__(self): - super(MaxHoleLifeTimeFeature).__init__() - - def extract_feature_(self, persistence_diagram): - feature = np.zeros(int(np.max(persistence_diagram[:, 2])) + 1) - for hole in persistence_diagram: - lifetime = hole[1] - hole[0] - if lifetime > feature[int(hole[2])]: - feature[int(hole[2])] = lifetime - return feature - - -class RelevantHolesNumber(PersistenceDiagramFeatureExtractor): - def __init__(self, ratio=0.7): - super(RelevantHolesNumber).__init__() - self.ratio_ = ratio - - def extract_feature_(self, persistence_diagram): - feature = np.zeros(int(np.max(persistence_diagram[:, 2])) + 1) - max_lifetimes = np.zeros(int(np.max(persistence_diagram[:, 2])) + 1) - - for hole in persistence_diagram: - lifetime = hole[1] - hole[0] - if lifetime > max_lifetimes[int(hole[2])]: - max_lifetimes[int(hole[2])] = lifetime - - for hole in persistence_diagram: - index = int(hole[2]) - lifetime = hole[1] - hole[0] - if np.equal(lifetime, self.ratio_ * max_lifetimes[index]): - feature[index] += 1.0 - - return feature - - -class AverageHoleLifetimeFeature(PersistenceDiagramFeatureExtractor): - def __init__(self): - 
-        super(AverageHoleLifetimeFeature).__init__()
-
-    def extract_feature_(self, persistence_diagram):
-        feature = np.zeros(int(np.max(persistence_diagram[:, 2])) + 1)
-        n_holes = np.zeros(int(np.max(persistence_diagram[:, 2])) + 1)
-
-        for hole in persistence_diagram:
-            lifetime = hole[1] - hole[0]
-            index = int(hole[2])
-            if lifetime > 0:
-                feature[index] += lifetime
-                n_holes[index] += 1
-
-        for i in range(feature.shape[0]):
-            feature[i] = feature[i] / n_holes[i] if n_holes[i] != 0 else 0.0
-
-        return feature
-
-
-class SumHoleLifetimeFeature(PersistenceDiagramFeatureExtractor):
-    def __init__(self):
-        super(SumHoleLifetimeFeature).__init__()
-
-    def extract_feature_(self, persistence_diagram):
-        feature = np.zeros(int(np.max(persistence_diagram[:, 2])) + 1)
-        for hole in persistence_diagram:
-            feature[int(hole[2])] += hole[1] - hole[0]
-        return feature
-
-
-class PersistenceEntropyFeature(PersistenceDiagramFeatureExtractor):
-    def __init__(self):
-        super(PersistenceEntropyFeature).__init__()
-
-    def extract_feature_(self, persistence_diagram):
-        persistence_entropy = PersistenceEntropy(n_jobs=1)
-        return persistence_entropy.fit_transform([persistence_diagram])[0]
-
-
-class SimultaneousAliveHolesFeature(PersistenceDiagramFeatureExtractor):
-    def __init__(self):
-        super(SimultaneousAliveHolesFeature).__init__()
-
-    @staticmethod
-    def get_average_intersection_number_(segments):
-        intersections = list()
-        n_segments = segments.shape[0]
-
-        for i in range(n_segments):
-            count = 1
-            start = segments[i, 0]
-            end = segments[i, 1]
-
-            for j in range(i + 1, n_segments):
-                if start <= segments[j, 0] <= end:
-                    count += 1
-                else:
-                    break
-            intersections.append(count)
-
-        return np.sum(intersections) / len(intersections)
-
-    def get_average_simultaneous_holes_(self, holes):
-        starts = holes[:, 0]
-        ends = holes[:, 1]
-        ind = np.lexsort((starts, ends))
-        segments = np.array([[starts[i], ends[i]] for i in ind])
-        return self.get_average_intersection_number_(segments)
-
-    def extract_feature_(self, persistence_diagram):
-        n_dims = int(np.max(persistence_diagram[:, 2])) + 1
-        feature = np.zeros(n_dims)
-
-        for dim in range(n_dims):
-            holes = list()
-            for hole in persistence_diagram:
-                if hole[1] - hole[0] != 0.0 and int(hole[2]) == dim:
-                    holes.append(hole)
-            if len(holes) != 0:
-                feature[dim] = self.get_average_simultaneous_holes_(np.array(holes))
-
-        return feature
-
-
-class AveragePersistenceLandscapeFeature(PersistenceDiagramFeatureExtractor):
-    def __init__(self):
-        super(AveragePersistenceLandscapeFeature).__init__()
-
-    def extract_feature_(self, persistence_diagram):
-        # As practice shows, only 1st layer of 1st homology dimension plays role
-        persistence_landscape = PersistenceLandscape(n_jobs=1).fit_transform([persistence_diagram])[0, 1, 0, :]
-        return np.array([np.sum(persistence_landscape) / persistence_landscape.shape[0]])
-
-
-class BettiNumbersSumFeature(PersistenceDiagramFeatureExtractor):
-    def __init__(self):
-        super(BettiNumbersSumFeature).__init__()
-
-    def extract_feature_(self, persistence_diagram):
-        betti_curve = BettiCurve(n_jobs=1).fit_transform([persistence_diagram])[0]
-        return np.array([np.sum(betti_curve[i, :]) for i in range(int(np.max(persistence_diagram[:, 2])) + 1)])
-
-
-class RadiusAtMaxBNFeature(PersistenceDiagramFeatureExtractor):
-    def __init__(self):
-        super(RadiusAtMaxBNFeature).__init__()
-
-    def extract_feature_(self, persistence_diagram, n_bins=100):
-        betti_curve = BettiCurve(n_jobs=1, n_bins=n_bins).fit_transform([persistence_diagram])[0]
-        max_dim = int(np.max(persistence_diagram[:, 2])) + 1
-        max_bettis = np.array([np.max(betti_curve[i, :]) for i in range(max_dim)])
-        return np.array(
-            [np.where(betti_curve[i, :] == max_bettis[i])[0][0] / (n_bins * max_dim) for i in range(max_dim)])
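The per-dimension loops in the deleted feature classes all share one shape: group a `(birth, death, dim)` diagram by `dim` and aggregate lifetimes. A vectorized sketch of the `MaxHoleLifeTimeFeature` logic on toy data (not FEDOT API):

```python
import numpy as np

# gtda-style diagram rows: (birth, death, homology dimension)
diagram = np.array([[0.0, 0.3, 0],
                    [0.1, 0.9, 1],
                    [0.2, 0.4, 1]])

n_dims = int(diagram[:, 2].max()) + 1
lifetimes = diagram[:, 1] - diagram[:, 0]
# Maximum lifetime per homology dimension.
max_lifetime = np.array([lifetimes[diagram[:, 2] == d].max() for d in range(n_dims)])
print(max_lifetime)  # [0.3 0.8]
```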
diff --git a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/topological_extractor.py b/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/topological_extractor.py
deleted file mode 100644
index 224f498519..0000000000
--- a/fedot/core/operations/evaluation/operation_implementations/data_operations/topological/topological_extractor.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import math
-import sys
-from multiprocessing import cpu_count
-from typing import Optional
-
-import numpy as np
-from joblib import Parallel, delayed
-
-from fedot.core.data.data import InputData, OutputData
-from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.point_cloud import \
-    TopologicalTransformation
-from fedot.core.operations.evaluation.operation_implementations.data_operations.topological.topological import \
-    HolesNumberFeature, MaxHoleLifeTimeFeature, RelevantHolesNumber, AverageHoleLifetimeFeature, \
-    SumHoleLifetimeFeature, PersistenceEntropyFeature, SimultaneousAliveHolesFeature, \
-    AveragePersistenceLandscapeFeature, BettiNumbersSumFeature, RadiusAtMaxBNFeature, PersistenceDiagramsExtractor, \
-    TopologicalFeaturesExtractor
-from fedot.core.operations.evaluation.operation_implementations.implementation_interfaces import \
-    DataOperationImplementation
-from fedot.core.operations.operation_parameters import OperationParameters
-from golem.utilities.utilities import determine_n_jobs
-
-sys.setrecursionlimit(1000000000)
-
-PERSISTENCE_DIAGRAM_FEATURES = {'HolesNumberFeature': HolesNumberFeature(),
-                                'MaxHoleLifeTimeFeature': MaxHoleLifeTimeFeature(),
-                                'RelevantHolesNumber': RelevantHolesNumber(),
-                                'AverageHoleLifetimeFeature': AverageHoleLifetimeFeature(),
-                                'SumHoleLifetimeFeature': SumHoleLifetimeFeature(),
-                                'PersistenceEntropyFeature': PersistenceEntropyFeature(),
-                                'SimultaneousAliveHolesFeature': SimultaneousAliveHolesFeature(),
-                                'AveragePersistenceLandscapeFeature': AveragePersistenceLandscapeFeature(),
-                                'BettiNumbersSumFeature': BettiNumbersSumFeature(),
-                                'RadiusAtMaxBNFeature': RadiusAtMaxBNFeature()}
-
-PERSISTENCE_DIAGRAM_EXTRACTOR = PersistenceDiagramsExtractor(takens_embedding_dim=1,
-                                                             takens_embedding_delay=2,
-                                                             homology_dimensions=(0, 1))
-
-
-class TopologicalFeaturesImplementation(DataOperationImplementation):
-    def __init__(self, params: Optional[OperationParameters] = None):
-        super().__init__(params)
-        self.n_jobs = determine_n_jobs(params.get('n_jobs', 1))
-        self.feature_extractor = TopologicalFeaturesExtractor(
-            persistence_diagram_extractor=PERSISTENCE_DIAGRAM_EXTRACTOR,
-            persistence_diagram_features=PERSISTENCE_DIAGRAM_FEATURES)
-
-    def fit(self, input_data: InputData):
-        pass
-
-    def transform(self, input_data: InputData) -> OutputData:
-        parallel = Parallel(n_jobs=self.n_jobs, verbose=0, pre_dispatch="2*n_jobs")
-        feature_matrix = parallel(delayed(self.generate_features_from_ts)(sample) for sample in input_data.features)
-        predict = self._clean_predict(np.array([ts for ts in feature_matrix]))
-        return predict
-
-    @staticmethod
-    def _clean_predict(predict: np.array):
-        """Clean predict from nan, inf and reshape data for Fedot appropriate form
-        """
-        predict = np.where(np.isnan(predict), 0, predict)
-        predict = np.where(np.isinf(predict), 0, predict)
-        predict = predict.reshape(predict.shape[0], -1)
-        return predict
-
-    def generate_features_from_ts(self, ts_data: np.array):
-        self.data_transformer = TopologicalTransformation(
-            window_length=0)
-
-        point_cloud = self.data_transformer.time_series_to_point_cloud(input_data=ts_data)
-        topological_features = self.feature_extractor.transform(point_cloud)
-        return topological_features
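The deleted `_clean_predict` can be condensed: `np.nan_to_num` handles NaN and both infinities in one call. An equivalent sketch (not code the PR keeps):

```python
import numpy as np

def clean_predict(predict: np.ndarray) -> np.ndarray:
    # NaN, +inf and -inf all become 0, then trailing dimensions are flattened.
    predict = np.nan_to_num(predict, nan=0.0, posinf=0.0, neginf=0.0)
    return predict.reshape(predict.shape[0], -1)
```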
diff --git a/fedot/core/pipelines/tuning/search_space.py b/fedot/core/pipelines/tuning/search_space.py
index b44a02b977..3e60a627b8 100644
--- a/fedot/core/pipelines/tuning/search_space.py
+++ b/fedot/core/pipelines/tuning/search_space.py
@@ -768,7 +768,7 @@ def get_parameters_dict(self):
                     'sampling-scope': [0.9, 0.99],
                     'type': 'continuous'}
             },
-            'fast_topological_features': {
+            'topological_features': {
                 'window_size_as_share': {
                     'hyperopt-dist': hp.uniform,
                     'sampling-scope': [0.1, 0.9],
diff --git a/fedot/core/repository/data/data_operation_repository.json b/fedot/core/repository/data/data_operation_repository.json
index e171b36b96..6c3eb06c54 100644
--- a/fedot/core/repository/data/data_operation_repository.json
+++ b/fedot/core/repository/data/data_operation_repository.json
@@ -259,18 +259,7 @@
       "input_type": "[DataTypesEnum.table]",
       "output_type": "[DataTypesEnum.table]",
       "tags": [
-        "non_applicable_for_ts",
-        "feature_space_transformation"
-      ]
-    },
-    "fast_topological_features": {
-      "meta": "custom_ts_preprocessing",
-      "presets": [
-        "ts"
-      ],
-      "input_type": "[DataTypesEnum.table]",
-      "output_type": "[DataTypesEnum.table]",
-      "tags": [
+        "ts-extra",
         "non_applicable_for_ts",
         "feature_space_transformation"
       ]
diff --git a/fedot/core/repository/data/default_operation_params.json b/fedot/core/repository/data/default_operation_params.json
index a6abf66cb0..4076b4d26e 100644
--- a/fedot/core/repository/data/default_operation_params.json
+++ b/fedot/core/repository/data/default_operation_params.json
@@ -159,9 +159,6 @@
     "whiten": "unit-variance"
   },
   "topological_features": {
-    "n_jobs": -1
-  },
-  "fast_topological_features": {
     "n_jobs": 1,
    "window_size_as_share": 0.66,
     "max_homology_dimension": 1,
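The new `ts-extra` tag introduced above works through ordinary forbidden-tag filtering in the repository; a conceptual sketch with made-up data structures (the repository's real ones differ):

```python
operations = {
    'topological_features': {'ts-extra', 'non_applicable_for_ts', 'feature_space_transformation'},
    'scaling': {'preprocessing'},
}
forbidden_tags = ['ts-extra']  # appended when the extras are missing

# An operation survives only if none of its tags is forbidden.
allowed = [name for name, tags in operations.items()
           if not tags.intersection(forbidden_tags)]
print(allowed)  # ['scaling']
```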
diff --git a/fedot/core/repository/operation_types_repository.py b/fedot/core/repository/operation_types_repository.py
index 4c39685c95..3266b78cce 100644
--- a/fedot/core/repository/operation_types_repository.py
+++ b/fedot/core/repository/operation_types_repository.py
@@ -1,4 +1,5 @@
 import json
+import logging
 import os
 from collections import defaultdict
 from dataclasses import dataclass
@@ -13,6 +14,13 @@
 from fedot.core.repository.json_evaluation import import_enums_from_str, import_strategy_from_str, read_field
 from fedot.core.repository.tasks import Task, TaskTypesEnum

+EXTRA_TS_INSTALLED = True
+try:
+    from gph import ripser_parallel as ripser
+    dummy_var = ripser  # for pep8
+except ModuleNotFoundError:
+    EXTRA_TS_INSTALLED = False
+
 if TYPE_CHECKING:
     from fedot.core.operations.evaluation.evaluation_interfaces import EvaluationStrategy

@@ -431,6 +439,13 @@ def get_operations_for_task(task: Optional[Task], data_type: Optional[DataTypesE
     if BEST_QUALITY_PRESET_NAME in preset or AUTO_PRESET_NAME in preset:
         preset = None

+    if task.task_type is TaskTypesEnum.ts_forecasting and not EXTRA_TS_INSTALLED:
+        if not forbidden_tags:
+            forbidden_tags = []
+        logging.log(100,
+                    "Extra dependencies for time series forecasting are not installed. This may affect "
+                    "performance. Please install them with 'pip install fedot[extra]'")
+        forbidden_tags.append('ts-extra')
     task_type = task.task_type if task else None
     if mode in AVAILABLE_REPO_NAMES:
         repo = OperationTypesRepository(mode)
diff --git a/other_requirements/extra.txt b/other_requirements/extra.txt
index d896e3d2a6..e63165864d 100644
--- a/other_requirements/extra.txt
+++ b/other_requirements/extra.txt
@@ -12,3 +12,7 @@ nltk >= 3.5

 # Misc
 protobuf~=3.19.0
+
+# Topological features
+giotto_tda==0.6.0
+ripser==0.6.4
diff --git a/requirements.txt b/requirements.txt
index b0a1824f03..3bb2af2ac4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -15,8 +15,7 @@ ete3>=3.1.0
 networkx>=2.4, !=2.7.*, !=2.8.1, !=2.8.2, !=2.8.3
 scikit_learn>=1.0.0; python_version >= '3.8'
 sktime==0.16.1
-giotto_tda==0.6.0
-ripser==0.6.4
+
 # Analysis and optimizations
 hyperopt==0.2.7
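As an aside, the availability probe in `operation_types_repository.py` could avoid import-time side effects entirely with `importlib`; a stdlib-only sketch, not what the PR ships:

```python
from importlib.util import find_spec

# True when the giotto-ph package ('gph') is importable, without importing it.
EXTRA_TS_INSTALLED = find_spec('gph') is not None
```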