From 8c52c7acc7024ac525095edf4d68ca170caef523 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?=
Date: Tue, 28 Feb 2023 13:09:41 +0100
Subject: [PATCH 1/3] black

---
 .../dictionary_based/__init__.py              |   4 +
 .../dictionary_based/_tde_fast.py             | 918 ++++++++++++++++++
 .../panel/dictionary_based/_sfa_fast.py       |   6 +-
 3 files changed, 925 insertions(+), 3 deletions(-)
 create mode 100644 sktime/classification/dictionary_based/_tde_fast.py

diff --git a/sktime/classification/dictionary_based/__init__.py b/sktime/classification/dictionary_based/__init__.py
index 63d2c566d1..f23b913a57 100644
--- a/sktime/classification/dictionary_based/__init__.py
+++ b/sktime/classification/dictionary_based/__init__.py
@@ -5,6 +5,7 @@
     "BOSSEnsemble",
     "ContractableBOSS",
     "TemporalDictionaryEnsemble",
+    "FastTemporalDictionaryEnsemble",
     "IndividualTDE",
     "WEASEL",
     "MUSE",
@@ -17,4 +18,7 @@
     IndividualTDE,
     TemporalDictionaryEnsemble,
 )
+from sktime.classification.dictionary_based._tde_fast import (
+    FastTemporalDictionaryEnsemble,
+)
 from sktime.classification.dictionary_based._weasel import WEASEL

diff --git a/sktime/classification/dictionary_based/_tde_fast.py b/sktime/classification/dictionary_based/_tde_fast.py
new file mode 100644
index 0000000000..01c13f0c73
--- /dev/null
+++ b/sktime/classification/dictionary_based/_tde_fast.py
@@ -0,0 +1,918 @@
+# -*- coding: utf-8 -*-
+"""TDE classifiers.
+
+Dictionary based TDE classifiers built on the SFA transform. Contains the
+IndividualTDE base classifier and the FastTemporalDictionaryEnsemble.
+"""
+
+__author__ = ["MatthewMiddlehurst", "patrickzib"]
+__all__ = ["FastTemporalDictionaryEnsemble", "IndividualTDE", "histogram_intersection"]
+
+import math
+import time
+import warnings
+
+import numpy as np
+from joblib import Parallel, delayed
+from numba import njit, types
+from scipy.sparse import csr_matrix, hstack
+from sklearn import preprocessing
+from sklearn.kernel_ridge import KernelRidge
+from sklearn.utils import check_random_state
+
+from sktime.classification.base import BaseClassifier
+from sktime.transformations.panel.dictionary_based import SFAFast
+from sktime.utils.validation.panel import check_X_y
+
+
+class FastTemporalDictionaryEnsemble(BaseClassifier):
+    """Fast Temporal Dictionary Ensemble (TDE).
+
+    Implementation of the dictionary based Temporal Dictionary Ensemble as described
+    in [1]_, built on the faster SFAFast transform.
+
+    Overview: Input n series of length m with d dimensions.
+    TDE searches k parameter values, selected using a Gaussian processes
+    regressor, evaluating each with LOOCV. It then retains the s best
+    ensemble members.
+    There are six primary parameters for individual classifiers:
+        - alpha: alphabet size
+        - w: window length
+        - l: word length
+        - p: normalise/no normalise
+        - h: levels
+        - b: MCB/IGB
+    For any combination, an individual TDE classifier slides a window of
+    length w along the series. Each w-length window is shortened to an
+    l-length word by taking a Fourier transform and keeping the first l/2
+    complex coefficients. These l coefficients are then discretised into
+    alpha possible values, to form a word of length l, using breakpoints
+    found with b. A histogram of words for each series is formed and stored,
+    using a spatial pyramid of h levels. For multivariate series, accuracy
+    from a reduced histogram is used to select dimensions.
+
+    fit involves finding n histograms.
+    predict uses 1 nearest neighbour with a histogram intersection
+    distance function.
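+
+    For intuition, the word formation for one window can be sketched as
+    follows. This is an illustrative simplification only: the real transform
+    learns its breakpoints per coefficient via MCB or IGB, whereas the fixed
+    breakpoints here are hypothetical::
+
+        import numpy as np
+
+        def sfa_word(window, word_length=4, breakpoints=(-0.5, 0.0, 0.5)):
+            # truncated Fourier transform: keep the first word_length / 2
+            # complex coefficients, i.e. word_length real values
+            coeffs = np.fft.rfft(window)[: word_length // 2]
+            reals = np.empty(word_length)
+            reals[0::2] = coeffs.real
+            reals[1::2] = coeffs.imag
+            # discretise each value into alphabet_size = 4 letters
+            return tuple(np.searchsorted(breakpoints, reals))
+
+        word = sfa_word(np.sin(np.linspace(0, 4, 16)))  # a 4-letter word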
+
+    Parameters
+    ----------
+    n_parameter_samples : int, default=250
+        Number of parameter combinations to consider for the final ensemble.
+    max_ensemble_size : int, default=50
+        Maximum number of estimators in the ensemble.
+    max_win_len_prop : float, default=1
+        Maximum window length as a proportion of series length, must be between 0
+        and 1.
+    min_window : int, default=10
+        Minimum window length.
+    randomly_selected_params : int, default=50
+        Number of parameters randomly selected before the Gaussian process parameter
+        selection is used.
+    bigrams : boolean or None, default=None
+        Whether to use bigrams, defaults to True for univariate data and False for
+        multivariate data.
+    dim_threshold : float, default=0.85
+        Dimension accuracy threshold for multivariate data, must be between 0 and 1.
+    max_dims : int, default=20
+        Max number of dimensions per classifier for multivariate data.
+    time_limit_in_minutes : float, default=0.0
+        Time contract to limit build time in minutes, overriding n_parameter_samples.
+        Default of 0 means n_parameter_samples is used.
+    contract_max_n_parameter_samples : int, default=np.inf
+        Max number of parameter combinations to consider when time_limit_in_minutes
+        is set.
+    typed_dict : bool, default=True
+        Use a numba typed Dict to store word counts. May increase memory usage, but
+        will be faster for larger datasets. As the Dict cannot currently be pickled,
+        there will be some overhead converting it to a python dict with multiple
+        threads and pickling.
+    save_train_predictions : bool, default=False
+        Save the ensemble member train predictions in fit for use in _get_train_probs
+        leave-one-out cross-validation.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for both `fit` and `predict`.
+        ``-1`` means using all processors.
+    random_state : int or None, default=None
+        Seed for random number generation.
+
+    Attributes
+    ----------
+    n_classes_ : int
+        The number of classes.
+    classes_ : list
+        The classes labels.
+    n_instances_ : int
+        The number of train cases.
+    n_dims_ : int
+        The number of dimensions per case.
+    series_length_ : int
+        The length of each series.
+    n_estimators_ : int
+        The final number of classifiers used (<= max_ensemble_size).
+    estimators_ : list of shape (n_estimators) of IndividualTDE
+        The collections of estimators trained in fit.
+    weights_ : list of shape (n_estimators) of float
+        Weight of each estimator in the ensemble.
+
+    See Also
+    --------
+    IndividualTDE, ContractableBOSS
+
+    Notes
+    -----
+    For the Java version, see
+    `TSML `_.
+
+    References
+    ----------
+    .. [1] Matthew Middlehurst, James Large, Gavin Cawley and Anthony Bagnall
+       "The Temporal Dictionary Ensemble (TDE) Classifier for Time Series
+       Classification", in proceedings of the European Conference on Machine Learning
+       and Principles and Practice of Knowledge Discovery in Databases, 2020.
+
+    Examples
+    --------
+    >>> from sktime.classification.dictionary_based import (
+    ...     FastTemporalDictionaryEnsemble,
+    ... )
+    >>> from sktime.datasets import load_unit_test
+    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
+    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True)
+    >>> clf = FastTemporalDictionaryEnsemble(
+    ...     n_parameter_samples=10,
+    ...     max_ensemble_size=3,
+    ...     randomly_selected_params=5,
+    ... )
+    >>> clf.fit(X_train, y_train)
+    FastTemporalDictionaryEnsemble(...)
+ >>> y_pred = clf.predict(X_test) + """ + + _tags = { + "capability:multivariate": False, + "capability:train_estimate": True, + "capability:contractable": True, + "capability:multithreading": True, + "classifier_type": "dictionary", + } + + def __init__( + self, + n_parameter_samples=250, + max_ensemble_size=50, + max_win_len_prop=1, + min_window=10, + randomly_selected_params=50, + bigrams=None, + dim_threshold=0.85, + max_dims=20, + time_limit_in_minutes=0.0, + contract_max_n_parameter_samples=np.inf, + typed_dict=True, + save_train_predictions=False, + n_jobs=1, + random_state=None, + ): + self.n_parameter_samples = n_parameter_samples + self.max_ensemble_size = max_ensemble_size + self.max_win_len_prop = max_win_len_prop + self.min_window = min_window + self.randomly_selected_params = randomly_selected_params + self.bigrams = bigrams + + # multivariate + self.dim_threshold = dim_threshold + self.max_dims = max_dims + + self.time_limit_in_minutes = time_limit_in_minutes + self.contract_max_n_parameter_samples = contract_max_n_parameter_samples + self.typed_dict = typed_dict + self.save_train_predictions = save_train_predictions + self.random_state = random_state + self.n_jobs = n_jobs + + self.n_instances_ = 0 + self.n_dims_ = 0 + self.series_length_ = 0 + self.n_estimators_ = 0 + self.estimators_ = [] + self.weights_ = [] + + self._word_lengths = [16, 14, 12, 10, 8] + self._norm_options = [True, False] + self._levels = [1, 2, 3] + self._igb_options = [True, False] + self._alphabet_size = 4 + self._weight_sum = 0 + self._prev_parameters_x = [] + self._prev_parameters_y = [] + self._min_window = min_window + + super(FastTemporalDictionaryEnsemble, self).__init__() + + def _fit(self, X, y): + """Fit an ensemble on cases (X,y), where y is the target variable. + + Build an ensemble of base TDE classifiers from the training set (X, + y), through an optimised selection over the para space to make a fixed size + ensemble of the best. + + Parameters + ---------- + X : 3D np.array of shape = [n_instances, n_dimensions, series_length] + The training data. + y : array-like, shape = [n_instances] + The class labels. + + Returns + ------- + self : + Reference to self. + + Notes + ----- + Changes state by creating a fitted model that updates attributes + ending in "_" and sets is_fitted flag to True. + """ + if self.n_parameter_samples <= self.randomly_selected_params: + warnings.warn( + "TemporalDictionaryEnsemble warning: n_parameter_samples <= " + "randomly_selected_params, ensemble member parameters will be fully " + "randomly selected.", + stacklevel=1, + ) + + self.n_instances_, self.n_dims_, self.series_length_ = X.shape + + self.estimators_ = [] + self.weights_ = [] + self._prev_parameters_x = [] + self._prev_parameters_y = [] + + # Window length parameter space dependent on series length + max_window_searches = self.series_length_ / 4 + max_window = int(self.series_length_ * self.max_win_len_prop) + + if self.min_window >= max_window: + self._min_window = max_window + warnings.warn( + f"TemporalDictionaryEnsemble warning: min_window = " + f"{self.min_window} is larger than max_window = {max_window}." 
+ f" min_window has been set to {max_window}.", + stacklevel=1, + ) + + win_inc = int((max_window - self._min_window) / max_window_searches) + if win_inc < 1: + win_inc = 1 + + possible_parameters = self._unique_parameters(max_window, win_inc) + num_classifiers = 0 + subsample_size = int(self.n_instances_ * 0.7) + lowest_acc = 1 + lowest_acc_idx = 0 + + time_limit = self.time_limit_in_minutes * 60 + start_time = time.time() + train_time = 0 + if time_limit > 0: + n_parameter_samples = 0 + contract_max_n_parameter_samples = self.contract_max_n_parameter_samples + else: + n_parameter_samples = self.n_parameter_samples + contract_max_n_parameter_samples = np.inf + + rng = check_random_state(self.random_state) + + if self.bigrams is None: + if self.n_dims_ > 1: + use_bigrams = False + else: + use_bigrams = True + else: + use_bigrams = self.bigrams + + # use time limit or n_parameter_samples if limit is 0 + while ( + ( + train_time < time_limit + and num_classifiers < contract_max_n_parameter_samples + ) + or num_classifiers < n_parameter_samples + ) and len(possible_parameters) > 0: + if num_classifiers < self.randomly_selected_params: + parameters = possible_parameters.pop( + rng.randint(0, len(possible_parameters)) + ) + else: + scaler = preprocessing.StandardScaler() + scaler.fit(self._prev_parameters_x) + gp = KernelRidge(kernel="poly", degree=1) + gp.fit( + scaler.transform(self._prev_parameters_x), self._prev_parameters_y + ) + preds = gp.predict(scaler.transform(possible_parameters)) + parameters = possible_parameters.pop( + rng.choice(np.flatnonzero(preds == preds.max())) + ) + + subsample = rng.choice( + self.n_instances_, size=subsample_size, replace=False + ) + X_subsample = X[subsample] + y_subsample = y[subsample] + + tde = IndividualTDE( + *parameters, + alphabet_size=self._alphabet_size, + bigrams=use_bigrams, + dim_threshold=self.dim_threshold, + max_dims=self.max_dims, + typed_dict=self.typed_dict, + n_jobs=self._threads_to_use, + random_state=self.random_state, + ) + tde.fit(X_subsample, y_subsample) + tde._subsample = subsample + + tde._accuracy = self._individual_train_acc( + tde, + y_subsample, + subsample_size, + 0 if num_classifiers < self.max_ensemble_size else lowest_acc, + ) + if tde._accuracy > 0: + weight = math.pow(tde._accuracy, 4) + else: + weight = 0.000000001 + + if num_classifiers < self.max_ensemble_size: + if tde._accuracy < lowest_acc: + lowest_acc = tde._accuracy + lowest_acc_idx = num_classifiers + self.weights_.append(weight) + self.estimators_.append(tde) + elif tde._accuracy > lowest_acc: + self.weights_[lowest_acc_idx] = weight + self.estimators_[lowest_acc_idx] = tde + lowest_acc, lowest_acc_idx = self._worst_ensemble_acc() + + self._prev_parameters_x.append(parameters) + self._prev_parameters_y.append(tde._accuracy) + + num_classifiers += 1 + train_time = time.time() - start_time + + self.n_estimators_ = len(self.estimators_) + self._weight_sum = np.sum(self.weights_) + + return self + + def _predict(self, X) -> np.ndarray: + """Predict class values of n instances in X. + + Parameters + ---------- + X : 3D np.array of shape = [n_instances, n_dimensions, series_length] + The data to make predictions for. + + Returns + ------- + y : array-like, shape = [n_instances] + Predicted class labels. 
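+
+        Notes
+        -----
+        Ties in the aggregated class probabilities are broken uniformly at
+        random rather than by class order. A minimal sketch of the tie-break
+        used below (the probability row is made up for illustration)::
+
+            import numpy as np
+
+            rng = np.random.RandomState(0)
+            prob = np.array([0.4, 0.4, 0.2])
+            winners = np.flatnonzero(prob == prob.max())  # all tied maxima
+            label_idx = int(rng.choice(winners))  # pick one winner at random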
+ """ + rng = check_random_state(self.random_state) + return np.array( + [ + self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))] + for prob in self._predict_proba(X) + ] + ) + + def _predict_proba(self, X) -> np.ndarray: + """Predict class probabilities for n instances in X. + + Parameters + ---------- + X : 3D np.array of shape = [n_instances, n_dimensions, series_length] + The data to make predict probabilities for. + + Returns + ------- + y : array-like, shape = [n_instances, n_classes_] + Predicted probabilities using the ordering in classes_. + """ + _, _, series_length = X.shape + if series_length != self.series_length_: + raise TypeError( + "ERROR number of attributes in the train does not match " + "that in the test data" + ) + + sums = np.zeros((X.shape[0], self.n_classes_)) + + for n, clf in enumerate(self.estimators_): + preds = clf.predict(X) + for i in range(0, X.shape[0]): + sums[i, self._class_dictionary[preds[i]]] += self.weights_[n] + + return sums / (np.ones(self.n_classes_) * self._weight_sum) + + def _worst_ensemble_acc(self): + min_acc = 1.0 + min_acc_idx = 0 + + for c, classifier in enumerate(self.estimators_): + if classifier._accuracy < min_acc: + min_acc = classifier._accuracy + min_acc_idx = c + + return min_acc, min_acc_idx + + def _unique_parameters(self, max_window, win_inc): + possible_parameters = [ + [win_size, word_len, normalise, levels, igb] + for normalise in self._norm_options + for win_size in range(self._min_window, max_window + 1, win_inc) + for word_len in self._word_lengths + for levels in self._levels + for igb in self._igb_options + ] + + return possible_parameters + + def _get_train_probs(self, X, y, train_estimate_method="loocv") -> np.ndarray: + self.check_is_fitted() + X, y = check_X_y(X, y, coerce_to_numpy=True) + + n_instances, n_dims, series_length = X.shape + + if ( + n_instances != self.n_instances_ + or n_dims != self.n_dims_ + or series_length != self.series_length_ + ): + raise ValueError( + "n_instances, n_dims, series_length mismatch. X should be " + "the same as the training data used in fit for generating train " + "probabilities." + ) + + results = np.zeros((n_instances, self.n_classes_)) + divisors = np.zeros(n_instances) + + if train_estimate_method.lower() == "loocv": + for i, clf in enumerate(self.estimators_): + subsample = clf._subsample + preds = ( + clf._train_predictions + if self.save_train_predictions + else Parallel(n_jobs=self._threads_to_use, prefer="threads")( + delayed(clf._train_predict)( + i, + ) + for i in range(len(subsample)) + ) + ) + + for n, pred in enumerate(preds): + results[subsample[n]][ + self._class_dictionary[pred] + ] += self.weights_[i] + divisors[subsample[n]] += self.weights_[i] + elif train_estimate_method.lower() == "oob": + indices = range(n_instances) + for i, clf in enumerate(self.estimators_): + oob = [n for n in indices if n not in clf._subsample] + + if len(oob) == 0: + continue + + preds = clf.predict(X[oob]) + + for n, pred in enumerate(preds): + results[oob[n]][self._class_dictionary[pred]] += self.weights_[i] + divisors[oob[n]] += self.weights_[i] + else: + raise ValueError( + "Invalid train_estimate_method. 
Available options: loocv, oob"
+            )
+
+        for i in range(n_instances):
+            results[i] = (
+                np.ones(self.n_classes_) * (1 / self.n_classes_)
+                if divisors[i] == 0
+                else results[i] / (np.ones(self.n_classes_) * divisors[i])
+            )
+
+        return results
+
+    def _individual_train_acc(self, tde, y, train_size, lowest_acc):
+        correct = 0
+        required_correct = int(lowest_acc * train_size)
+
+        if self._threads_to_use > 1:
+            c = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
+                delayed(tde._train_predict)(
+                    i,
+                )
+                for i in range(train_size)
+            )
+
+            for i in range(train_size):
+                # early abandon if the required accuracy cannot be reached
+                if correct + train_size - i < required_correct:
+                    return -1
+                elif c[i] == y[i]:
+                    correct += 1
+
+                if self.save_train_predictions:
+                    tde._train_predictions.append(c[i])
+
+        else:
+            for i in range(train_size):
+                # early abandon if the required accuracy cannot be reached
+                if correct + train_size - i < required_correct:
+                    return -1
+
+                c = tde._train_predict(i)
+
+                if c == y[i]:
+                    correct += 1
+
+                if self.save_train_predictions:
+                    tde._train_predictions.append(c)
+
+        return correct / train_size
+
+    @classmethod
+    def get_test_params(cls, parameter_set="default"):
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return the `"default"`
+            set. For classifiers, a "default" set of parameters should be provided
+            for general testing, and a "results_comparison" set for comparing
+            against previously recorded results if the general set does not produce
+            suitable probabilities to compare against.
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class.
+            Each dict are parameters to construct an "interesting" test instance,
+            i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a valid
+            test instance. `create_test_instance` uses the first (or only)
+            dictionary in `params`.
+        """
+        if parameter_set == "results_comparison":
+            return {
+                "n_parameter_samples": 10,
+                "max_ensemble_size": 5,
+                "randomly_selected_params": 5,
+            }
+        else:
+            return {
+                "n_parameter_samples": 5,
+                "max_ensemble_size": 2,
+                "randomly_selected_params": 3,
+                "save_train_predictions": True,
+            }
+
+
+class IndividualTDE(BaseClassifier):
+    """Single TDE classifier, an extension of the Bag of SFA Symbols (BOSS) model.
+
+    Base classifier for the TDE ensemble. Implementation of the single TDE base
+    model from Middlehurst et al. [1]_.
+
+    Overview: input n series of length m, on which IndividualTDE performs an SFA
+    transform to form a sparse dictionary of discretised words. The resulting
+    dictionary is used with the histogram intersection distance function in a
+    1 nearest neighbour classifier.
+
+    fit involves finding n histograms.
+
+    predict uses 1 nearest neighbour with the histogram intersection distance
+    function.
+
+    Parameters
+    ----------
+    window_size : int, default=10
+        Size of the window to use in the SFA transform.
+    word_length : int, default=8
+        Length of word to use in the SFA transform.
+    norm : bool, default=False
+        Whether to normalize SFA words by dropping the first Fourier coefficient.
+    levels : int, default=1
+        The number of spatial pyramid levels for the SFA transform.
+    igb : bool, default=False
+        Whether to use Information Gain Binning (IGB) or
+        Multiple Coefficient Binning (MCB) for the SFA transform.
+    alphabet_size : int, default=4
+        Number of possible letters (values) for each word.
+    bigrams : bool, default=True
+        Whether to record word bigrams in the SFA transform.
+    dim_threshold : float, default=0.85
+        Accuracy threshold as a proportion of the highest accuracy dimension for
+        words extracted from each dimension. Only applicable for multivariate data.
+    max_dims : int, default=20
+        Maximum number of dimensions words are extracted from. Only applicable for
+        multivariate data.
+    typed_dict : bool, default=True
+        Use a numba TypedDict to store word counts. May increase memory usage, but
+        will be faster for larger datasets.
+    n_jobs : int, default=1
+        The number of jobs to run in parallel for both `fit` and `predict`.
+        ``-1`` means using all processors.
+    random_state : int or None, default=None
+        Seed for random number generation.
+
+    Attributes
+    ----------
+    n_classes_ : int
+        The number of classes.
+    classes_ : list
+        The classes labels.
+    n_instances_ : int
+        The number of train cases.
+    n_dims_ : int
+        The number of dimensions per case.
+    series_length_ : int
+        The length of each series.
+
+    See Also
+    --------
+    FastTemporalDictionaryEnsemble, SFA
+
+    Notes
+    -----
+    For the Java version, see
+    `TSML `_.
+
+    References
+    ----------
+    .. [1] Matthew Middlehurst, James Large, Gavin Cawley and Anthony Bagnall
+       "The Temporal Dictionary Ensemble (TDE) Classifier for Time Series
+       Classification", in proceedings of the European Conference on Machine Learning
+       and Principles and Practice of Knowledge Discovery in Databases, 2020.
+
+    Examples
+    --------
+    >>> from sktime.classification.dictionary_based import IndividualTDE
+    >>> from sktime.datasets import load_unit_test
+    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
+    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True)
+    >>> clf = IndividualTDE()
+    >>> clf.fit(X_train, y_train)
+    IndividualTDE(...)
+    >>> y_pred = clf.predict(X_test)
+    """
+
+    _tags = {
+        "capability:multivariate": False,
+        "capability:multithreading": True,
+    }
+
+    def __init__(
+        self,
+        window_size=10,
+        word_length=8,
+        norm=False,
+        levels=1,
+        igb=False,
+        alphabet_size=4,
+        bigrams=True,
+        dim_threshold=0.85,
+        max_dims=20,
+        typed_dict=True,
+        n_jobs=1,
+        random_state=None,
+    ):
+        self.window_size = window_size
+        self.word_length = word_length
+        self.norm = norm
+        self.levels = levels
+        self.igb = igb
+        self.alphabet_size = alphabet_size
+        self.bigrams = bigrams
+
+        # multivariate
+        self.dim_threshold = dim_threshold
+        self.max_dims = max_dims
+
+        self.typed_dict = typed_dict
+        self.n_jobs = n_jobs
+        self.random_state = random_state
+
+        self.n_instances_ = 0
+        self.n_dims_ = 0
+        self.series_length_ = 0
+
+        self._transformers = []
+        self._transformed_data = []
+        self._class_vals = []
+        self._dims = []
+        self._highest_dim_bit = 0
+        self._accuracy = 0
+        self._subsample = []
+        self._train_predictions = []
+
+        super(IndividualTDE, self).__init__()
+
+    def _fit(self, X, y):
+        """Fit a single base TDE classifier on n_instances cases (X,y).
+
+        Parameters
+        ----------
+        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
+            The training data.
+        y : array-like, shape = [n_instances]
+            The class labels.
+
+        Returns
+        -------
+        self :
+            Reference to self.
+
+        Notes
+        -----
+        Changes state by creating a fitted model that updates attributes
+        ending in "_" and sets the is_fitted flag to True. For ``levels > 1``,
+        the bag is extended with a spatial pyramid, sketched below.
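+
+        A rough sketch of the quadrant slicing behind that pyramid
+        (illustrative only; the real code re-bags the raw SFA words of each
+        quadrant through the fitted transformer)::
+
+            import numpy as np
+
+            words = np.arange(12)    # stand-in for one series' word sequence
+            for lev in range(1, 3):  # pyramid levels above the full series
+                n_quadrants = 2 ** (lev - 1)
+                for quadrant in np.array_split(words, n_quadrants):
+                    pass             # each quadrant is bagged separately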
+        """
+        self.n_instances_, self.n_dims_, self.series_length_ = X.shape
+        self._class_vals = y
+
+        self._transformers.append(
+            SFAFast(
+                word_length=self.word_length,
+                alphabet_size=self.alphabet_size,
+                window_size=self.window_size,
+                norm=self.norm,
+                # levels=self.levels,
+                binning_method="information-gain" if self.igb else "equi-depth",
+                bigrams=self.bigrams,
+                remove_repeat_words=self.levels == 1,
+                lower_bounding=False,
+                save_words=self.levels > 1,  # raw words only needed for pyramids
+                n_jobs=self._threads_to_use,
+            )
+            # SFA(
+            #     word_length=self.word_length,
+            #     alphabet_size=self.alphabet_size,
+            #     window_size=self.window_size,
+            #     norm=self.norm,
+            #     levels=self.levels,
+            #     binning_method="information-gain" if self.igb else "equi-depth",
+            #     bigrams=self.bigrams,
+            #     remove_repeat_words=True,
+            #     lower_bounding=False,
+            #     save_words=False,
+            #     use_fallback_dft=True,
+            #     typed_dict=self.typed_dict,
+            #     n_jobs=self._threads_to_use,
+            # )
+        )
+        bag = self._transformers[0].fit_transform(X, y)
+
+        # adding pyramids from the raw words saved during fit_transform
+        if self.levels > 1:
+            bag = self._add_to_pyramid()
+
+        self._transformed_data = bag
+
+        return self
+
+    def _add_to_pyramid(self):
+        # re-bags the words most recently produced by the transformer: the
+        # train words after fit_transform, or the test words after transform
+        sfa = self._transformers[0]
+        words = sfa.words
+
+        sfa_words = []
+        for lev in range(1, self.levels):
+            # number of quadrants equal to pow(2, level - 1)
+            level = pow(2, lev - 1)
+            for i in range(0, level):
+                quadrant_words = words[
+                    :,
+                    int(i * words.shape[-1] / level) : int(
+                        (i + 1) * words.shape[-1] / level
+                    ),
+                ]
+
+                if quadrant_words.shape[-1] > 0:
+                    sfa_words.append(
+                        sfa.transform_to_bag(
+                            quadrant_words, sfa.word_length_actual, None
+                        )
+                    )
+
+        if type(sfa_words[0]) is np.ndarray:
+            all_words = np.concatenate(sfa_words, axis=1)
+        else:
+            all_words = hstack(sfa_words)
+
+        return all_words
+
+    def _predict(self, X):
+        """Predict class values of all instances in X.
+
+        Parameters
+        ----------
+        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
+            The data to make predictions for.
+
+        Returns
+        -------
+        y : array-like, shape = [n_instances]
+            Predicted class labels.
+        """
+        test_bags = self._transformers[0].transform(X)
+
+        # transform saves the raw test words on the transformer (save_words),
+        # so _add_to_pyramid re-bags the words of the test series here
+        if self.levels > 1:
+            test_bags = self._add_to_pyramid()
+
+        classes = Parallel(n_jobs=self._threads_to_use, prefer="threads")(
+            delayed(self._test_nn)(
+                test_bag,
+            )
+            for test_bag in test_bags
+        )
+
+        return np.array(classes)
+
+    def _test_nn(self, test_bag):
+        rng = check_random_state(self.random_state)
+
+        best_sim = -1
+        nn = None
+
+        for n, bag in enumerate(self._transformed_data):
+            sim = histogram_intersection(test_bag, bag)
+
+            # ties between equally similar neighbours are broken at random
+            if sim > best_sim or (sim == best_sim and rng.random() < 0.5):
+                best_sim = sim
+                nn = self._class_vals[n]
+
+        return nn
+
+    def _train_predict(self, train_num, bags=None):
+        if bags is None:
+            bags = self._transformed_data
+
+        test_bag = bags[train_num]
+        best_sim = -1
+        nn_label = None
+
+        for n, bag in enumerate(bags):
+            if n == train_num:
+                continue
+
+            sim = histogram_intersection(test_bag, bag)
+
+            if sim > best_sim:
+                best_sim = sim
+                nn_label = self._class_vals[n]
+
+        return nn_label
+
+
+def histogram_intersection(first, second):
+    """Find the similarity between two histograms using histogram intersection.
+
+    Larger values indicate more similar histograms. This function is designed
+    for sparse matrices and bags represented as a dictionary or numba Dict,
+    but can also accept dense arrays.
+
+    Parameters
+    ----------
+    first : dict, numba.Dict or array
+        First histogram used in the similarity measurement.
+    second : dict, numba.Dict or array
+        Second histogram, measured against `first`.
+
+    Returns
+    -------
+    sim : float
+        The histogram intersection similarity between `first` and `second`.
+    """
+    if isinstance(first, csr_matrix):  # sparse csr matrix
+        return hist_intersection(first, second)
+    else:  # dense numpy array
+        # histogram intersection is the sum of the element-wise minima
+        return np.sum(
+            [
+                0 if first[n] == 0 else min(first[n], second[n])
+                for n in range(len(first))
+            ]
+        )
+
+
+@njit(fastmath=True, cache=True)
+def _histogram_intersection_dict(first, second):
+    sim = 0
+    for word, val_a in first.items():
+        val_b = second.get(word, types.uint32(0))
+        sim += min(val_a, val_b)
+    return sim
+
+
+def hist_intersection(X, Y):
+    """Compute the histogram intersection for two scipy csr_matrix."""
+    # all mass of the intersection lies in the columns where Y is non-zero
+    mask = Y.nonzero()[-1]
+    return np.sum(X[:, mask].minimum(Y[:, mask]))
diff --git a/sktime/transformations/panel/dictionary_based/_sfa_fast.py b/sktime/transformations/panel/dictionary_based/_sfa_fast.py
index 4fcfaccbf7..779109f69b 100644
--- a/sktime/transformations/panel/dictionary_based/_sfa_fast.py
+++ b/sktime/transformations/panel/dictionary_based/_sfa_fast.py
@@ -360,9 +360,9 @@ def transform(self, X, y=None):
             self.lower_bounding,
         )
 
-        # only save at fit
-        # if self.save_words:
-        #     self.words = words
+        # TODO only save at fit? words are also needed for TDE pyramids in predict
+        if self.save_words:
+            self.words = words
 
         # transform: applies the feature selection strategy
         empty_dict = Dict.empty(

From ae054000d5c1d6e27345a1a3d777f5277c25790c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Patrick=20Sch=C3=A4fer?=
Date: Mon, 6 Mar 2023 19:22:14 +0100
Subject: [PATCH 2/3] this PR adds WEASEL with dilation

---
 CODEOWNERS                                    |   1 +
 docs/source/api_reference/classification.rst  |   1 +
 .../dictionary_based/__init__.py              |   2 +
 .../dictionary_based/_weasel_v2.py            | 457 ++++++++++++++++++
 4 files changed, 461 insertions(+)
 create mode 100644 sktime/classification/dictionary_based/_weasel_v2.py

diff --git a/CODEOWNERS b/CODEOWNERS
index 2aa9f98af7..31161c40ef 100644
--- a/CODEOWNERS
+++ b/CODEOWNERS
@@ -13,6 +13,7 @@ sktime/classification/dictionary_based/_cboss.py @patrickzib @MatthewMiddlehurst
 sktime/classification/dictionary_based/_muse.py @patrickzib @MatthewMiddlehurst @TonyBagnall
 sktime/classification/dictionary_based/_tde.py @patrickzib @MatthewMiddlehurst @TonyBagnall
 sktime/classification/dictionary_based/_weasel.py @patrickzib @MatthewMiddlehurst @TonyBagnall
+sktime/classification/dictionary_based/_weasel_v2.py @patrickzib
 sktime/classification/distance_based/ @jasonlines @goaster @TonyBagnall
 sktime/classification/dummy/ @ZiyaoWei
 sktime/classification/early_classification/_probability_threshold.py @MatthewMiddlehurst
diff --git a/docs/source/api_reference/classification.rst b/docs/source/api_reference/classification.rst
index 4ccd1b6305..0b42e476e2 100644
--- a/docs/source/api_reference/classification.rst
+++ b/docs/source/api_reference/classification.rst
@@ -55,6 +55,7 @@ Dictionary-based
     MUSE
     TemporalDictionaryEnsemble
     WEASEL
+    WEASEL_V2
 
 Distance-based
 --------------
diff --git a/sktime/classification/dictionary_based/__init__.py b/sktime/classification/dictionary_based/__init__.py
index 63d2c566d1..dec3711c15 100644
--- a/sktime/classification/dictionary_based/__init__.py
+++ b/sktime/classification/dictionary_based/__init__.py
@@ -7,6 +7,7 @@
     "TemporalDictionaryEnsemble",
     "IndividualTDE",
     "WEASEL",
+    "WEASEL_V2",
     "MUSE",
 ]
 
@@ -18,3 +19,4 @@
     TemporalDictionaryEnsemble,
 )
 from sktime.classification.dictionary_based._weasel import WEASEL
+from 
sktime.classification.dictionary_based._weasel_v2 import WEASEL_V2
diff --git a/sktime/classification/dictionary_based/_weasel_v2.py b/sktime/classification/dictionary_based/_weasel_v2.py
new file mode 100644
index 0000000000..724ad9c0e1
--- /dev/null
+++ b/sktime/classification/dictionary_based/_weasel_v2.py
@@ -0,0 +1,457 @@
+# -*- coding: utf-8 -*-
+"""WEASEL 2.0 classifier.
+
+A Random Dilated Dictionary Transform for Fast, Accurate and Memory Constrained
+Time Series Classification.
+"""
+
+__author__ = ["patrickzib"]
+__all__ = ["WEASEL_V2"]
+
+import numpy as np
+from joblib import Parallel, delayed
+from scipy.sparse import hstack
+from sklearn.linear_model import LogisticRegression, RidgeClassifierCV
+from sklearn.utils import check_random_state
+
+from sktime.classification.base import BaseClassifier
+from sktime.transformations.panel.dictionary_based import SFAFast
+
+
+class WEASEL_V2(BaseClassifier):
+    """Word Extraction for Time Series Classification (WEASEL) v2.0.
+
+    Overview: Input n series of length m.
+    WEASEL 2.0 is a dictionary classifier that builds a bag-of-patterns using
+    SFA for different window lengths and learns a logistic regression
+    classifier on this bag.
+
+    WEASEL 2.0 has three key parameters that are automatically set based on the
+    length of the time series:
+    (1) Minimal window length: typically defaulted to 4.
+    (2) Maximal window length: typically chosen from
+        24, 44 or 84, depending on the time series length.
+    (3) Ensemble size: typically chosen from 50, 100 or 150, to derive
+        a feature vector of roughly 20k up to 70k features (distinct words).
+
+    For the other parameters passed, WEASEL chooses random values for each set
+    of configurations. E.g. for each of 150 configurations, a random value is
+    chosen from the options below.
+
+    Parameters
+    ----------
+    min_window : int, default=4
+        Minimal length of the subsequences to compute words from.
+    norm_options : array of bool, default=[False]
+        If the array contains True, words are computed over mean-normed TS.
+        If the array contains False, words are computed over raw TS.
+        If both are set, words are computed for both.
+        A value will be randomly chosen for each parameter-configuration.
+    word_lengths : array of int, default=[7, 8]
+        Length of the words to compute. A value will be randomly chosen for each
+        parameter-configuration.
+    use_first_differences : array of bool, default=[True, False]
+        If the array contains True, words are computed over first order differences.
+        If the array contains False, words are computed over the raw time series.
+        If both are set, words are computed for both.
+    feature_selection : {"chi2_top_k", "none", "random"}, default="chi2_top_k"
+        Sets the feature selection strategy to be used. Large amounts of memory
+        may be needed depending on the setting of bigrams (True is more) or
+        alpha (larger is more).
+        'chi2_top_k' reduces the number of words to at most 'max_feature_count',
+        dropping values based on p-value.
+        'random' reduces the number to at most 'max_feature_count',
+        by randomly selecting features.
+        'none' does not apply any feature selection and yields a large bag of
+        words.
+    max_feature_count : int, default=30_000
+        Size of the dictionary, i.e. the number of words to use, if
+        feature_selection is set to "chi2" or "random". Else ignored.
+    support_probabilities : bool, default=False
+        If set to False, a RidgeClassifierCV will be trained, which has higher
+        accuracy and is faster, yet does not support predict_proba.
+        If set to True, a LogisticRegression will be trained, which does support
+        predict_proba(), yet is slower and typically less accurate.
+        predict_proba() is needed, for example, in early classification methods
+        such as TEASER.
+    n_jobs : int, default=4
+        The number of jobs to run in parallel for both `fit` and `predict`.
+        ``-1`` means using all processors.
+    random_state : int or None, default=None
+        Seed for random number generation.
+
+    Attributes
+    ----------
+    n_classes_ : int
+        The number of classes.
+    classes_ : list
+        The classes labels.
+
+    See Also
+    --------
+    MUSE
+
+    References
+    ----------
+    .. [1] Patrick Schäfer and Ulf Leser, "WEASEL 2.0 -- A Random Dilated Dictionary
+       Transform for Fast, Accurate and Memory Constrained Time Series
+       Classification", Preprint, https://arxiv.org/abs/2301.10194
+
+    Examples
+    --------
+    >>> from sktime.classification.dictionary_based import WEASEL_V2
+    >>> from sktime.datasets import load_unit_test
+    >>> X_train, y_train = load_unit_test(split="train", return_X_y=True)
+    >>> X_test, y_test = load_unit_test(split="test", return_X_y=True)
+    >>> clf = WEASEL_V2()
+    >>> clf.fit(X_train, y_train)
+    WEASEL_V2(...)
+    >>> y_pred = clf.predict(X_test)
+    """
+
+    _tags = {
+        "capability:multithreading": True,
+        "classifier_type": "dictionary",
+    }
+
+    def __init__(
+        self,
+        min_window=4,
+        norm_options=(False,),
+        word_lengths=(7, 8),
+        use_first_differences=(True, False),
+        feature_selection="chi2_top_k",
+        max_feature_count=30_000,
+        random_state=None,
+        support_probabilities=False,
+        n_jobs=4,
+    ):
+        self.alphabet_sizes = [2]
+        self.binning_strategies = ["equi-depth", "equi-width"]
+
+        self.anova = False
+        self.variance = True
+        self.bigrams = False
+        self.lower_bounding = True
+        self.remove_repeat_words = False
+
+        self.norm_options = norm_options
+        self.word_lengths = word_lengths
+
+        self.random_state = random_state
+
+        self.min_window = min_window
+        self.max_window = 84
+        self.ensemble_size = 150
+        self.max_feature_count = max_feature_count
+        self.use_first_differences = use_first_differences
+        self.feature_selection = feature_selection
+
+        self.window_sizes = []
+        self.series_length = 0
+        self.n_instances = 0
+
+        self.SFA_transformers = []
+
+        self.clf = None
+        self.n_jobs = n_jobs
+        self.support_probabilities = support_probabilities
+
+        # set_num_threads(n_jobs)
+
+        super(WEASEL_V2, self).__init__()
+
+    def _fit(self, X, y):
+        """Build a WEASEL_V2 classifier from the training set (X, y).
+
+        Parameters
+        ----------
+        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
+            The training data.
+        y : array-like, shape = [n_instances]
+            The class labels.
+
+        Returns
+        -------
+        self :
+            Reference to self.
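+
+        Notes
+        -----
+        Each ensemble member draws its dilation on an exponential scale, as
+        done in ``_parallel_fit`` further below; roughly (the values here are
+        illustrative)::
+
+            import numpy as np
+
+            rng = np.random.RandomState(0)
+            series_length, window_size = 150, 12
+            max_exp = np.log2((series_length - 1) / (window_size - 1))
+            dilation = max(1, int(2 ** rng.uniform(0, max_exp)))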
+        """
+        # Window length parameter space dependent on series length
+        self.n_instances, self.series_length = X.shape[0], X.shape[-1]
+        XX = X.squeeze(1)
+
+        # avoid overfitting with too many features
+        if self.n_instances < 250:
+            self.max_window = 24
+            self.ensemble_size = 50
+        elif self.series_length < 100:
+            self.max_window = 44
+            self.ensemble_size = 100
+        else:
+            self.max_window = 84
+            self.ensemble_size = 150
+
+        self.max_window = int(min(self.series_length, self.max_window))
+        if self.min_window > self.max_window:
+            raise ValueError(
+                f"Error in WEASEL, min_window = {self.min_window} is larger"
+                f" than max_window = {self.max_window}, series length is"
+                f" {self.series_length}. Try setting min_window to be smaller"
+                " than the series length in the constructor, but the"
+                " classifier may not work at all with very short series."
+            )
+
+        # Randomly choose window sizes
+        self.window_sizes = np.arange(self.min_window, self.max_window + 1, 1)
+
+        parallel_res = Parallel(n_jobs=self.n_jobs, timeout=99999, backend="threading")(
+            delayed(_parallel_fit)(
+                i,
+                XX,
+                y.copy(),
+                self.window_sizes,
+                self.alphabet_sizes,
+                self.word_lengths,
+                self.series_length,
+                self.norm_options,
+                self.use_first_differences,
+                self.binning_strategies,
+                self.variance,
+                self.anova,
+                self.bigrams,
+                self.lower_bounding,
+                self.n_jobs,
+                self.max_feature_count,
+                self.ensemble_size,
+                self.feature_selection,
+                self.remove_repeat_words,
+                self.random_state,
+            )
+            for i in range(self.ensemble_size)
+        )
+
+        sfa_words = []
+        for words, transformer in parallel_res:
+            self.SFA_transformers.extend(transformer)
+            sfa_words.extend(words)
+
+        # merging arrays from different threads
+        if type(sfa_words[0]) is np.ndarray:
+            all_words = np.concatenate(sfa_words, axis=1)
+        else:
+            all_words = hstack(sfa_words)
+
+        if not self.support_probabilities:
+            self.clf = RidgeClassifierCV(alphas=np.logspace(-1, 5, 10))
+        else:
+            self.clf = LogisticRegression(
+                max_iter=5000,
+                solver="liblinear",
+                dual=True,
+                penalty="l2",
+                random_state=self.random_state,
+                n_jobs=self.n_jobs,
+            )
+
+        self.clf.fit(all_words, y)
+        self.total_features_count = all_words.shape[1]
+        if hasattr(self.clf, "best_score_"):
+            self.cross_val_score = self.clf.best_score_
+
+        return self
+
+    def _predict(self, X) -> np.ndarray:
+        """Predict class values of n instances in X.
+
+        Parameters
+        ----------
+        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
+            The data to make predictions for.
+
+        Returns
+        -------
+        y : array-like, shape = [n_instances]
+            Predicted class labels.
+        """
+        bag = self._transform_words(X)
+        return self.clf.predict(bag)
+
+    def _predict_proba(self, X) -> np.ndarray:
+        """Predict class probabilities for n instances in X.
+
+        Parameters
+        ----------
+        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
+            The data to make predict probabilities for.
+
+        Returns
+        -------
+        y : array-like, shape = [n_instances, n_classes_]
+            Predicted probabilities using the ordering in classes_.
+        """
+        bag = self._transform_words(X)
+        if self.support_probabilities:
+            return self.clf.predict_proba(bag)
+        else:
+            raise ValueError(
+                "Error in WEASEL v2, please set support_probabilities=True to "
+                "allow for probabilities to be computed."
+ ) + + def _transform_words(self, X): + XX = X.squeeze(1) + + parallel_res = Parallel(n_jobs=self.n_jobs, timeout=99999, backend="threading")( + delayed(transformer.transform)(XX) for transformer in self.SFA_transformers + ) + + all_words = [] + for words in parallel_res: + # words = words.astype(np.float32) / norm + all_words.append(words) + + # X_features = self.rocket.transform(X) + + if type(all_words[0]) is np.ndarray: + # all_words.append(X_features) + all_words = np.concatenate(all_words, axis=1) + else: + # all_words.append(csr_matrix(X_features.values)) + all_words = hstack(all_words) + + return all_words + + @classmethod + def get_test_params(cls, parameter_set="default"): + """Return testing parameter settings for the estimator. + + Parameters + ---------- + parameter_set : str, default="default" + Name of the set of test parameters to return, for use in tests. If no + special parameters are defined for a value, will return `"default"` set. + + Returns + ------- + params : dict or list of dict, default={} + Parameters to create testing instances of the class. + Each dict are parameters to construct an "interesting" test instance, i.e., + `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. + `create_test_instance` uses the first (or only) dictionary in `params`. + """ + return { + "feature_selection": "none", + "support_probabilities": True, + } + + +def _parallel_fit( + i, + X, + y, + window_sizes, + alphabet_sizes, + word_lengths, + series_length, + norm_options, + use_first_differences, + binning_strategies, + variance, + anova, + bigrams, + lower_bounding, + n_jobs, + max_feature_count, + ensemble_size, + feature_selection, + remove_repeat_words, + random_state, +): + if random_state is None: + rng = check_random_state(None) + else: + rng = check_random_state(random_state + i) + + window_size = rng.choice(window_sizes) + dilation = np.maximum( + 1, + np.int32(2 ** rng.uniform(0, np.log2((series_length - 1) / (window_size - 1)))), + ) + + alphabet_size = rng.choice(alphabet_sizes) + + # maximize word-length + word_length = min(window_size - 2, rng.choice(word_lengths)) + norm = rng.choice(norm_options) + binning_strategy = rng.choice(binning_strategies) + + all_transformers = [] + all_words = [] + for first_difference in use_first_differences: + transformer = getSFAFast( + alphabet_size, + anova, + bigrams, + binning_strategy, + dilation, + ensemble_size, + feature_selection, + first_difference, + i, + lower_bounding, + max_feature_count, + n_jobs, + norm, + remove_repeat_words, + variance, + window_size, + word_length, + ) + + # generate SFA words on sample + words = transformer.fit_transform(X, y) + all_words.append(words) + all_transformers.append(transformer) + return all_words, all_transformers + + +def getSFAFast( + alphabet_size, + anova, + bigrams, + binning_strategy, + dilation, + ensemble_size, + feature_selection, + first_difference, + i, + lower_bounding, + max_feature_count, + n_jobs, + norm, + remove_repeat_words, + variance, + window_size, + word_length, +): + transformer = SFAFast( + variance=variance, + word_length=word_length, + alphabet_size=alphabet_size, + window_size=window_size, + norm=norm, + anova=anova, + binning_method=binning_strategy, + remove_repeat_words=remove_repeat_words, + bigrams=bigrams, + dilation=dilation, + lower_bounding=lower_bounding, + first_difference=first_difference, + feature_selection=feature_selection, + max_feature_count=max_feature_count // ensemble_size, + random_state=i, + return_sparse=False, + 
n_jobs=n_jobs, + ) + return transformer From d5d3200c8b1e05c84ff4257994df463f080d8885 Mon Sep 17 00:00:00 2001 From: MatthewMiddlehurst Date: Thu, 18 Jul 2024 20:11:18 +0100 Subject: [PATCH 3/3] docs start --- .../collection/dictionary_based/_sfa.py | 70 +++++++++---------- .../collection/dictionary_based/_sfa_fast.py | 69 +++++++++--------- 2 files changed, 66 insertions(+), 73 deletions(-) diff --git a/aeon/transformations/collection/dictionary_based/_sfa.py b/aeon/transformations/collection/dictionary_based/_sfa.py index 9bf638e3d5..2358b11626 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa.py +++ b/aeon/transformations/collection/dictionary_based/_sfa.py @@ -3,7 +3,7 @@ Configurable SFA transform for discretising time series into words. """ -__maintainer__ = [] +__maintainer__ = ["patrickzib", "MatthewMiddlehurst"] __all__ = ["SFA"] import math @@ -21,13 +21,14 @@ from aeon.transformations.collection import BaseCollectionTransformer -# The binning methods to use: equi-depth, equi-width, information gain or kmeans +# The binning methods to use binning_methods = { "equi-depth", "equi-width", "information-gain", "information-gain-mae", "kmeans", + "quantile", } @@ -40,47 +41,42 @@ class SFA(BaseCollectionTransformer): shorten the series with DFT discretise the shortened series into bins set by MFC form a word from these discrete values - by default SFA produces a single word per series (window_size=0) - if a window is used, it forms a histogram of counts of words. + SFA returns a dictionary of word counts for each series + + This is a slower but more flexible version of the SFA transform, which can store + greater than 64 bit words. This is at the cost of efficiency, however. Parameters ---------- - word_length: int, default = 8 - length of word to shorten window to (using PAA) - - alphabet_size: int, default = 4 - number of values to discretise each value to - - window_size: int, default = 12 - size of window for sliding. Input series - length for whole series transform - - norm: boolean, default = False - mean normalise words by dropping first fourier coefficient - - binning_method: {"equi-depth", "equi-width", "information-gain", - "information-gain-mae", "kmeans"}, default="equi-depth" - the binning method used to derive the breakpoints. - - anova: boolean, default = False - If True, the Fourier coefficient selection is done via a one-way - ANOVA test. If False, the first Fourier coefficients are selected. - Only applicable if labels are given - - bigrams: boolean, default = False - whether to create bigrams of SFA words - - skip_grams: boolean, default = False - whether to create skip-grams of SFA words - - remove_repeat_words: boolean, default = False - whether to use numerosity reduction (default False) + word_length : int, default=8 + Length of word to shorten window to (using PAA). + alphabet_size : int, default=4 + Number of values to discretise each value to. + window_size : int, default=12 + Size of window for sliding. Input series length for whole series transform. + norm : boolean, default=False + Mean normalise words by dropping first fourier coefficient. + binning_method : str, default="equi-depth" + The binning method used to derive the breakpoints. One of {"equi-depth", + "equi-width", "information-gain", "information-gain-mae", "kmeans", + "quantile"}. + anova : boolean, default=False + If True, the Fourier coefficient selection is done via a one-way ANOVA test. + If False, the first Fourier coefficients are selected. 
Only applicable if + labels are given. + bigrams : boolean, default=False + Whether to create bigrams of SFA words. + skip_grams : boolean, default=False + Whether to create skip-grams of SFA words. + levels: int, default=1 + Number of spatial pyramid levels + remove_repeat_words : boolean, default=False + Whether to use numerosity reduction. lower_bounding_distances : boolean, default = None If set to True, the FFT is normed to allow for ED lower bounding. - levels: int, default = 1 - Number of spatial pyramid levels + save_words: boolean, default = False whether to save the words generated for each series (default False) @@ -123,8 +119,8 @@ def __init__( anova=False, bigrams=False, skip_grams=False, - remove_repeat_words=False, levels=1, + remove_repeat_words=False, lower_bounding=True, lower_bounding_distances=None, save_words=False, diff --git a/aeon/transformations/collection/dictionary_based/_sfa_fast.py b/aeon/transformations/collection/dictionary_based/_sfa_fast.py index 0b195341ea..cf1e85f48e 100644 --- a/aeon/transformations/collection/dictionary_based/_sfa_fast.py +++ b/aeon/transformations/collection/dictionary_based/_sfa_fast.py @@ -1,25 +1,17 @@ """Symbolic Fourier Approximation (SFA) Transformer. -Configurable SFA transform for discretising time series into words. - +Efficient but rigid SFA transform for discretising time series into words. """ -__maintainer__ = [] +__maintainer__ = ["patrickzib", "MatthewMiddlehurst"] __all__ = ["SFAFast"] import math import sys -from warnings import simplefilter import numpy as np import pandas as pd -from numba import ( - NumbaPendingDeprecationWarning, - NumbaTypeSafetyWarning, - njit, - objmode, - prange, -) +from numba import njit, objmode, prange from numba.core import types from numba.typed import Dict from scipy.sparse import csr_matrix @@ -30,7 +22,7 @@ from aeon.transformations.collection import BaseCollectionTransformer -# The binning methods to use: equi-depth, equi-width, information gain or kmeans +# The binning methods to use binning_methods = { "equi-depth", "equi-width", @@ -40,9 +32,6 @@ "quantile", } -simplefilter(action="ignore", category=NumbaPendingDeprecationWarning) -simplefilter(action="ignore", category=NumbaTypeSafetyWarning) - class SFAFast(BaseCollectionTransformer): """Symbolic Fourier Approximation (SFA) Transformer. @@ -53,37 +42,44 @@ class SFAFast(BaseCollectionTransformer): shorten the series with DFT discretise the shortened series into bins set by MFC form a word from these discrete values - by default SFA produces a single word per series (window_size=0) - if a window is used, it forms a histogram of counts of words. + SFA returns an array of word counts for each series consisting of a column for + each word found in fit. + + This is a faster but more rigid version of the SFA transform, which can only use + up to 64 bit words and does not store the actual words found in its transformed + array. Parameters ---------- - word_length : int, default = 8 + word_length : int, default=8 Length of word to shorten window to (using PAA). - alphabet_size : int, default = 4 + alphabet_size : int, default=4 Number of values to discretise each value to. - window_size : int, default = 12 + window_size : int, default=12 Size of window for sliding. Input series length for whole series transform. - norm : boolean, default = False + norm : boolean, default=False Mean normalise words by dropping first fourier coefficient. binning_method : str, default="equi-depth" The binning method used to derive the breakpoints. 
One of {"equi-depth", - "equi-width", "information-gain", "information-gain-mae", "kmeans"}, - anova : boolean, default = False + "equi-width", "information-gain", "information-gain-mae", "kmeans", + "quantile"}. + anova : boolean, default=False If True, the Fourier coefficient selection is done via a one-way ANOVA test. If False, the first Fourier coefficients are selected. Only applicable if labels are given. - variance : boolean, default = False + variance : boolean, default=False If True, the Fourier coefficient selection is done via the largest variance. If False, the first Fourier coefficients are selected. Only applicable if labels are given. + bigrams : boolean, default=False + Whether to create bigrams of SFA words. + skip_grams : boolean, default=False + Whether to create skip-grams of SFA words. dilation : int, default = 0 When set to dilation > 1, adds dilation to the sliding window operation. - save_words : boolean, default = False - whether to save the words generated for each series (default False) - bigrams : boolean, default = False - Whether to create bigrams of SFA words. - feature_selection : {"chi2", "chi2_top_k", "none", "random"}, default: none + remove_repeat_words : boolean, default=False + Whether to use numerosity reduction. + feature_selection : {"chi2", "chi2_top_k", "random", None}, default=None Sets the feature selections strategy to be used. Large amounts of memory may be needed depending on the setting of bigrams (true is more) or alpha (larger is more). @@ -92,19 +88,20 @@ class SFAFast(BaseCollectionTransformer): dropping values based on p-value. 'random' reduces the number to at most 'max_feature_count', by randomly selecting features. - 'none' does not apply any feature selection and yields large bag of words, - p_threshold : int, default=0.05 (disabled by default) + None does not apply any feature selection and yields large bag of words, + p_threshold : float, default=0.05 If feature_selection=chi2 is chosen, feature selection is applied based on the chi-squared test. This is the p-value threshold to use for chi-squared test on bag-of-words (lower means more strict). 1 indicates that the test should not be performed. - max_feature_count : int, default=256 + max_feature_count : int, default=256 If feature_selection=random is chosen, this parameter defines the number of randomly chosen unique words used. - skip_grams : boolean, default = False - Whether to create skip-grams of SFA words. - remove_repeat_words : boolean, default = False - Whether to use numerosity reduction. + + + + save_words : boolean, default = False + whether to save the words generated for each series (default False) lower_bounding_distances : boolean, default = None If set to True, the FFT is normed to allow for ED lower bounding. return_sparse : boolean, default=True
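
For a quick end-to-end check of the transform documented above, a small usage
sketch (assuming the aeon module path shown in this diff; the random data and
labels are purely illustrative):

    import numpy as np
    from aeon.transformations.collection.dictionary_based import SFAFast

    rng = np.random.default_rng(0)
    X = rng.standard_normal((10, 1, 50))  # 10 univariate series of length 50
    y = rng.integers(0, 2, 10)            # binary labels, used by the binning

    sfa = SFAFast(word_length=8, alphabet_size=4, window_size=12)
    bag = sfa.fit_transform(X, y)  # sparse word-count matrix, one row per series
    print(bag.shape)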