From 46d5a93d91e145fafb5bc935ca7d3aa80615fc0a Mon Sep 17 00:00:00 2001
From: W0lfgunbl00d
Date: Wed, 13 Nov 2024 08:31:07 +0100
Subject: [PATCH 1/5] rls

---
 river/linear_model/__init__.py       |  10 ++
 river/linear_model/incrementalAUC.py | 111 ++++++++++++++++++++++
 river/linear_model/rls.py            | 137 +++++++++++++++++++++++++++
 3 files changed, 258 insertions(+)
 create mode 100644 river/linear_model/incrementalAUC.py
 create mode 100644 river/linear_model/rls.py

diff --git a/river/linear_model/__init__.py b/river/linear_model/__init__.py
index 756720490a..10146cacd7 100644
--- a/river/linear_model/__init__.py
+++ b/river/linear_model/__init__.py
@@ -10,6 +10,11 @@
 from .pa import PAClassifier, PARegressor
 from .perceptron import Perceptron
 from .softmax import SoftmaxRegression
+<<<<<<< Updated upstream
+=======
+from .rls import RLS
+from .incrementalAUC import IncrementalAUC
+>>>>>>> Stashed changes
 
 __all__ = [
     "base",
@@ -21,4 +26,9 @@
     "PARegressor",
     "Perceptron",
     "SoftmaxRegression",
+<<<<<<< Updated upstream
+=======
+    "RLS",
+    "IncrementalAUC",
+>>>>>>> Stashed changes
 ]
diff --git a/river/linear_model/incrementalAUC.py b/river/linear_model/incrementalAUC.py
new file mode 100644
index 0000000000..f2fc9eadc7
--- /dev/null
+++ b/river/linear_model/incrementalAUC.py
@@ -0,0 +1,111 @@
+from river import metrics
+import numpy as np
+
+class IncrementalAUC(metrics.base.BinaryMetric):
+    """Calculates AUC incrementally."""
+
+    def __init__(self):
+        super().__init__()
+        self.positive_scores = []
+        self.negative_scores = []
+
+    def update(self, y_true, y_pred):
+        """Updates the metric with the new prediction and true label."""
+        if y_true == 1:
+            self.positive_scores.append(y_pred)
+        else:
+            self.negative_scores.append(y_pred)
+        return self
+
+    def get(self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
+        """
+        Implements the stochastic gradient ascent method to optimize theta and computes the AUC
+        based on the accumulated scores.
+
+        Parameters:
+        - X_train: Training feature matrix.
+        - y_train: Training labels.
+        - X_test: Test feature matrix.
+        - y_test: Test labels.
+        - epochs: Number of training epochs.
+        - lr: Initial learning rate.
+        - n_mc: Number of Monte Carlo samples for gradient estimation.
+        - gamma: Learning rate discount factor.
+        - eps: Smoothing parameter for the sigmoid function.
+
+        Returns:
+        - auc: Final AUC score based on the accumulated scores.
+        """
+        from sklearn.metrics import roc_auc_score
+        # Separate the classes
+        X1 = X_train[y_train == 1]
+        X0 = X_train[y_train == 0]
+
+        # Initialize parameters
+        np.random.seed(123)
+        theta = np.random.randn(X_train.shape[1])
+        current_lr = lr
+
+        # Reset accumulated scores
+        self.positive_scores = []
+        self.negative_scores = []
+
+        # Optimization loop
+        for seed, epoch in enumerate(range(epochs)):
+            # Update learning rate
+            current_lr = current_lr / (1 + gamma)
+
+            # Update theta using stochastic gradient ascent
+            theta -= current_lr * self.stochastic_gradient(theta, X1, X0, N=n_mc, eps=eps, random_state=seed)
+
+        # After training, compute the scores on the test set
+        y_scores = np.dot(X_test, theta)
+
+        # Update accumulated scores using y_test and y_scores
+        for y_true_sample, y_score in zip(y_test, y_scores):
+            self.update(y_true_sample, y_score)
+
+        # Compute AUC based on accumulated scores
+        y_scores_accumulated = self.positive_scores + self.negative_scores
+        y_true_accumulated = [1] * len(self.positive_scores) + [0] * len(self.negative_scores)
+
+        auc = roc_auc_score(y_true_accumulated, y_scores_accumulated)
+        return auc
+
+    def sigma_eps(self, x, eps=0.01):
+        z = x / eps
+        if z > 35:
+            return 1
+        elif z < -35:
+            return 0
+        else:
+            return 1.0 / (1.0 + np.exp(- z))
+
+    def reg_u_statistic(self, y_true, y_probs, eps=0.01):
+        p = y_probs[y_true == 1]
+        q = y_probs[y_true == 0]
+
+        aux = []
+        for pp in p:
+            for qq in q:
+                aux.append(self.sigma_eps(pp - qq, eps=eps))
+
+        u = np.array(aux).mean()
+        return u
+
+    def stochastic_gradient(self, theta, X1, X0, N=1000, eps=0.01, random_state=1):
+
+        np.random.seed(random_state)
+
+        indices_1 = np.random.choice(np.arange(X1.shape[0]), size=N)
+        indices_0 = np.random.choice(np.arange(X0.shape[0]), size=N)
+
+        X1_, X0_ = X1[indices_1], X0[indices_0]
+
+        avg = np.zeros_like(theta)
+        for xi, xj in zip(X1_, X0_):
+            dx = xj - xi
+            sig = self.sigma_eps(theta @ dx, eps=eps)
+            avg = avg + sig * (1 - sig) * dx
+
+        return avg / (N * eps)
diff --git a/river/linear_model/rls.py b/river/linear_model/rls.py
new file mode 100644
index 0000000000..91e2b3a09d
--- /dev/null
+++ b/river/linear_model/rls.py
@@ -0,0 +1,137 @@
+import numpy as np
+
+
+class RLS(object):
+    """
+    Recursive Least Squares (RLS)
+
+    The Recursive Least Squares (RLS) algorithm is an adaptive filtering method that adjusts filter coefficients
+    to minimize the weighted least squares error between the desired and predicted outputs. It is widely used
+    in signal processing and control systems for applications requiring fast adaptation to changes in input signals.
+
+    Parameters
+    ----------
+    p : int
+        The order of the filter (number of coefficients to be estimated).
+    l : float, optional, default=0.99
+        Forgetting factor (0 < l ≤ 1). Controls how quickly the algorithm forgets past data.
+        A smaller value makes the algorithm more responsive to recent data.
+    delta : float, optional, default=1000000
+        Initial value for the inverse correlation matrix (P(0)). A large value ensures numerical stability at the start.
+
+    Attributes
+    ----------
+    p : int
+        Filter order.
+    l : float
+        Forgetting factor.
+    delta : float
+        Initialization value for P(0).
+    currentStep : int
+        The current iteration step of the RLS algorithm.
+    x : numpy.ndarray
+        Input vector of size (p+1, 1). Stores the most recent inputs.
+    P : numpy.ndarray
+        Inverse correlation matrix, initialized to a scaled identity matrix.
+    estimates : list of numpy.ndarray
+        List of estimated weight vectors (filter coefficients) at each step.
+    Pks : list of numpy.ndarray
+        List of inverse correlation matrices (P) at each step.
+
+    Methods
+    -------
+    estimate(xn, dn)
+        Updates the filter coefficients using the current input (`xn`) and desired output (`dn`).
+
+
+    Examples
+    --------
+    >>> import numpy as np
+
+    >>> # Initialize the RLS filter with order 2, forgetting factor 0.98, and delta 1e6
+    >>> rls = RLS(p=2, l=0.98, delta=1e6)
+
+    >>> # Simulate some data
+    >>> np.random.seed(42)
+    >>> num_samples = 100
+    >>> x_data = np.sin(np.linspace(0, 10, num_samples))  # Input signal
+    >>> noise = np.random.normal(0, 0.1, num_samples)  # Add some noise
+    >>> d_data = 0.5 * x_data + 0.3 + noise  # Desired output
+
+    >>> # Apply RLS algorithm
+    >>> for xn, dn in zip(x_data, d_data):
+    ...     weights = rls.estimate(xn, dn)
+    ...     print("Updated Weights:", weights.flatten())
+
+    >>> # Final weights after adaptation
+    >>> print("Final Weights:", rls.estimates[-1].flatten())
+    """
+    def __init__(self, p: int, l=0.99, delta=1000000):
+        """
+        Initializes the Recursive Least Squares (RLS) filter.
+
+        Parameters
+        ----------
+        p : int
+            Filter order (number of coefficients).
+        l : float, optional
+            Forgetting factor (default is 0.99).
+        delta : float, optional
+            Initial value for the inverse correlation matrix (default is 1,000,000).
+        """
+        self.p = p  # Filter order
+        self.l = l  # Forgetting factor
+        self.delta = delta  # Value to initialise P(0)
+
+        self.currentStep = 0
+
+        self.x = np.zeros((p + 1, 1))  # Column vector
+        self.P = np.identity(p + 1) * self.delta
+
+        self.estimates = []
+        self.estimates.append(np.zeros((p + 1, 1)))  # Weight vector initialized to zeros
+
+        self.Pks = []
+        self.Pks.append(self.P)
+
+    def estimate(self, xn: float, dn: float):
+        """
+        Performs one iteration of the RLS algorithm to update filter coefficients.
+
+        Parameters
+        ----------
+        xn : float
+            The current input sample.
+        dn : float
+            The desired output corresponding to the current input.
+
+        Returns
+        -------
+        numpy.ndarray
+            Updated weight vector (filter coefficients) after the current iteration.
+        """
+        # Update input vector
+        self.x = np.roll(self.x, -1)
+        self.x[-1, 0] = xn
+
+        # Get previous weight vector
+        wn_prev = self.estimates[-1]
+
+        # Compute gain vector
+        denominator = self.l + self.x.T @ self.Pks[-1] @ self.x
+        gn = (self.Pks[-1] @ self.x) / denominator
+
+        # Compute a priori error
+        alpha = dn - (self.x.T @ wn_prev)
+
+        # Update inverse correlation matrix
+        Pn = (self.Pks[-1] - gn @ self.x.T @ self.Pks[-1]) / self.l
+        self.Pks.append(Pn)
+
+        # Update weight vector
+        wn = wn_prev + gn * alpha
+        self.estimates.append(wn)
+
+        self.currentStep += 1
+
+        return wn

From d7ffb9b7c2bac34bd984c096ce9fab00484de199 Mon Sep 17 00:00:00 2001
From: W0lfgunbl00d
Date: Wed, 13 Nov 2024 08:37:11 +0100
Subject: [PATCH 2/5] rls changes

---
 river/linear_model/__init__.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/river/linear_model/__init__.py b/river/linear_model/__init__.py
index 10146cacd7..dcf20ffb12 100644
--- a/river/linear_model/__init__.py
+++ b/river/linear_model/__init__.py
@@ -10,11 +10,8 @@
 from .pa import PAClassifier, PARegressor
 from .perceptron import Perceptron
 from .softmax import SoftmaxRegression
-<<<<<<< Updated upstream
-=======
 from .rls import RLS
 from .incrementalAUC import IncrementalAUC
->>>>>>> Stashed changes
 
 __all__ = [
     "base",
@@ -26,9 +23,6 @@
     "PARegressor",
     "Perceptron",
     "SoftmaxRegression",
-<<<<<<< Updated upstream
-=======
     "RLS",
    "IncrementalAUC",
->>>>>>> Stashed changes
 ]

From ffa23249d9ea670a40d022e7add1f1d5749cc1a6 Mon Sep 17 00:00:00 2001
From: W0lfgunbl00d
Date: Sun, 17 Nov 2024 13:18:35 +0100
Subject: [PATCH 3/5] Update rls.py

fixed test
---
 river/linear_model/rls.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/river/linear_model/rls.py b/river/linear_model/rls.py
index 91e2b3a09d..4c2ea6938f 100644
--- a/river/linear_model/rls.py
+++ b/river/linear_model/rls.py
@@ -1,7 +1,7 @@
 import numpy as np
 
 
-class RLS(object):
+class RLS:
     """
     Recursive Least Squares (RLS)
 
@@ -46,10 +46,11 @@ class RLS:
 
     Examples
     --------
+    >>> from river import linear_model
    >>> import numpy as np
 
     >>> # Initialize the RLS filter with order 2, forgetting factor 0.98, and delta 1e6
-    >>> rls = RLS(p=2, l=0.98, delta=1e6)
+    >>> rls = linear_model.RLS(p=2, l=0.98, delta=1e6)
 
     >>> # Simulate some data
     >>> np.random.seed(42)
@@ -61,10 +62,8 @@ class RLS:
     >>> # Apply RLS algorithm
     >>> for xn, dn in zip(x_data, d_data):
     ...     weights = rls.estimate(xn, dn)
-    ...     print("Updated Weights:", weights.flatten())
-
-    >>> # Final weights after adaptation
     >>> print("Final Weights:", rls.estimates[-1].flatten())
+    Final Weights: [ 3.48065382 -6.15301727 3.3361416 ]
     """
     def __init__(self, p: int, l=0.99, delta=1000000):
         """

From dd269a7314d195e6daac3b67fe8a91f2a1c56489 Mon Sep 17 00:00:00 2001
From: W0lfgunbl00d
Date: Sun, 17 Nov 2024 13:31:58 +0100
Subject: [PATCH 4/5] Update rls.py

update
---
 river/linear_model/rls.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/river/linear_model/rls.py b/river/linear_model/rls.py
index 4c2ea6938f..6150baa6cc 100644
--- a/river/linear_model/rls.py
+++ b/river/linear_model/rls.py
@@ -23,7 +23,7 @@ class RLS:
     ----------
     p : int
         Filter order.
-    l : float
+    forgetting_factor : float
         Forgetting factor.
     delta : float
         Initialization value for P(0).
@@ -50,7 +50,7 @@ class RLS:
     >>> import numpy as np
 
     >>> # Initialize the RLS filter with order 2, forgetting factor 0.98, and delta 1e6
-    >>> rls = linear_model.RLS(p=2, l=0.98, delta=1e6)
+    >>> rls = linear_model.RLS(p=2, forgetting_factor=0.98, delta=1e6)
 
     >>> # Simulate some data
     >>> np.random.seed(42)
@@ -65,7 +65,7 @@ class RLS:
     >>> print("Final Weights:", rls.estimates[-1].flatten())
     Final Weights: [ 3.48065382 -6.15301727 3.3361416 ]
     """
-    def __init__(self, p: int, l=0.99, delta=1000000):
+    def __init__(self, p: int, forgetting_factor=0.99, delta=1000000):
         """
         Initializes the Recursive Least Squares (RLS) filter.
 
@@ -73,13 +73,13 @@ def __init__(self, p: int, forgetting_factor=0.99, delta=1000000):
         ----------
         p : int
             Filter order (number of coefficients).
-        l : float, optional
+        forgetting_factor : float, optional
             Forgetting factor (default is 0.99).
         delta : float, optional
             Initial value for the inverse correlation matrix (default is 1,000,000).
         """
         self.p = p  # Filter order
-        self.l = l  # Forgetting factor
+        self.forgetting_factor = forgetting_factor  # Forgetting factor
         self.delta = delta  # Value to initialise P(0)
 
         self.currentStep = 0
@@ -117,14 +117,14 @@ def estimate(self, xn: float, dn: float):
         wn_prev = self.estimates[-1]
 
         # Compute gain vector
-        denominator = self.l + self.x.T @ self.Pks[-1] @ self.x
+        denominator = self.forgetting_factor + self.x.T @ self.Pks[-1] @ self.x
         gn = (self.Pks[-1] @ self.x) / denominator
 
         # Compute a priori error
         alpha = dn - (self.x.T @ wn_prev)
 
         # Update inverse correlation matrix
-        Pn = (self.Pks[-1] - gn @ self.x.T @ self.Pks[-1]) / self.l
+        Pn = (self.Pks[-1] - gn @ self.x.T @ self.Pks[-1]) / self.forgetting_factor
         self.Pks.append(Pn)
 
         # Update weight vector

From 0953c5ebdf7d4b117bb29d244d318f827dd2faf3 Mon Sep 17 00:00:00 2001
From: W0lfgunbl00d
Date: Sun, 17 Nov 2024 13:49:51 +0100
Subject: [PATCH 5/5] update rls.py

test precommit
---
 river/linear_model/__init__.py       |  4 +--
 river/linear_model/incrementalAUC.py | 18 +++++++---
 river/linear_model/rls.py            | 53 +++++++++++++++------------
 3 files changed, 43 insertions(+), 32 deletions(-)

diff --git a/river/linear_model/__init__.py b/river/linear_model/__init__.py
index dcf20ffb12..581113dbcb 100644
--- a/river/linear_model/__init__.py
+++ b/river/linear_model/__init__.py
@@ -5,13 +5,13 @@
 from . import base
 from .alma import ALMAClassifier
 from .bayesian_lin_reg import BayesianLinearRegression
+from .incrementalAUC import IncrementalAUC
 from .lin_reg import LinearRegression
 from .log_reg import LogisticRegression
 from .pa import PAClassifier, PARegressor
 from .perceptron import Perceptron
-from .softmax import SoftmaxRegression
 from .rls import RLS
-from .incrementalAUC import IncrementalAUC
+from .softmax import SoftmaxRegression
 
 __all__ = [
     "base",
diff --git a/river/linear_model/incrementalAUC.py b/river/linear_model/incrementalAUC.py
index f2fc9eadc7..5b89911d58 100644
--- a/river/linear_model/incrementalAUC.py
+++ b/river/linear_model/incrementalAUC.py
@@ -1,6 +1,10 @@
-from river import metrics
+from __future__ import annotations
+
 import numpy as np
 
+from river import metrics
+
+
 class IncrementalAUC(metrics.base.BinaryMetric):
     """Calculates AUC incrementally."""
 
@@ -17,7 +21,9 @@ def update(self, y_true, y_pred):
             self.negative_scores.append(y_pred)
         return self
 
-    def get(self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01):
+    def get(
+        self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01
+    ):
         """
         Implements the stochastic gradient ascent method to optimize theta and computes the AUC
         based on the accumulated scores.
@@ -37,6 +43,7 @@ def get(self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, ga
         - auc: Final AUC score based on the accumulated scores.
         """
         from sklearn.metrics import roc_auc_score
+
         # Separate the classes
         X1 = X_train[y_train == 1]
         X0 = X_train[y_train == 0]
@@ -56,7 +63,9 @@ def get(self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, ga
             current_lr = current_lr / (1 + gamma)
 
             # Update theta using stochastic gradient ascent
-            theta -= current_lr * self.stochastic_gradient(theta, X1, X0, N=n_mc, eps=eps, random_state=seed)
+            theta -= current_lr * self.stochastic_gradient(
+                theta, X1, X0, N=n_mc, eps=eps, random_state=seed
+            )
 
         # After training, compute the scores on the test set
         y_scores = np.dot(X_test, theta)
@@ -79,7 +88,7 @@ def sigma_eps(self, x, eps=0.01):
         elif z < -35:
             return 0
         else:
-            return 1.0 / (1.0 + np.exp(- z))
+            return 1.0 / (1.0 + np.exp(-z))
 
     def reg_u_statistic(self, y_true, y_probs, eps=0.01):
         p = y_probs[y_true == 1]
@@ -94,7 +103,6 @@ def reg_u_statistic(self, y_true, y_probs, eps=0.01):
         return u
 
     def stochastic_gradient(self, theta, X1, X0, N=1000, eps=0.01, random_state=1):
-
         np.random.seed(random_state)
 
         indices_1 = np.random.choice(np.arange(X1.shape[0]), size=N)
diff --git a/river/linear_model/rls.py b/river/linear_model/rls.py
index 6150baa6cc..fbdc40e54f 100644
--- a/river/linear_model/rls.py
+++ b/river/linear_model/rls.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import numpy as np
 
 
@@ -13,7 +15,7 @@ class RLS:
     ----------
     p : int
         The order of the filter (number of coefficients to be estimated).
-    l : float, optional, default=0.99
+    forgetting_factor : float, optional, default=0.99
         Forgetting factor (0 < l ≤ 1). Controls how quickly the algorithm forgets past data.
         A smaller value makes the algorithm more responsive to recent data.
     delta : float, optional, default=1000000
@@ -65,18 +67,19 @@ class RLS:
     >>> print("Final Weights:", rls.estimates[-1].flatten())
     Final Weights: [ 3.48065382 -6.15301727 3.3361416 ]
     """
+
     def __init__(self, p: int, forgetting_factor=0.99, delta=1000000):
         """
-        Initializes the Recursive Least Squares (RLS) filter.
- - Parameters - ---------- - p : int - Filter order (number of coefficients). - forgetting_factor : float, optional - Forgetting factor (default is 0.99). - delta : float, optional - Initial value for the inverse correlation matrix (default is 1,000,000). + Initializes the Recursive Least Squares (RLS) filter. + + Parameters + ---------- + p : int + Filter order (number of coefficients). + forgetting_factor : float, optional + Forgetting factor (default is 0.99). + delta : float, optional + Initial value for the inverse correlation matrix (default is 1,000,000). """ self.p = p # Filter order self.forgetting_factor = forgetting_factor # Forgetting factor @@ -95,20 +98,20 @@ def __init__(self, p: int, forgetting_factor=0.99, delta=1000000): def estimate(self, xn: float, dn: float): """ - Performs one iteration of the RLS algorithm to update filter coefficients. - - Parameters - ---------- - xn : float - The current input sample. - dn : float - The desired output corresponding to the current input. - - Returns - ------- - numpy.ndarray - Updated weight vector (filter coefficients) after the current iteration. - """ + Performs one iteration of the RLS algorithm to update filter coefficients. + + Parameters + ---------- + xn : float + The current input sample. + dn : float + The desired output corresponding to the current input. + + Returns + ------- + numpy.ndarray + Updated weight vector (filter coefficients) after the current iteration. + """ # Update input vector self.x = np.roll(self.x, -1) self.x[-1, 0] = xn
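
Notes (not part of the patch series above):

For reference, RLS.estimate() implements the textbook exponentially weighted
RLS recursion with forgetting factor lambda (`self.forgetting_factor` after
PATCH 4), applied to the sliding window x_n that holds the last p+1 input
samples:

    g_n      = P_{n-1} x_n / (lambda + x_n^T P_{n-1} x_n)
    alpha_n  = d_n - x_n^T w_{n-1}
    P_n      = (P_{n-1} - g_n x_n^T P_{n-1}) / lambda
    w_n      = w_{n-1} + g_n alpha_n

This is exactly the gn / alpha / Pn / wn sequence computed in the method body,
matching the docstring's weighted-least-squares description.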
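A minimal end-to-end sketch of the IncrementalAUC.get() entry point added in
PATCH 1. The synthetic data, the 400/100 split, and the reduced epochs/n_mc
values below are illustrative assumptions, not part of the diff. get() expects
numpy arrays (it masks with y_train == 1) and requires scikit-learn, since it
imports roc_auc_score internally:

    import numpy as np

    from river import linear_model

    # Synthetic binary data: labels follow a noisy linear score.
    rng = np.random.default_rng(0)
    X = rng.normal(size=(500, 3))
    y = (X @ np.array([1.5, -2.0, 0.5]) + rng.normal(scale=0.5, size=500) > 0).astype(int)

    X_train, y_train, X_test, y_test = X[:400], y[:400], X[400:], y[400:]

    metric = linear_model.IncrementalAUC()

    # get() fits theta by stochastic gradient ascent on a smoothed AUC
    # surrogate, scores X_test, feeds each (label, score) pair through
    # update(), and returns the ROC AUC of the accumulated scores.
    # Defaults are epochs=900 and n_mc=500; smaller values keep this fast.
    auc = metric.get(X_train, y_train, X_test, y_test, epochs=50, n_mc=100)
    print(f"held-out AUC: {auc:.3f}")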