Skip to content

Commit

Permalink
Cleaned solvers code and added comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Jad-yehya committed Nov 19, 2024
1 parent 1b6316d commit 2c5c872
Show file tree
Hide file tree
Showing 9 changed files with 68 additions and 28 deletions.
5 changes: 4 additions & 1 deletion solvers/AR.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


class Solver(BaseSolver):
name = "AR"
name = "AR" # AutoRegressive Linear model

install_cmd = "conda"
requirements = ["pip:torch", "tqdm"]
Expand Down Expand Up @@ -130,11 +130,13 @@ def run(self, _):
xw_hat, 1
)

# Calculating the percentile value for the threshold
percentile_value = np.percentile(
np.abs(self.X_test[self.window_size:] - x_hat[self.window_size:]),
self.percentile
)

# Thresholding
predictions = np.zeros_like(x_hat)-1
predictions[self.window_size:] = np.where(
np.abs(self.X_test[self.window_size:] -
Expand All @@ -143,6 +145,7 @@ def run(self, _):

self.predictions = np.max(predictions, axis=1)

# Skipping the solver call if a condition is met
def skip(self, X_train, X_test, y_test):
if X_train.shape[0] < self.window_size + self.horizon:
return True, "No enough training samples"
Expand Down
14 changes: 12 additions & 2 deletions solvers/abod.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


class Solver(BaseSolver):
name = "ABOD"
name = "ABOD" # Angle-Based Outlier Detection

install_cmd = "conda"
requirements = ["pip:pyod"]
Expand All @@ -34,8 +34,10 @@ def set_objective(self, X_train, y_test, X_test):
)

def run(self, _):
# Using only windowed data, parameter used only for consistency
if self.window:
# We need to transform the data to have a rolling window

# Transforming the data into rolling windowed data
if self.X_train is not None:
self.Xw_train = np.lib.stride_tricks.sliding_window_view(
self.X_train, window_shape=self.window_size, axis=0
Expand All @@ -51,6 +53,7 @@ def run(self, _):
self.y_test, window_shape=self.window_size, axis=0
)[::self.stride]

# Flattening the data for the model
flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)

Expand All @@ -64,18 +67,25 @@ def run(self, _):
(self.X_train.shape[0] - self.window_size) // self.stride
) + 1

# Mapping the binary output from {-1, 1} to {1, 0}
# For consistency with the other solvers
self.raw_y_hat = np.array(raw_y_hat)
self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)

# Prepending -1 for the non-predicted samples
# (the leading X_train.shape[0] - result_shape entries have no prediction)
self.raw_y_hat = np.append(
np.full(self.X_train.shape[0] -
result_shape, -1), self.raw_y_hat
)

# Anomaly scores (Not used but allows finer thresholding)
self.raw_anomaly_score = np.array(raw_anomaly_score)
self.raw_anomaly_score = np.append(
np.full(result_shape, -1), self.raw_anomaly_score
)

# Function used to skip a solver call when n_neighbors >= window_size
def skip(self, X_train, X_test, y_test):
if self.n_neighbors >= self.window_size:
return True, "Number of neighbors greater than number of samples."
Expand Down
11 changes: 10 additions & 1 deletion solvers/cblof.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ def set_objective(self, X_train, y_test, X_test):
)

def run(self, _):

# Using only windowed data, parameter used only for consistency
if self.window:

# We need to transform the data to have a rolling window
if self.X_train is not None:
self.Xw_train = np.lib.stride_tricks.sliding_window_view(
Expand All @@ -51,6 +52,7 @@ def run(self, _):
self.y_test, window_shape=self.window_size, axis=0
)[::self.stride]

# Flattening the data for the model
flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)

Expand All @@ -63,18 +65,25 @@ def run(self, _):
(self.X_train.shape[0] - self.window_size) // self.stride
) + 1

# Mapping the binary output from {-1, 1} to {1, 0}
# For consistency with the other solvers
self.raw_y_hat = np.array(raw_y_hat)
self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)

# Prepending -1 for the non-predicted samples
# (the leading X_train.shape[0] - result_shape entries have no prediction)
self.raw_y_hat = np.append(
np.full(self.X_train.shape[0] -
result_shape, -1), self.raw_y_hat
)

# Anomaly scores (Not used but allows finer thresholding)
self.raw_anomaly_score = np.array(raw_anomaly_score)
self.raw_anomaly_score = np.append(
np.full(result_shape, -1), self.raw_anomaly_score
)

# Skipping the solver call if a condition is met
def skip(self, X_train, X_test, y_test):
if X_train.shape[0] < self.window_size:
return True, "No enough samples to create a window"
Expand Down
24 changes: 13 additions & 11 deletions solvers/dif.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from benchopt import safe_import_context

with safe_import_context() as import_ctx:
from benchopt.utils.sys_info import get_cuda_version
from pyod.models.dif import DIF
import numpy as np

Expand All @@ -26,15 +25,15 @@ class Solver(BaseSolver):
def set_objective(self, X_train, y_test, X_test):
self.X_train = X_train
self.X_test, self.y_test = X_test, y_test
if get_cuda_version() is None:
self.clf = DIF(contamination=self.contamination)
else:
self.clf = DIF(contamination=self.contamination, device="cuda")
# Device is automatically selected by the model
# if device=None
self.clf = DIF(contamination=self.contamination, device=None)

def run(self, _):

# Using only windowed data, parameter used only for consistency
if self.window:
# We need to transform the data to have a rolling window

# Transforming the data into rolling windowed data
if self.X_train is not None:
self.Xw_train = np.lib.stride_tricks.sliding_window_view(
self.X_train, window_shape=self.window_size, axis=0
Expand All @@ -50,6 +49,7 @@ def run(self, _):
self.y_test, window_shape=self.window_size, axis=0
)[::self.stride]

# Flattening the data for the model
flatrain = self.Xw_train.reshape(self.Xw_train.shape[0], -1)
flatest = self.Xw_test.reshape(self.Xw_test.shape[0], -1)

Expand All @@ -62,23 +62,25 @@ def run(self, _):
(self.X_train.shape[0] - self.window_size) // self.stride
) + 1

# Mapping the binary output from {-1, 1} to {1, 0}
# For consistency with the other solvers
self.raw_y_hat = np.array(raw_y_hat)
self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)

# Prepending -1 for the non-predicted samples
# (the leading X_train.shape[0] - result_shape entries have no prediction)
self.raw_y_hat = np.append(
np.full(self.X_train.shape[0] -
result_shape, -1), self.raw_y_hat
)

# Anomaly scores (Not used but allows finer thresholding)
self.raw_anomaly_score = np.array(raw_anomaly_score)
self.raw_anomaly_score = np.append(
np.full(result_shape, -1), self.raw_anomaly_score
)

def skip(self, X_train, X_test, y_test):
# If cuda is not available, we skip the test because deep method
# from benchopt.utils.sys_info import get_cuda_version
# if get_cuda_version() is None:
# return True, "Cuda is not available"
if X_train.shape[0] < self.window_size:
return True, "Not enough samples to create a window"
return False, None
Expand Down
6 changes: 6 additions & 0 deletions solvers/isolation-forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,19 @@ def run(self, _):
(self.X_train.shape[0] - self.window_size) // self.stride
) + 1

# Mapping the binary output from {-1, 1} to {1, 0}
# For consistency with the other solvers
self.raw_y_hat = np.array(raw_y_hat)
self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)

# Prepending -1 for the non-predicted samples
# (the leading X_train.shape[0] - result_shape entries have no prediction)
self.raw_y_hat = np.append(
np.full(self.X_train.shape[0] -
result_shape, -1), self.raw_y_hat
)

# Anomaly scores (Not used but allows finer thresholding)
self.raw_anomaly_score = np.array(raw_anomaly_score)
self.raw_anomaly_score = np.append(
np.full(result_shape, -1), self.raw_anomaly_score
Expand Down
6 changes: 6 additions & 0 deletions solvers/lof.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,19 @@ def run(self, _):
(self.X_train.shape[0] - self.window_size) // self.stride
) + 1

# Mapping the binary output from {-1, 1} to {1, 0}
# For consistency with the other solvers
self.raw_y_hat = np.array(raw_y_hat)
self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)

# Prepending -1 for the non-predicted samples
# (the leading X_train.shape[0] - result_shape entries have no prediction)
self.raw_y_hat = np.append(
np.full(self.X_train.shape[0] -
result_shape, -1), self.raw_y_hat
)

# Anomaly scores (Not used but allows finer thresholding)
self.raw_anomaly_score = np.array(raw_anomaly_score)
self.raw_anomaly_score = np.append(
np.full(result_shape, -1), self.raw_anomaly_score
Expand Down
5 changes: 2 additions & 3 deletions solvers/lstm.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def run(self, _):

ti = tqdm(range(self.n_epochs), desc="epoch", leave=True)

# Training loop
for epoch in ti:
self.model.train()
train_loss = 0
Expand All @@ -122,6 +123,7 @@ def run(self, _):
# Saving the model
torch.save(self.model.state_dict(), "model.pth")

# Test loop
self.model.eval()
raw_reconstruction = []
for x in self.test_loader:
Expand All @@ -147,9 +149,6 @@ def run(self, _):
)

def skip(self, X_train, X_test, y_test):
# from benchopt.utils.sys_info import get_cuda_version
# if get_cuda_version() is None:
# return True, "CUDA is not available. Skipping this solver."
if X_train.shape[0] < self.window_size:
return True, "Not enough samples to create a window."
return False, None
Expand Down
9 changes: 8 additions & 1 deletion solvers/ocsvm.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,25 +56,32 @@ def run(self, _):
raw_y_hat = self.clf.predict(self.flatest)
raw_anomaly_score = self.clf.decision_function(self.flatest)

# The results we get have a length of
result_shape = (
(self.X_train.shape[0] - self.window_size) // self.stride
) + 1

# Mapping the binary output from {-1, 1} to {1, 0}
# For consistency with the other solvers
self.raw_y_hat = np.array(raw_y_hat)

# Applying the mapping, then prepending -1 for the non-predicted samples
# (the leading X_train.shape[0] - result_shape entries have no prediction)
self.raw_y_hat = np.where(self.raw_y_hat == -1, 1, 0)
self.raw_y_hat = np.append(
np.full(self.X_train.shape[0] -
result_shape, -1), self.raw_y_hat
)

# Anomaly scores (Not used but allows finer thresholding)
self.raw_anomaly_score = np.array(raw_anomaly_score)
self.raw_anomaly_score = np.append(
np.full(result_shape, -1), self.raw_anomaly_score
)

def skip(self, X_train, X_test, y_test):
if X_train.shape[0] < self.window_size:
return True, "Window size is larger than dataset size. Skipping."
return True, "Window size is larger than dataset size."
return False, None

def get_result(self):
Expand Down
16 changes: 7 additions & 9 deletions solvers/vanilla-transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def set_objective(self, X_train, y_test, X_test):
self.optimizer, mode='min', factor=0.5, patience=5
)

# Using only windowed data, parameter used only for consistency
if self.window:
if self.X_train is not None:
self.Xw_train = np.lib.stride_tricks.sliding_window_view(
Expand Down Expand Up @@ -91,6 +92,7 @@ def run(self, _):
patience = 20
no_improve = 0

# Training loop
for epoch in ti:
self.model.train()
total_loss = 0
Expand All @@ -115,7 +117,7 @@ def run(self, _):
total_loss += loss.item()

avg_loss = total_loss / (len(self.Xw_train) // self.batch_size)
ti.set_description(f"Epoch {epoch} (loss={avg_loss:.5e})") # noqa
ti.set_description(f"Epoch {epoch} (loss={avg_loss:.5e})")

# Learning rate scheduling
self.scheduler.step(avg_loss)
Expand All @@ -128,13 +130,11 @@ def run(self, _):
else:
no_improve += 1
if no_improve == patience:
# print("Early stopping!")
break

# self.model.load_state_dict(torch.load('best_model.pth'))

# Test loop
self.model.eval()
batch_size = 1024 # Adjust this based on your GPU memory
batch_size = 1024
all_predictions = []

with torch.no_grad():
Expand All @@ -161,11 +161,13 @@ def run(self, _):
x_hat[self.window_size+self.horizon:] = mean_overlaping_pred(
xw_hat, 1)

# Calculating the percentile value for the threshold
percentile_value = np.percentile(
np.abs(self.X_test[self.window_size:] - x_hat[self.window_size:]),
self.percentile
)

# Thresholding
predictions = np.zeros_like(x_hat)-1
predictions[self.window_size:] = np.where(
np.abs(self.X_test[self.window_size:] -
Expand All @@ -177,10 +179,6 @@ def run(self, _):
def skip(self, X_train, X_test, y_test):
if X_train.shape[0] < self.window_size + self.horizon:
return True, "No enough training samples"

if X_test.shape[0] < self.window_size + self.horizon:
return True, "No enough testing samples"

return False, None

def get_result(self):
Expand Down

0 comments on commit 2c5c872

Please sign in to comment.