Skip to content

Commit

Permalink
Refactored examples. Fixed evaluation comments. int requiremnet for y…
Browse files Browse the repository at this point in the history
…_pred changed to float.
  • Loading branch information
selimfirat committed Aug 15, 2020
1 parent 0d047b7 commit 9827eb4
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 56 deletions.
29 changes: 17 additions & 12 deletions examples/example_ensemble.py
Original file line number Diff line number Diff line change
@@ -1,36 +1,41 @@
from sklearn.utils import shuffle
from tqdm import tqdm
# Import modules.
from pysad.evaluation import AUROCMetric
from pysad.models import LODA
from pysad.models import xStream
from pysad.utils import ArrayStreamer
from pysad.transform.ensemble import AverageScoreEnsembler
from pysad.utils import Data
from sklearn.utils import shuffle
from tqdm import tqdm
import numpy as np

# This example demonstrates the usage of an ensembling method.
if __name__ == '__main__':
np.random.seed(61)
np.random.seed(61) # Fix random seed.

data = Data("data")
X_all, y_all = data.get_data("arrhythmia.mat")
X_all, y_all = shuffle(X_all, y_all)
iterator = ArrayStreamer(shuffle=False)
auroc = AUROCMetric()
X_all, y_all = data.get_data("arrhythmia.mat") # Load Arrhythmia data.
X_all, y_all = shuffle(X_all, y_all) # Shuffle data.
iterator = ArrayStreamer(shuffle=False) # Create streamer to simulate streaming data.
auroc = AUROCMetric() # Tracker of area under receiver-operating-characteristics curve metric.

models = [
models = [ # Models to be ensembled.
xStream(),
LODA()
]
ensembler = AverageScoreEnsembler()
ensembler = AverageScoreEnsembler() # Ensembler module.

for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])):
for X, y in tqdm(iterator.iter(X_all, y_all)): # Iterate over examples.
model_scores = np.empty(len(models), dtype=np.float)

# Fit and score with each model.
for i, model in enumerate(models):
model.fit_partial(X)
model_scores[i] = model.score_partial(X)

score = ensembler.fit_transform_partial(model_scores)
auroc.update(y, score)
score = ensembler.fit_transform_partial(model_scores) # fit to ensembler model and get ensembled score.

auroc.update(y, score) # update AUROC metric.

# Output score.
print("AUROC: ", auroc.get())
21 changes: 13 additions & 8 deletions examples/example_probability_calibration.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,22 @@
# Import modules.
from pysad.models import xStream
from pysad.transform.probability_calibration import ConformalProbabilityCalibrator
from pysad.utils import Data
import numpy as np

# This example demonstrates the usage of the probability calibrators.
if __name__ == "__main__":
model = xStream()
calibrator = ConformalProbabilityCalibrator(windowed=True, window_size=300)
streaming_data = Data().get_iterator("arrhythmia.mat")
np.random.seed(61) # Fix seed.

for i, (x, y_true) in enumerate(streaming_data):
anomaly_score = model.fit_score_partial(x)
model = xStream() # Init model.
calibrator = ConformalProbabilityCalibrator(windowed=True, window_size=300) # Init probability calibrator.
streaming_data = Data().get_iterator("arrhythmia.mat") # Get streamer.

calibrated_score = calibrator.fit_transform(anomaly_score)
print(calibrated_score)
if calibrated_score < 0.05: # ıf probabability is less than 5%.
for i, (x, y_true) in enumerate(streaming_data): # Stream data.
anomaly_score = model.fit_score_partial(x) # Fit to an instance x and score it.

calibrated_score = calibrator.fit_transform(anomaly_score) # Fit & calibrate score.

# Output if the instance is anomalous.
if calibrated_score < 0.05: # If probability is less than 5%.
print(f"Alert: {i}th data point is anomalous.")
22 changes: 12 additions & 10 deletions examples/example_pyod_integration.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Import modules.
from pyod.models.iforest import IForest
from sklearn.utils import shuffle
from pysad.evaluation import AUROCMetric
Expand All @@ -7,26 +8,27 @@
from tqdm import tqdm
import numpy as np

# This example demonstrates the integration of a PYOD model via ReferenceWindowModel.
# This example demonstrates the integration of a PyOD model via ReferenceWindowModel.
if __name__ == "__main__":
np.random.seed(61)
np.random.seed(61) # Fix seed.

# Get data to stream.
data = Data("data")
X_all, y_all = data.get_data("arrhythmia.mat")
X_all, y_all = shuffle(X_all, y_all)
iterator = ArrayStreamer(shuffle=False)

# Fit reference window integration to first 100 instances initially.
model = ReferenceWindowModel(model_cls=IForest, window_size=240, sliding_size=30, initial_window_X=X_all[:100])

iterator = ArrayStreamer(shuffle=False)

auroc = AUROCMetric()
auroc = AUROCMetric() # Init area under receiver-operating-characteristics curve metric tracker.

y_pred = []
for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])):
model.fit_partial(X)
score = model.score_partial(X)

y_pred.append(score)
model.fit_partial(X) # Fit to the instance.
score = model.score_partial(X) # Score the instance.

auroc.update(y, score)
auroc.update(y, score) # Update the metric.

# Output AUROC metric.
print("AUROC: ", auroc.get())
9 changes: 7 additions & 2 deletions examples/example_statistics.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,23 @@
import numpy as np

# Import modules.
from pysad.statistics import AverageMeter
from pysad.statistics import VarianceMeter
import numpy as np

# This example shows the usage of statistics module for streaming data.
if __name__ == '__main__':

# Init data with mean 0 and standard deviation 1.
X = np.random.randn(1000)

# Init statistics trackers for mean and variance.
avg_meter = AverageMeter()
var_meter = VarianceMeter()

for i in range(1000):
# Update statistics trackers.
avg_meter.update(X[i])
var_meter.update(X[i])

# Output resulting statistics.
print(f"Average: {avg_meter.get()}, Standard deviation: {np.sqrt(var_meter.get())}")
# The results are close to mean 0 and std 1 because the array is drawn from a standard normal distribution via np.random.randn.
32 changes: 17 additions & 15 deletions examples/example_usage.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# Import modules.
from sklearn.utils import shuffle
from pysad.evaluation import AUROCMetric
from pysad.models import xStream
Expand All @@ -8,28 +9,29 @@
from tqdm import tqdm
import numpy as np

# This example demonstrates the usage of the most modules in pysad framework.
# This example demonstrates the usage of most modules in the PySAD framework.
if __name__ == "__main__":
np.random.seed(61)
data = Data("data")
np.random.seed(61) # Fix random seed.

# Get data to stream.
data = Data("data")
X_all, y_all = data.get_data("arrhythmia.mat")
X_all, y_all = shuffle(X_all, y_all)

iterator = ArrayStreamer(shuffle=False)
model = xStream()
preprocessor = InstanceUnitNormScaler()
postprocessor = RunningAveragePostprocessor(window_size=5)
auroc = AUROCMetric()
iterator = ArrayStreamer(shuffle=False) # Init streamer to simulate streaming data.

model = xStream() # Init xStream anomaly detection model.
preprocessor = InstanceUnitNormScaler() # Init normalizer.
postprocessor = RunningAveragePostprocessor(window_size=5) # Init running average postprocessor.
auroc = AUROCMetric() # Init area under receiver-operating-characteristics curve metric.

y_pred = []
for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])):
X = preprocessor.fit_transform_partial(X)
for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])): # Stream data.
X = preprocessor.fit_transform_partial(X) # Fit preprocessor to and transform the instance.

score = model.fit_score_partial(X)
score = postprocessor.fit_transform_partial(score)
score = model.fit_score_partial(X) # Fit model to and score the instance.
score = postprocessor.fit_transform_partial(score) # Apply running averaging to the score.

y_pred.append(score)
auroc.update(y, score)
auroc.update(y, score) # Update AUROC metric.

# Output resulting AUROC metric.
print("AUROC: ", auroc.get())
14 changes: 8 additions & 6 deletions examples/example_usage_short.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# Import modules.
from pysad.evaluation import AUROCMetric
from pysad.models import LODA
from pysad.utils import Data

model = LODA()
metric = AUROCMetric()
streaming_data = Data().get_iterator("arrhythmia.mat")
model = LODA() # Init model
metric = AUROCMetric() # Init area under receiver-operating-characteristics curve metric
streaming_data = Data().get_iterator("arrhythmia.mat") # Get data streamer.

for x, y_true in streaming_data:
anomaly_score = model.fit_score_partial(x)
for x, y_true in streaming_data: # Stream data.
anomaly_score = model.fit_score_partial(x) # Fit the instance to model and score the instance.

metric.update(y_true, anomaly_score)
metric.update(y_true, anomaly_score) # Update the AUROC metric.

# Output the resulting AUROCMetric.
print(f"Area under ROC metric is {metric.get()}.")
2 changes: 1 addition & 1 deletion pysad/core/base_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def update(self, y_true, y_pred):
Args:
y_true (int): Ground truth class. Either 1 or 0.
y_pred (int): Predicted class. Either 1 or 0.
y_pred (float): Predicted class or anomaly score. Higher values correspond to more anomalousness and lower values correspond to more normalness.
"""
pass

Expand Down
4 changes: 2 additions & 2 deletions pysad/evaluation/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def update(self, y_true, y_pred):
Args:
y_true (int): Ground truth class. Either 1 or 0.
y_pred (int): Predicted class. Either 1 or 0.
y_pred (float): Predicted class or anomaly score. Higher values correspond to more anomalousness and lower values correspond to more normalness.
"""
self.y_true.append(y_true)
self.y_pred.append(y_pred)
Expand All @@ -37,7 +37,7 @@ def _evaluate(self, y_true, y_pred):
Args:
y_true (list[int]): Ground truth classes.
y_pred (list[int]): Predicted classes.
y_pred (list[float]): Predicted classes or scores.
"""
pass

Expand Down

0 comments on commit 9827eb4

Please sign in to comment.