Skip to content

Commit

Permalink
chore!: unify and refactor trainer (#315)
Browse files Browse the repository at this point in the history
- improve and unify Trainer with better logging and performance
- add VAE model to model factory
- rc0 version

---------

Signed-off-by: Avik Basu <[email protected]>
Co-authored-by: Avik Basu <[email protected]>
  • Loading branch information
ab93 and ab93 authored Oct 30, 2023
1 parent 509e38a commit 61f4575
Show file tree
Hide file tree
Showing 27 changed files with 142 additions and 202 deletions.
4 changes: 2 additions & 2 deletions docs/autoencoders.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ Here we are using `VanillaAE`, a Vanilla Autoencoder model.

```python
from numalogic.models.autoencoder.variants import VanillaAE
from numalogic.models.autoencoder import AutoencoderTrainer
from numalogic.models.autoencoder import TimeseriesTrainer

model = VanillaAE(seq_len=12, n_features=3)
trainer = AutoencoderTrainer(max_epochs=50, enable_progress_bar=True)
trainer = TimeseriesTrainer(max_epochs=50, enable_progress_bar=True)
trainer.fit(model, datamodule=datamodule)
```

Expand Down
6 changes: 3 additions & 3 deletions docs/quick-start.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ pip install numalogic

## Numalogic as a Library

Numalogic can be used as an independent library, and it provides various ML models and tools. Here, we are using the `AutoencoderTrainer`. Refer to [training section](autoencoders.md) for other available options.
Numalogic can be used as an independent library, and it provides various ML models and tools. Here, we are using the `TimeseriesTrainer`. Refer to [training section](autoencoders.md) for other available options.

In this example, the train data set has numbers ranging from 1-10. Whereas in the test data set, there are data points that go out of this range, which the algorithm should be able to detect as anomalies.

```python
import numpy as np
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader
from numalogic.models.autoencoder import AutoencoderTrainer
from numalogic.models.autoencoder import TimeseriesTrainer
from numalogic.models.autoencoder.variants import VanillaAE
from numalogic.models.threshold import StdDevThreshold
from numalogic.transforms import TanhNorm
Expand Down Expand Up @@ -57,7 +57,7 @@ model = VanillaAE(seq_len=SEQ_LEN, n_features=1)
train_dataset = StreamingDataset(train_data, seq_len=SEQ_LEN)

# Define the trainer, and fit the model.
trainer = AutoencoderTrainer(max_epochs=30, enable_progress_bar=True)
trainer = TimeseriesTrainer(max_epochs=30, enable_progress_bar=True)
trainer.fit(model, train_dataloaders=DataLoader(train_dataset))

# Get the training reconstruction error from the model.
Expand Down
8 changes: 4 additions & 4 deletions examples/conv_ae.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,7 @@
],
"source": [
"from numalogic.models.autoencoder.variants import Conv1dAE\n",
"from numalogic.models.autoencoder import AutoencoderTrainer\n",
"from numalogic.models.autoencoder import TimeseriesTrainer\n",
"\n",
"model_1 = Conv1dAE(seq_len=SEQ_LEN, in_channels=1, enc_channels=(16, 32, 8), enc_kernel_sizes=3)\n",
"print(model_1)"
Expand Down Expand Up @@ -367,7 +367,7 @@
}
],
"source": [
"trainer = AutoencoderTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\n",
"trainer = TimeseriesTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\n",
"trainer.fit(model_1, train_dataloaders=DataLoader(train_dataset, batch_size=BATCH_SIZE))"
],
"metadata": {
Expand Down Expand Up @@ -565,7 +565,7 @@
}
],
"source": [
"trainer = AutoencoderTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\n",
"trainer = TimeseriesTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\n",
"trainer.fit(model_2, train_dataloaders=DataLoader(train_dataset, batch_size=BATCH_SIZE))"
],
"metadata": {
Expand Down Expand Up @@ -756,7 +756,7 @@
],
"source": [
"\n",
"trainer = AutoencoderTrainer(accelerator=\"cpu\", max_epochs=MAX_EPOCHS, enable_progress_bar=True)\n",
"trainer = TimeseriesTrainer(accelerator=\"cpu\", max_epochs=MAX_EPOCHS, enable_progress_bar=True)\n",
"trainer.fit(model_3, train_dataloaders=DataLoader(StreamingDataset(x_train, seq_len=SEQ_LEN), batch_size=BATCH_SIZE))"
],
"metadata": {
Expand Down
4 changes: 2 additions & 2 deletions examples/multi_udf/src/udf/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os

import numpy.typing as npt
from numalogic.models.autoencoder import AutoencoderTrainer
from numalogic.models.autoencoder import TimeseriesTrainer
from numalogic.udfs import NumalogicUDF
from numalogic.registry import MLflowRegistry, ArtifactData
from numalogic.tools.data import StreamingDataset
Expand Down Expand Up @@ -36,7 +36,7 @@ def _infer(artifact_data: ArtifactData, stream_data: npt.NDArray[float]) -> list
main_model = artifact_data.artifact
streamloader = DataLoader(StreamingDataset(stream_data, WIN_SIZE))

trainer = AutoencoderTrainer()
trainer = TimeseriesTrainer()
reconerr = trainer.predict(main_model, dataloaders=streamloader)
return reconerr.tolist()

Expand Down
6 changes: 3 additions & 3 deletions examples/multi_udf/src/udf/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import cachetools
import numpy.typing as npt
import pandas as pd
from numalogic.models.autoencoder import AutoencoderTrainer
from numalogic.models.autoencoder import TimeseriesTrainer
from numalogic.models.autoencoder.variants import Conv1dAE
from numalogic.models.threshold import StdDevThreshold
from numalogic.udfs import NumalogicUDF
Expand Down Expand Up @@ -34,7 +34,7 @@ def __init__(self):
self.model_key = "ae::model"

def _save_artifact(
self, model, skeys: list[str], dkeys: list[str], _: Optional[AutoencoderTrainer] = None
self, model, skeys: list[str], dkeys: list[str], _: Optional[TimeseriesTrainer] = None
) -> None:
"""Saves the model in the registry."""
self.registry.save(skeys=skeys, dkeys=dkeys, artifact=model)
Expand Down Expand Up @@ -81,7 +81,7 @@ def exec(self, keys: list[str], datum: Datum) -> Messages:
# Train the autoencoder model
datamodule = TimeseriesDataModule(WIN_SIZE, train_data, batch_size=BATCH_SIZE)
model = Conv1dAE(seq_len=WIN_SIZE, in_channels=train_data.shape[1])
trainer = AutoencoderTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)
trainer = TimeseriesTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)
trainer.fit(model, datamodule=datamodule)

# Get reconstruction error of the training set
Expand Down
8 changes: 4 additions & 4 deletions examples/quick-start.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -389,15 +389,15 @@
{
"data": {
"text/plain": "<IPython.core.display.Javascript object>",
"application/javascript": "\n setTimeout(function() {\n var nbb_cell_id = 60;\n var nbb_unformatted_code = \"from torch.utils.data import DataLoader\\nfrom numalogic.models.autoencoder import AutoencoderTrainer\\nfrom numalogic.models.autoencoder.variants import Conv1dAE\\nfrom numalogic.tools.data import StreamingDataset\\n\\nSEQ_LEN = 24 # length of the sequence\\nMAX_EPOCHS = 30 # number of epochs to run\\nBATCH_SIZE = 64 # training batch size\\n\\nmodel = Conv1dAE(seq_len=SEQ_LEN, in_channels=1, enc_channels=(8, 4))\\nmodel\";\n var nbb_formatted_code = \"from torch.utils.data import DataLoader\\nfrom numalogic.models.autoencoder import AutoencoderTrainer\\nfrom numalogic.models.autoencoder.variants import Conv1dAE\\nfrom numalogic.tools.data import StreamingDataset\\n\\nSEQ_LEN = 24 # length of the sequence\\nMAX_EPOCHS = 30 # number of epochs to run\\nBATCH_SIZE = 64 # training batch size\\n\\nmodel = Conv1dAE(seq_len=SEQ_LEN, in_channels=1, enc_channels=(8, 4))\\nmodel\";\n var nbb_cells = Jupyter.notebook.get_cells();\n for (var i = 0; i < nbb_cells.length; ++i) {\n if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n nbb_cells[i].set_text(nbb_formatted_code);\n }\n break;\n }\n }\n }, 500);\n "
"application/javascript": "\n setTimeout(function() {\n var nbb_cell_id = 60;\n var nbb_unformatted_code = \"from torch.utils.data import DataLoader\\nfrom numalogic.models.autoencoder import TimeseriesTrainer\\nfrom numalogic.models.autoencoder.variants import Conv1dAE\\nfrom numalogic.tools.data import StreamingDataset\\n\\nSEQ_LEN = 24 # length of the sequence\\nMAX_EPOCHS = 30 # number of epochs to run\\nBATCH_SIZE = 64 # training batch size\\n\\nmodel = Conv1dAE(seq_len=SEQ_LEN, in_channels=1, enc_channels=(8, 4))\\nmodel\";\n var nbb_formatted_code = \"from torch.utils.data import DataLoader\\nfrom numalogic.models.autoencoder import TimeseriesTrainer\\nfrom numalogic.models.autoencoder.variants import Conv1dAE\\nfrom numalogic.tools.data import StreamingDataset\\n\\nSEQ_LEN = 24 # length of the sequence\\nMAX_EPOCHS = 30 # number of epochs to run\\nBATCH_SIZE = 64 # training batch size\\n\\nmodel = Conv1dAE(seq_len=SEQ_LEN, in_channels=1, enc_channels=(8, 4))\\nmodel\";\n var nbb_cells = Jupyter.notebook.get_cells();\n for (var i = 0; i < nbb_cells.length; ++i) {\n if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n nbb_cells[i].set_text(nbb_formatted_code);\n }\n break;\n }\n }\n }, 500);\n "
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from torch.utils.data import DataLoader\n",
"from numalogic.models.autoencoder import AutoencoderTrainer\n",
"from numalogic.models.autoencoder import TimeseriesTrainer\n",
"from numalogic.models.autoencoder.variants import Conv1dAE\n",
"from numalogic.tools.data import StreamingDataset\n",
"\n",
Expand Down Expand Up @@ -440,14 +440,14 @@
{
"data": {
"text/plain": "<IPython.core.display.Javascript object>",
"application/javascript": "\n setTimeout(function() {\n var nbb_cell_id = 61;\n var nbb_unformatted_code = \"trainer = AutoencoderTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\\ntrainer.fit(\\n model,\\n train_dataloaders=DataLoader(StreamingDataset(X_train, seq_len=SEQ_LEN), batch_size=BATCH_SIZE),\\n val_dataloaders=DataLoader(StreamingDataset(X_val, seq_len=SEQ_LEN), batch_size=BATCH_SIZE)\\n)\";\n var nbb_formatted_code = \"trainer = AutoencoderTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\\ntrainer.fit(\\n model,\\n train_dataloaders=DataLoader(\\n StreamingDataset(X_train, seq_len=SEQ_LEN), batch_size=BATCH_SIZE\\n ),\\n val_dataloaders=DataLoader(\\n StreamingDataset(X_val, seq_len=SEQ_LEN), batch_size=BATCH_SIZE\\n ),\\n)\";\n var nbb_cells = Jupyter.notebook.get_cells();\n for (var i = 0; i < nbb_cells.length; ++i) {\n if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n nbb_cells[i].set_text(nbb_formatted_code);\n }\n break;\n }\n }\n }, 500);\n "
"application/javascript": "\n setTimeout(function() {\n var nbb_cell_id = 61;\n var nbb_unformatted_code = \"trainer = TimeseriesTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\\ntrainer.fit(\\n model,\\n train_dataloaders=DataLoader(StreamingDataset(X_train, seq_len=SEQ_LEN), batch_size=BATCH_SIZE),\\n val_dataloaders=DataLoader(StreamingDataset(X_val, seq_len=SEQ_LEN), batch_size=BATCH_SIZE)\\n)\";\n var nbb_formatted_code = \"trainer = TimeseriesTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\\ntrainer.fit(\\n model,\\n train_dataloaders=DataLoader(\\n StreamingDataset(X_train, seq_len=SEQ_LEN), batch_size=BATCH_SIZE\\n ),\\n val_dataloaders=DataLoader(\\n StreamingDataset(X_val, seq_len=SEQ_LEN), batch_size=BATCH_SIZE\\n ),\\n)\";\n var nbb_cells = Jupyter.notebook.get_cells();\n for (var i = 0; i < nbb_cells.length; ++i) {\n if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n nbb_cells[i].set_text(nbb_formatted_code);\n }\n break;\n }\n }\n }, 500);\n "
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"trainer = AutoencoderTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\n",
"trainer = TimeseriesTrainer(max_epochs=MAX_EPOCHS, enable_progress_bar=True)\n",
"trainer.fit(\n",
" model,\n",
" train_dataloaders=DataLoader(StreamingDataset(X_train, seq_len=SEQ_LEN), batch_size=BATCH_SIZE),\n",
Expand Down
4 changes: 2 additions & 2 deletions numalogic/blocks/_nn.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import numpy.typing as npt

from numalogic.blocks import Block
from numalogic.models.autoencoder import AutoencoderTrainer
from numalogic.models.autoencoder import TimeseriesTrainer
from numalogic.tools.data import StreamingDataset
from numalogic.tools.types import nn_model_t, state_dict_t

Expand Down Expand Up @@ -64,7 +64,7 @@ def fit(
-------
The error of the model on the input data.
"""
trainer = AutoencoderTrainer(**trainer_kwargs)
trainer = TimeseriesTrainer(**trainer_kwargs)
ds = StreamingDataset(input_, self.seq_len)
trainer.fit(self._artifact, train_dataloaders=DataLoader(ds, batch_size=batch_size))
reconerr = trainer.predict(
Expand Down
10 changes: 4 additions & 6 deletions numalogic/config/_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@


from dataclasses import dataclass, field
from typing import Optional, Any
from typing import Any

from omegaconf import MISSING

Expand Down Expand Up @@ -75,14 +75,12 @@ class LightningTrainerConf:

accelerator: str = "auto"
max_epochs: int = 50
logger: bool = False
logger: bool = True
log_freq: int = 5
check_val_every_n_epoch: int = 5
log_every_n_steps: int = 20
enable_checkpointing: bool = False
enable_progress_bar: bool = True
enable_progress_bar: bool = False
enable_model_summary: bool = True
limit_val_batches: bool = 0
callbacks: Optional[Any] = None


@dataclass
Expand Down
6 changes: 5 additions & 1 deletion numalogic/config/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ class ModelFactory(_ObjectFactory):
TransformerAE,
SparseTransformerAE,
)
from numalogic.models.vae.variants import Conv1dVAE

_CLS_MAP: ClassVar[dict] = {
"VanillaAE": VanillaAE,
Expand All @@ -116,13 +117,16 @@ class ModelFactory(_ObjectFactory):
"SparseLSTMAE": SparseLSTMAE,
"TransformerAE": TransformerAE,
"SparseTransformerAE": SparseTransformerAE,
"Conv1dVAE": Conv1dVAE,
}


class RegistryFactory(_ObjectFactory):
"""Factory class to create registry instances."""

_CLS_SET: ClassVar[frozenset] = frozenset({"RedisRegistry", "MLflowRegistry"})
_CLS_SET: ClassVar[frozenset] = frozenset(
{"RedisRegistry", "MLflowRegistry", "DynamoDBRegistry"}
)

def get_instance(self, object_info: Union[ModelInfo, RegistryInfo]):
import numalogic.registry as reg
Expand Down
4 changes: 2 additions & 2 deletions numalogic/models/autoencoder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
# limitations under the License.


from numalogic.models.autoencoder.trainer import AutoencoderTrainer
from numalogic.tools.trainer import TimeseriesTrainer

__all__ = ["AutoencoderTrainer"]
__all__ = ["TimeseriesTrainer"]
29 changes: 6 additions & 23 deletions numalogic/models/autoencoder/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,6 @@ def __init__(
self.criterion = self.init_criterion(loss_fn)
self.weight_decay = weight_decay

self._total_train_loss = 0.0
self._total_val_loss = 0.0

@property
def total_train_loss(self):
return self._total_train_loss

@property
def total_val_loss(self):
return self._total_val_loss

def reset_train_loss(self):
self._total_train_loss = 0.0

def reset_val_loss(self):
self._total_val_loss = 0.0

@staticmethod
def init_criterion(loss_fn: str):
if loss_fn == "huber":
Expand Down Expand Up @@ -97,11 +80,11 @@ def configure_optimizers(self) -> dict[str, Any]:
return {"optimizer": optimizer}

def training_step(self, batch: Tensor, batch_idx: int) -> Tensor:
loss = self._get_reconstruction_loss(batch)
self._total_train_loss += loss.detach().item()
return loss
recon_loss = self._get_reconstruction_loss(batch)
self.log("train_loss", recon_loss, on_epoch=True, on_step=False)
return recon_loss

def validation_step(self, batch: Tensor, batch_idx: int) -> Tensor:
loss = self._get_reconstruction_loss(batch)
self._total_val_loss += loss.detach().item()
return loss
recon_loss = self._get_reconstruction_loss(batch)
self.log("val_loss", recon_loss)
return recon_loss
86 changes: 0 additions & 86 deletions numalogic/models/autoencoder/trainer.py

This file was deleted.

2 changes: 1 addition & 1 deletion numalogic/models/autoencoder/variants/conv.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,5 +342,5 @@ def _get_reconstruction_loss(self, batch) -> Tensor:
def validation_step(self, batch: Tensor, batch_idx: int) -> Tensor:
recon = self.reconstruction(batch)
loss = self.criterion(batch, recon)
self._total_val_loss += loss.detach().item()
self.log("val_loss", loss)
return loss
Loading

0 comments on commit 61f4575

Please sign in to comment.