Enhance model serialization and deserialization (#37)

* [FIX] `tools.py`
* Update `VERSION`
jzsmoreno · Jun 27, 2024 · 1f29fcf
1 parent 5cab636
commit 1f29fcf
Show file tree

Hide file tree

Showing 3 changed files with 48 additions and 12 deletions.
diff --git a/likelihood/VERSION b/likelihood/VERSION
@@ -1 +1 @@
-1.2.15
+1.2.16
diff --git a/likelihood/models/deep/autoencoders.py b/likelihood/models/deep/autoencoders.py
@@ -6,12 +6,11 @@
 import pandas as pd
 import tensorflow as tf
 from pandas.core.frame import DataFrame
-from tensorflow.keras.models import Model
 
 from likelihood.tools import OneHotEncoder
 
 
-class AutoClassifier(Model):
+class AutoClassifier(tf.keras.Model):
     """
     An auto-classifier model that automatically determines the best classification strategy based on the input data.
 
@@ -23,6 +22,10 @@ class AutoClassifier(Model):
 
     Methods:
         __init__(self, input_shape, num_classes, units, activation): Initializes an AutoClassifier instance with the given parameters.
+        build(self, input_shape): Builds the model architecture based on input_shape.
+        call(self, x): Defines the forward pass of the model.
+        get_config(self): Returns the configuration of the model.
+        from_config(cls, config): Recreates an instance of AutoClassifier from its configuration.
     """
 
     def __init__(self, input_shape, num_classes, units, activation):
@@ -41,33 +44,59 @@ def __init__(self, input_shape, num_classes, units, activation):
             The type of activation function to use for the neural network layers.
         """
         super(AutoClassifier, self).__init__()
+        self.input_shape = input_shape
+        self.num_classes = num_classes
         self.units = units
-        self.shape = input_shape
+        self.activation = activation
 
+        self.encoder = None
+        self.decoder = None
+        self.classifier = None
+
+    def build(self, input_shape):
         self.encoder = tf.keras.Sequential(
             [
-                tf.keras.layers.Dense(units=units, activation=activation),
-                tf.keras.layers.Dense(units=int(units / 2), activation=activation),
+                tf.keras.layers.Dense(units=self.units, activation=self.activation),
+                tf.keras.layers.Dense(units=int(self.units / 2), activation=self.activation),
             ]
         )
 
         self.decoder = tf.keras.Sequential(
             [
-                tf.keras.layers.Dense(units=units, activation=activation),
-                tf.keras.layers.Dense(units=input_shape, activation=activation),
+                tf.keras.layers.Dense(units=self.units, activation=self.activation),
+                tf.keras.layers.Dense(units=self.input_shape, activation=self.activation),
             ]
         )
 
         self.classifier = tf.keras.Sequential(
-            [tf.keras.layers.Dense(num_classes, activation="softmax")]
+            [tf.keras.layers.Dense(self.num_classes, activation="softmax")]
         )
 
     def call(self, x):
         encoded = self.encoder(x)
         decoded = self.decoder(encoded)
         combined = tf.concat([decoded, encoded], axis=1)
-        classifier = self.classifier(combined)
-        return classifier
+        classification = self.classifier(combined)
+        return classification
+
+    def get_config(self):
+        config = {
+            "input_shape": self.input_shape,
+            "num_classes": self.num_classes,
+            "units": self.units,
+            "activation": self.activation,
+        }
+        base_config = super(AutoClassifier, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+    @classmethod
+    def from_config(cls, config):
+        return cls(
+            input_shape=config["input_shape"],
+            num_classes=config["num_classes"],
+            units=config["units"],
+            activation=config["activation"],
+        )
 
 
 def call_existing_code(

diff --git a/likelihood/tools/tools.py b/likelihood/tools/tools.py
@@ -815,7 +815,10 @@ def train(self, path_to_save: str, **kwargs) -> None:
                 self._df[i] = self._df[i].apply(
                     self._code_transformation_to, dictionary_list=encode_dict
                 )
-                median_value = len(self._df[i].unique()) // 2
+                if len(self._df[i].unique()) > 1:
+                    median_value = len(self._df[i].unique()) // 2
+                else:
+                    median_value = 1.0
                 if norm_method == "median":
                     self._df[i] = self._df[i].astype("float64")
                     self._df[i] = self._df[i] / median_value
@@ -842,6 +845,8 @@ def encode(self, path_to_save: str = "./", **kwargs) -> DataFrame:
             print("Configuration detected")
             if len(self.median_list) == len(self._encode_columns):
                 median_mode = True
+            else:
+                median_mode = False
             for num, colname in enumerate(self._encode_columns):
                 if self._df[colname].dtype == "object":
                     encode_dict = self.encoding_list[num]
@@ -859,6 +864,8 @@ def decode(self) -> DataFrame:
         df_decoded = self._df.copy()
         if len(self.median_list) == len(self._encode_columns):
             median_mode = True
+        else:
+            median_mode = False
         try:
             number_of_columns = len(self.decoding_list[j])
             for i in self._encode_columns: