Merge branch 'develop'
daniprec committed May 14, 2020
2 parents f92e181 + be5311c commit e3f96bd
Showing 8 changed files with 751 additions and 16 deletions.
138 changes: 138 additions & 0 deletions better_nilm/model/export.py
@@ -0,0 +1,138 @@
import os
import pickle

from keras.models import model_from_json


def store_model_json(model, path_model, path_weights=None):
"""
Serializes a model into a json file.
Also serializes its weights as a h5 file.
Parameters
----------
model : keras.models.Sequential
path_model : str
Path to where the json is created, including the filename and json
termination.
path_weights : str, default=None
Path to where the h5 is created, including the filename and h5
termination. If None is provided, weights are stored in the same
route as the model, using the same name.
"""
    if not path_model.endswith(".json"):
        raise ValueError("path_model must end in a json file. Current "
                         f"path:\n{path_model}")

    if path_weights is None:
        # maxsplit=1 strips only the final extension, keeping any other
        # dots in the path intact
        path_weights = path_model.rsplit(".", 1)[0] + ".h5"
    elif not path_weights.endswith(".h5"):
        raise ValueError("path_weights must end in an h5 file. Current "
                         f"path:\n{path_weights}")
# serialize model to JSON
model_json = model.to_json()
with open(path_model, "w") as json_file:
json_file.write(model_json)
# serialize weights to HDF5
model.save_weights(path_weights)
print(f"Saved model to disk. Path:\n{path_model}")


def load_model_json(path_model, path_weights=None):
"""
Parameters
----------
path_model : str
Path to where the serialized model is stored, in json format.
path_weights : str, default=None
Path to where the model weights are stored, in h5 format.
If None is provided, assumes the h5 file is located in the same route
as the model and with the same name.
Returns
-------
model : keras.models.Sequential
"""
    if not path_model.endswith(".json"):
        raise ValueError("path_model must end in a json file. Current "
                         f"path:\n{path_model}")

    if path_weights is None:
        path_weights = path_model.rsplit(".", 1)[0] + ".h5"
    elif not path_weights.endswith(".h5"):
        raise ValueError("path_weights must end in an h5 file. Current "
                         f"path:\n{path_weights}")

if not os.path.isfile(path_model):
raise FileNotFoundError(f"path_model does not lead to an existing "
f"file:\n{path_model}")

if not os.path.isfile(path_weights):
raise FileNotFoundError(f"path_weights does not lead to an existing "
f"file:\n{path_weights}")

# load json and create model
    with open(path_model, "r") as json_file:
        model_json = json_file.read()
model = model_from_json(model_json)
# load weights into new model
model.load_weights(path_weights)
print("Loaded model from disk")
return model


def store_dict_pkl(dic, path_dic):
"""
Stores a dictionary into a pkl file.
Parameters
----------
dic : dict
Dictionary to store.
path_dic : str
Path to where the pkl is created, including the filename and pkl
termination.
"""

    if not path_dic.endswith(".pkl"):
        raise ValueError("path_dic must end in a pkl file. Current "
                         f"path:\n{path_dic}")

    with open(path_dic, "wb") as a_file:
        pickle.dump(dic, a_file)


def load_dict_pkl(path_dic):
"""
Loads a dictionary from a pkl file.
Parameters
----------
path_dic : str
Path to where the dictionary is stored, in pkl format.
Returns
-------
dic : dict
Dictionary.
"""
    if not path_dic.endswith(".pkl"):
        raise ValueError("path_dic must end in a pkl file. Current "
                         f"path:\n{path_dic}")

    if not os.path.isfile(path_dic):
        raise FileNotFoundError(f"path_dic does not lead to an existing "
                                f"file:\n{path_dic}")

    with open(path_dic, "rb") as a_file:
        dic = pickle.load(a_file)

    return dic
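
For reference, a minimal round-trip sketch of the new export helpers. The trained model and the outputs/ paths below are illustrative assumptions, not part of the commit:

from better_nilm.model.export import (store_model_json, load_model_json,
                                      store_dict_pkl, load_dict_pkl)

# model is assumed to be an already trained keras model
store_model_json(model, "outputs/gru.json")  # weights go to outputs/gru.h5
model = load_model_json("outputs/gru.json")

# Plain dictionaries (e.g. appliance thresholds) round-trip through pickle
store_dict_pkl({"fridge": 10.5}, "outputs/thresholds.pkl")
thresholds = load_dict_pkl("outputs/thresholds.pkl")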
46 changes: 38 additions & 8 deletions better_nilm/model/gru.py
@@ -4,27 +4,44 @@
from keras.layers import Conv1D
from keras.layers import GRU
from keras.layers import Bidirectional
from keras.layers import Lambda
from keras.layers import Activation
from keras.activations import sigmoid

from keras.optimizers import Adam


-def create_gru_model(series_len, num_appliances,
-                     regression_weight=1, classification_weight=1):
+def create_gru_model(series_len, num_appliances, thresholds,
+                     regression_weight=1, classification_weight=1,
+                     learning_rate=0.001):
"""
Creates a Gated Recurrent Unit model.
Based on OdysseasKr GRU model:
https://github.com/OdysseasKr/neural-disaggregator/blob/master/GRU
Parameters
----------
series_len : int
num_appliances : int
thresholds : numpy.array
shape = (num_appliances, )
Load threshold for each appliance
regression_weight : float, default=1
Weight for the regression loss (MSE)
classification_weight : float, default=1
Weight for the classification loss (BCE)
learning_rate : float, default=0.001
Starting learning rate for the Adam optimizer.
Returns
-------
model : keras.models.Sequential
"""
    assert len(thresholds) == num_appliances, "Number of thresholds must " \
                                              "equal the number of appliances"

# ARCHITECTURE

# Input layer (batch, series_len, 1)
inputs = Input(shape=(series_len, 1))
@@ -41,22 +58,35 @@ def create_gru_model(series_len, num_appliances,
gru2 = Bidirectional(GRU(128, return_sequences=True, stateful=False),
merge_mode='concat')(gru1)

# Dense layer
dense = Dense(64, activation='relu')(gru2)

+    # Regression output
    # Fully Connected Layers (batch, series_len, num_appliances)
    regression = Dense(num_appliances, activation='relu',
-                       name='regression')(gru2)
+                       name='regression')(dense)

+    # Classification output
+    subtract = Lambda(lambda x: x - thresholds)(regression)
    # Fully Connected Layers (batch, series_len, num_appliances)
-    classification = Dense(num_appliances, activation="sigmoid",
-                           name="classification")(gru2)
+    classification = Activation(sigmoid, name='classification')(subtract)

# TRAINING

# Weights
# We scale the weights because BCE grows bigger than MSE
class_w = classification_weight * .003
reg_w = regression_weight * .997

# Optimizer
opt = Adam(learning_rate=learning_rate)

model = Model(inputs=inputs,
outputs=[regression, classification])
    model.compile(loss={"regression": "mean_squared_error",
                        "classification": "binary_crossentropy"},
-                  loss_weights={"regression": regression_weight,
-                                "classification": classification_weight},
-                  optimizer='adam')
+                  loss_weights={"regression": reg_w,
+                                "classification": class_w},
+                  optimizer=opt)

return model
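
A quick construction sketch for the updated signature; the window length and watt thresholds below are made-up values, not taken from the commit:

import numpy as np
from better_nilm.model.gru import create_gru_model

thresholds = np.array([10.5, 1200.0])  # hypothetical fridge / kettle watts
model = create_gru_model(series_len=510, num_appliances=2,
                         thresholds=thresholds,
                         regression_weight=1, classification_weight=1,
                         learning_rate=0.001)
model.summary()
# Two heads share the dense trunk: 'regression' predicts the load, and
# 'classification' applies a sigmoid to (regression - thresholds), tying
# the ON probability to the predicted load crossing each threshold.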
8 changes: 3 additions & 5 deletions better_nilm/model/preprocessing.py
@@ -219,10 +219,8 @@ def _get_cluster_centroids(ser):
std = np.zeros((num_meters, 2))

for idx in range(num_meters):
-        # Take one meter record, and sort it in ascending order
-        # to ensure the first values correspond to OFF state
+        # Take one meter record
        meter = ser[:, :, idx].flatten()
-        meter = np.sort(meter)
        meter = meter.reshape((len(meter), -1))
kmeans = KMeans(n_clusters=2).fit(meter)

@@ -297,8 +295,8 @@ def binarize(ser, thresholds):

# Iterate through all the appliances
for idx in range(num_app):
-        mask_on = ser[:, :, idx] >= thresholds[idx]
-        ser_bin[mask_on] = 1
+        mask_on = ser[:, :, idx] > thresholds[idx]
+        ser_bin[:, :, idx] = mask_on.astype(int)

ser_bin = ser_bin.astype(int)
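
The behavioural change in binarize is easiest to see on a toy array (all values below are made up). The old code assigned through a 2-D mask, ser_bin[mask_on] = 1, which marked every appliance channel at the masked positions; the new code thresholds each channel independently, and ties (value equal to the threshold) now map to OFF:

import numpy as np

ser = np.array([[[5.0, 900.0],
                 [15.0, 1200.0]]])  # (num_series=1, series_len=2, num_app=2)
thresholds = [10.0, 1200.0]

ser_bin = np.zeros(ser.shape, dtype=int)
for idx in range(ser.shape[2]):
    # ON (1) only when strictly above the appliance threshold
    ser_bin[:, :, idx] = (ser[:, :, idx] > thresholds[idx]).astype(int)

print(ser_bin[0])  # [[0 0]
                   #  [1 0]] -- 1200.0 is not strictly above 1200.0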

141 changes: 141 additions & 0 deletions better_nilm/model/scores.py
@@ -0,0 +1,141 @@
import numpy as np

from sklearn.metrics import mean_squared_error

from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score


def _assert_shape(y_pred, y_real, appliances):
if not y_pred.shape == y_real.shape:
raise ValueError("Array shape mismatch.\n"
f"y_pred shape: {y_pred.shape}\n"
f"y_real_shape: {y_real.shape}")

if y_pred.shape[2] != len(appliances):
raise ValueError("Number of appliances mismatch.\n"
f"Appliances in y_pred array: {y_pred.shape[2]}\n"
f"Appliances in appliances list: {len(appliances)}")


def regression_score_dict(y_pred, y_real, appliances):
"""
Returns a dictionary with some regression scores, for each appliance.
- MSE, Mean Square Error
- RMSE, Root Mean Squared Error
Parameters
----------
y_pred : numpy.array
shape = (num_series, series_len, num_appliances)
- num_series : Amount of time series.
- series_len : Length of each time series.
- num_appliances : Meters contained in the array.
y_real : numpy.array
shape = (num_series, series_len, num_appliances)
appliances : list
len = num_appliances
Must be sorted following the order of both y_pred and y_real
Returns
-------
scores : dict
'appliance': {'metric': value}
"""
_assert_shape(y_pred, y_real, appliances)

if np.mean(y_real) <= 1:
print("Warning!\nThe predicted values appear to be normalized.\n"
"It is recommended to use the de-normalized values\n"
"when computing the regression errors")

# Initialize dict
scores = {}

for idx, app in enumerate(appliances):
app_pred = y_pred[:, :, idx].flatten()
app_real = y_real[:, :, idx].flatten()

# MSE and RMSE
app_mse = mean_squared_error(app_real, app_pred)
app_rmse = np.sqrt(app_mse)

scores[app] = {"mse": round(app_mse, 2),
"rmse": round(app_rmse, 2)}

return scores


def classification_scores_dict(y_pred, y_real, appliances, threshold=.5):
"""
Returns a dictionary with some regression scores, for each appliance.
- Accuracy
- F1-Score
- Precision
- Recall
Parameters
----------
y_pred : numpy.array
shape = (num_series, series_len, num_appliances)
- num_series : Amount of time series.
- series_len : Length of each time series.
- num_appliances : Meters contained in the array.
y_real : numpy.array
shape = (num_series, series_len, num_appliances)
appliances : list
len = num_appliances
Must be sorted following the order of both y_pred and y_real
threshold : float, default=0.5
Minimum value (form 0 to 1) at which we consider the appliance to be ON
Returns
-------
scores : dict
'appliance': {'metric': value}
"""

_assert_shape(y_pred, y_real, appliances)

    if (y_pred.max() > 1 or y_real.max() > 1
            or y_pred.min() < 0 or y_real.min() < 0):
        raise ValueError("Classification values must be between 0 and 1.")

# Binarize the arrays
bin_pred = np.zeros(y_pred.shape)
bin_pred[y_pred >= threshold] = 1
bin_pred = bin_pred.astype(int)

bin_real = np.zeros(y_real.shape)
bin_real[y_real >= threshold] = 1
bin_real = bin_real.astype(int)

# Initialize dict
scores = {}

for idx, app in enumerate(appliances):
app_pred = bin_pred[:, :, idx].flatten()
app_real = bin_real[:, :, idx].flatten()

        # Accuracy
        app_accuracy = accuracy_score(app_real, app_pred)

# F1-Score
app_f1 = f1_score(app_real, app_pred)

# Precision
app_precision = precision_score(app_real, app_pred)

# Recall
app_recall = recall_score(app_real, app_pred)

scores[app] = {"accuracy": round(app_accuracy, 4),
"f1": round(app_f1, 4),
"precision": round(app_precision, 4),
"recall": round(app_recall, 4)}

return scores
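
A toy scoring sketch; the appliance names and array contents are made up:

import numpy as np
from better_nilm.model.scores import (regression_score_dict,
                                      classification_scores_dict)

rng = np.random.default_rng(0)
appliances = ["fridge", "kettle"]

# (num_series, series_len, num_appliances) watt arrays for the regression scores
y_real = rng.uniform(0, 2000, size=(4, 60, 2))
y_pred = y_real + rng.normal(0, 50, size=y_real.shape)
print(regression_score_dict(y_pred, y_real, appliances))

# Values in [0, 1] for the classification scores
p_real = rng.integers(0, 2, size=(4, 60, 2)).astype(float)
p_pred = rng.uniform(0, 1, size=(4, 60, 2))
print(classification_scores_dict(p_pred, p_real, appliances, threshold=0.5))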