Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a project for neural networks #112

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Neural Networks/RNN-Traffic-Prediction/.gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Auto detect text files and perform LF normalization
* text=auto
547 changes: 547 additions & 0 deletions Neural Networks/RNN-Traffic-Prediction/1-RNN.ipynb

Large diffs are not rendered by default.

174 changes: 174 additions & 0 deletions Neural Networks/RNN-Traffic-Prediction/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
import math
import torch
from torch.utils import data
import torch.nn as nn
from matplotlib import pyplot as plt
from sklearn.metrics import mean_squared_error as mse_fn, mean_absolute_error as mae_fn
import numpy as np
import time


def mape_fn(y, pred):
    """Mean absolute percentage error in percent, skipping zero targets.

    Entries where ``y == 0`` are excluded to avoid division by zero.
    """
    nonzero = y != 0
    rel_err = np.abs((y[nonzero] - pred[nonzero]) / y[nonzero])
    return np.mean(rel_err) * 100


def eval(y, pred):
    """Return ``[rmse, mae, mape]`` for two tensors, computed on CPU.

    NOTE: the name shadows the ``eval`` builtin; kept as-is because
    ``train``/``test`` in this module call it by this name.
    """
    y_np, pred_np = y.cpu().numpy(), pred.cpu().numpy()
    rmse = math.sqrt(mse_fn(y_np, pred_np))
    return [rmse, mae_fn(y_np, pred_np), mape_fn(y_np, pred_np)]


# Evaluation loop (regression metrics over an entire data iterator).
def test(net, output_model, data_iter, loss_fn, denormalize_fn, device='cpu'):
    """Evaluate ``net`` (plus optional ``output_model`` head) on ``data_iter``.

    Parameters
    ----------
    net : recurrent model returning ``(output, hidden)``; the last time step
        of ``output`` is used as the prediction.
    output_model : optional projection head applied to the last time step,
        or ``None`` to use the raw RNN output.
    data_iter : iterable of ``(X, Y)`` batches.
    loss_fn : loss computed on normalized values.
    denormalize_fn : maps normalized values back to the original scale
        before the metrics are computed.
    device : torch device for the batches.

    Returns
    -------
    ([avg_rmse, avg_mae, avg_mape], avg_loss) averaged over batches.
    """
    rmse, mae, mape = 0, 0, 0
    batch_count = 0
    total_loss = 0.0
    net.eval()
    if output_model is not None:
        output_model.eval()
    # Fix: run the whole evaluation under no_grad — the original tracked
    # gradients here, building computation graphs for no reason.
    with torch.no_grad():
        for X, Y in data_iter:
            X = X.to(device).float()
            Y = Y.to(device).float()
            output, hidden = net(X)
            if output_model is not None:
                y_hat = output_model(output[:, -1, :].squeeze(-1)).squeeze(-1)
            else:
                y_hat = output[:, -1, :].squeeze(-1)
            loss = loss_fn(y_hat, Y)

            # Metrics are computed on denormalized (original-scale) values.
            Y = denormalize_fn(Y)
            y_hat = denormalize_fn(y_hat)
            a, b, c = eval(Y.detach(), y_hat.detach())
            rmse += a
            mae += b
            mape += c
            total_loss += loss.detach().cpu().numpy().tolist()
            batch_count += 1
    return [rmse / batch_count, mae / batch_count, mape / batch_count], total_loss / batch_count


def train(net, train_iter, val_iter, test_iter, loss_fn, denormalize_fn, optimizer, num_epoch,
          early_stop=10, device='cpu', output_model=None, is_print=True, is_print_batch=False):
    """Train ``net`` with early stopping on validation RMSE.

    Per epoch: one pass over ``train_iter`` (backprop + metric accumulation),
    then an evaluation pass over ``val_iter`` via ``test``. Training stops
    early after ``early_stop`` consecutive epochs without a validation-RMSE
    improvement. After training, the model is evaluated once on ``test_iter``.

    Returns ``(train_loss_lst, val_loss_lst, train_score_lst, val_score_lst,
    epoch)`` where each score entry is ``[rmse, mae, mape]`` and ``epoch`` is
    the index of the last epoch actually run.
    """
    train_loss_lst = []
    val_loss_lst = []
    train_score_lst = []
    val_score_lst = []
    epoch_time = []

    best_epoch = 0
    best_val_rmse = 9999  # sentinel "infinity"; first epoch always improves on it
    early_stop_flag = 0   # consecutive epochs without improvement
    for epoch in range(num_epoch):
        net.train()
        if output_model is not None:
            output_model.train()
        epoch_loss = 0
        batch_count = 0
        batch_time = []
        rmse, mae, mape = 0, 0, 0
        for X, Y in train_iter:
            batch_s = time.time()
            X = X.to(device).float()
            Y = Y.to(device).float()
            optimizer.zero_grad()
            output, hidden = net(X)
            # Prediction = last time step of the RNN output, optionally
            # passed through the projection head.
            if output_model is not None:
                y_hat = output_model(output[:, -1, :].squeeze(-1)).squeeze()
            else:
                y_hat = output[:, -1, :].squeeze(-1)
            loss = loss_fn(y_hat, Y)
            loss.backward()
            optimizer.step()

            # Metrics are computed on denormalized (original-scale) values.
            Y = denormalize_fn(Y)
            y_hat = denormalize_fn(y_hat)
            a, b, c = eval(Y.detach(), y_hat.detach())
            rmse += a
            mae += b
            mape += c
            epoch_loss += loss.detach().cpu().numpy().tolist()
            batch_count += 1
            # sample_num += X.shape[0]

            batch_time.append(time.time() - batch_s)
            if is_print and is_print_batch:
                # NOTE(review): prints the running epoch_loss sum, not the
                # per-batch loss — confirm this is the intended display.
                print('epoch-batch: %d-%d, train loss %.4f, time use %.3fs' %
                      (epoch + 1, batch_count, epoch_loss, batch_time[-1]))

        train_loss = epoch_loss / batch_count
        train_loss_lst.append(train_loss)
        train_score_lst.append([rmse/batch_count, mae/batch_count, mape/batch_count])

        # Validation pass (no parameter updates).
        val_score, val_loss = test(net, output_model, val_iter, loss_fn, denormalize_fn, device)
        val_score_lst.append(val_score)
        val_loss_lst.append(val_loss)

        epoch_time.append(np.array(batch_time).sum())

        # Print this epoch's summary.
        if is_print:
            print('*** epoch%d, train loss %.4f, train rmse %.4f, val loss %.4f, val rmse %.6f, time use %.3fs' %
                  (epoch + 1, train_loss, train_score_lst[-1][0], val_loss, val_score[0], epoch_time[-1]))

        # Early stopping on validation RMSE.
        if val_score[0] < best_val_rmse:
            best_val_rmse = val_score[0]
            best_epoch = epoch
            early_stop_flag = 0
        else:
            early_stop_flag += 1
            if early_stop_flag == early_stop:
                print(f'\nThe model has not been improved for {early_stop} rounds. Stop early!')
                break

    # Final training summary.
    # NOTE(review): best_epoch is 0-indexed here while per-epoch prints use
    # epoch + 1 — the reported "best epoch" is off by one vs. those lines.
    print(f'\n{"*" * 40}\nFinal result:')
    print(f'Get best validation rmse {np.array(val_score_lst)[:, 0].min() :.4f} '
          f'at epoch {best_epoch}')
    print(f'Total time {np.array(epoch_time).sum():.2f}s')
    print()

    # Evaluate on the held-out test set.
    test_score, test_loss = test(net, output_model, test_iter, loss_fn, denormalize_fn, device)
    print('Test result:')
    print(f'Test RMSE: {test_score[0]} Test MAE: {test_score[1]} Test MAPE: {test_score[2]}')
    return train_loss_lst, val_loss_lst, train_score_lst, val_score_lst, epoch


def visualize(num_epochs, train_data, test_data, x_label='epoch', y_label='loss'):
    """Plot train vs. validation curves over epochs.

    Parameters
    ----------
    num_epochs : last epoch index; the x axis spans 0..num_epochs inclusive,
        so both data sequences must have ``num_epochs + 1`` points.
    train_data, test_data : per-epoch values to plot.
    x_label, y_label : axis labels; ``y_label`` also names the legend entries.
    """
    # Fix: np.int was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin int is the documented replacement.
    x = np.arange(0, num_epochs + 1).astype(dtype=int)
    plt.plot(x, train_data, label=f"train_{y_label}", linewidth=1.5)
    plt.plot(x, test_data, label=f"val_{y_label}", linewidth=1.5)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.legend()
    plt.show()


def plot_metric(score_log):
    """Draw RMSE / MAE / MAPE curves from a per-epoch score log.

    ``score_log`` is a sequence of ``[rmse, mae, mape]`` rows; each metric
    gets its own subplot in a 2x2 grid (fourth cell left empty).
    """
    scores = np.array(score_log)
    panels = [('RMSE', '#d28ad4'), ('MAE', '#e765eb'), ('MAPE', '#6b016d')]

    plt.figure(figsize=(10, 6), dpi=300)
    for col, (label, color) in enumerate(panels):
        plt.subplot(2, 2, col + 1)
        plt.plot(scores[:, col], c=color)
        plt.ylabel(label)

    plt.show()