-
Notifications
You must be signed in to change notification settings - Fork 0
/
net.cpp
143 lines (132 loc) · 3.78 KB
/
net.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#include "net.h"

#include <algorithm>
#include <cassert>
#include <iostream>
#include <limits>
#include <numeric>
#include <random>
Net::Net(Loss *loss, float lr) : m_loss(loss), m_lr(lr)
{
}
// Release everything the network owns: each layer, then the loss function.
Net::~Net()
{
    for (size_t i = 0; i < m_layers.size(); ++i)
    {
        delete m_layers[i];
    }
    m_layers.clear();
    delete m_loss;
}
// Push `in_X` through every layer and return the final activations.
// The caller's tensor is left untouched; a private clone is transformed
// in place by each layer.
Tensor Net::Forward(const Tensor &in_X)
{
    Tensor activations = in_X.Clone();
#ifdef USE_GPU
    activations.ToGPU();   // layers work on device memory in GPU builds
#endif
    for (size_t i = 0; i < m_layers.size(); ++i)
    {
        m_layers[i]->Forward(activations);
    }
    return activations;
}
void Net::SGD(const std::vector<Tensor> &training_data, const std::vector<Tensor> &training_labels,
int num_epochs, int mini_batch_size,
std::vector<Tensor> *test_data, std::vector<Tensor> *test_labels)
{
std::vector<int> train_data_indices(training_data.size());
std::iota(train_data_indices.begin(), train_data_indices.end(), 0);
auto rng = std::default_random_engine{};
for (int i = 0; i < num_epochs; i++)
{
std::shuffle(train_data_indices.begin(), train_data_indices.end(), rng);
size_t start_idx = 0;
size_t end_idx = mini_batch_size;
while (true)
{
Tensor batch_data(mini_batch_size, 1, 28*28, 1);
Tensor batch_labels(mini_batch_size, 1, 10, 1);
for (size_t k = start_idx; k < end_idx; k++)
{
batch_data.SetItemHost(k%mini_batch_size, training_data[train_data_indices[k]]);
batch_labels.SetItemHost(k%mini_batch_size, training_labels[train_data_indices[k]]);
}
#ifdef USE_GPU
batch_data.ToGPU();
batch_labels.ToGPU();
#endif
TrainMinibatch(batch_data, batch_labels);
start_idx += mini_batch_size;
if (start_idx >= training_data.size())
{
break;
}
end_idx += mini_batch_size;
end_idx = std::min(end_idx, training_data.size());
int bs = end_idx - start_idx;
if (bs < mini_batch_size)
{
batch_data = Tensor(bs, 1, 28*28, 1);
batch_labels = Tensor(bs, 1, 10, 1);
#ifdef USE_GPU
batch_data.ToGPU();
batch_labels.ToGPU();
#endif
}
}
std::cout << "Epoch #" << i << " finished. ";
if (test_data)
{
std::cout << Evaluate(*test_data, *test_labels) << "/" << test_data->size();
}
std::cout << std::endl;
}
}
// One SGD step on a single mini-batch: forward pass, loss gradient,
// then backprop + weight update through the layers in reverse order.
// `in_X` is transformed in place into the network's output activations.
void Net::TrainMinibatch(Tensor &in_X, const Tensor &in_y)
{
    // Forward pass, mutating the activations layer by layer.
    for (Layer *layer : m_layers)
    {
        layer->Forward(in_X);
    }
    // Gradient of the loss with respect to the network output.
    Tensor err = m_loss->Deriv(in_X, in_y);
    // Backpropagate and apply the learning-rate step, last layer first.
    for (auto it = m_layers.rbegin(); it != m_layers.rend(); ++it)
    {
        (*it)->Backward(err);
        (*it)->UpdateWeights(m_lr);
    }
}
// Arg-max over the rows of a single-item, single-channel, single-column
// tensor — i.e. the index of the strongest activation / the hot entry of
// a one-hot label.  Returns -1 only if the tensor has zero rows.
static int GetMaxIdx(const Tensor &y)
{
    assert(y.GetNumCols() == 1);
    assert(y.GetNumChannels() == 1);
    assert(y.GetNumItems() == 1);
    const int num_rows = y.GetNumRows();
    int best_idx = -1;
    float best_val = std::numeric_limits<float>::lowest();
    for (int row = 0; row < num_rows; ++row)
    {
        const float v = y(0, 0, row, 0);
        if (v > best_val)
        {
            best_val = v;
            best_idx = row;
        }
    }
    return best_idx;
}
int Net::Evaluate(const std::vector<Tensor> &test_data, const std::vector<Tensor> &test_labels)
{
int num_correct = 0;
for (int i = 0; i < test_data.size(); i++)
{
Tensor prediction = Forward(test_data[i]);
#ifdef USE_GPU
prediction.UpdateFromGPU();
#endif
int predicted_number = GetMaxIdx(prediction);
int correct_number = GetMaxIdx(test_labels[i]);
//std::cout << "Predicted: " << predicted_number << std::endl;
//std::cout << "Correct: " << correct_number << std::endl;
if (predicted_number == correct_number)
{
num_correct++;
}
}
return num_correct;
}