forked from klintkanopka/dkt2
-
Notifications
You must be signed in to change notification settings - Fork 1
/
dkt2.py
executable file
·118 lines (100 loc) · 4.26 KB
/
dkt2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env python3
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import argparse
import csv
import numpy as np
from os.path import isfile
from keras.models import Model, load_model
from keras.layers import Input, GRU, LSTM, SimpleRNN, Dense
from functools import reduce
def read_data_from_csv_file(fileName, n_params=8):
rows = []
with open(fileName, "r") as csvfile:
reader = csv.reader(csvfile, delimiter=',')
for row in reader:
rows.append(row)
row_skip = 1
col_skip = 2
index = row_skip
print("the number of rows is " + str(len(rows)))
n_students = int((len(rows)-row_skip)/(n_params+1))
print("the number of students is " + str(n_students))
n_items = int(rows[row_skip][col_skip])
for n in range(n_students):
if (int(rows[row_skip + n*(n_params + 1)][col_skip]) > n_items):
n_items = int(rows[row_skip + n*(n_params + 1)][col_skip])
print("the number of items is " + str(n_items))
inputs = np.zeros((n_students, n_items, n_params - 1))
target = np.zeros((n_students, n_items, 1))
for i in range(n_students):
index = (n_params + 1)*i + row_skip
num_items = int(rows[index][col_skip])
for j in range(num_items):
target[i][j][0] = int(rows[index + 1][j + col_skip])
for k in range(n_params-1):
for j in range(num_items):
inputs[i][j][k] = float(rows[index + 2 + k][j + col_skip])
print("finished reading data")
return inputs, target
def compose2(f, g):
return lambda x: f(g(x))
def compose(*args):
return reduce(compose2, args)
def make_model(input_shape, rnn_layer=GRU, units=64):
inputs = Input(input_shape)
x = rnn_layer(units, return_sequences=True)(inputs)
x = Dense(1)(x)
return Model(inputs=inputs, outputs=x)
def main():
argparser = argparse.ArgumentParser()
argparser.add_argument("data")
argparser.add_argument( "-n",
"--epochs",
type=int,
default=150,
help="number of epochs to train" )
argparser.add_argument( "-s",
"--split",
type=float,
default=0.2,
help="fraction of data to use for training" )
argparser.add_argument( "-t",
"--type",
type=str.lower,
default="gru",
choices=["gru", "lstm", "simplernn"],
help="type of RNN layer to use" )
argparser.add_argument( "-u",
"--units",
type=int,
default=64,
help="number of units per RNN cell" )
argparser.add_argument( "-m",
"--model-file",
type=str,
default="dkt2.h5",
help="model file to use" )
args = argparser.parse_args()
rnn_layer = {"gru": GRU, "lstm": LSTM, "simplernn": SimpleRNN}[args.type];
x, y = read_data_from_csv_file(args.data)
if isfile(args.model_file):
model = load_model(args.model_file)
else:
model = make_model( x.shape[1:], rnn_layer=rnn_layer, units=args.units)
model.compile("Adam", "binary_crossentropy", metrics=["accuracy"]);
model.summary()
model.fit(x=x, y=y, epochs=args.epochs, validation_split=args.split)
model.save(args.model_file)
if __name__ == "__main__":
main()