-
Notifications
You must be signed in to change notification settings - Fork 0
/
boltzmann_movie_machine.py
110 lines (95 loc) · 3.76 KB
/
boltzmann_movie_machine.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 25 13:55:22 2020
@author: Ram
"""
import pgmpy
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.optim as optim
import torch.utils.data
from torch.autograd import Variable
# MovieLens 1M metadata tables ('::'-separated, read without a header row).
# NOTE(review): none of these three frames is referenced by the code visible
# in this script; they are kept because loading them is existing behavior.
movies = pd.read_csv(
    'C:/Users/Ram/Downloads/Boltzmann_Machines/ml-1m/movies.dat',
    sep='::', header=None, engine='python', encoding='latin-1')
users = pd.read_csv(
    'C:/Users/Ram/Downloads/Boltzmann_Machines/ml-1m/users.dat',
    sep='::', header=None, engine='python', encoding='latin-1')
ratings = pd.read_csv(
    'C:/Users/Ram/Downloads/Boltzmann_Machines/ml-1m/ratings.dat',
    sep='::', header=None, engine='python', encoding='latin-1')

# MovieLens 100K train/test split, tab-separated: user id, movie id, rating,
# timestamp. These files carry no header line, so header=None is required;
# without it pandas uses the first rating of each file as column names and
# that rating is silently lost.
training_set = pd.read_csv(
    'C:/Users/Ram/Downloads/Boltzmann_Machines/ml-100k/u1.base',
    delimiter='\t', header=None)
training_set = np.array(training_set, dtype='int')
test_set = pd.read_csv(
    'C:/Users/Ram/Downloads/Boltzmann_Machines/ml-100k/u1.test',
    delimiter='\t', header=None)
test_set = np.array(test_set, dtype='int')

# Ids are 1-based, so the largest id seen in either split is the count.
# ndarray.max() reduces in native code instead of iterating in Python.
nb_users = int(max(training_set[:, 0].max(), test_set[:, 0].max()))
nb_movies = int(max(training_set[:, 1].max(), test_set[:, 1].max()))
def convert(data, n_users=None, n_movies=None):
    """Pivot a flat ratings table into one dense rating row per user.

    Args:
        data: 2-D NumPy integer array whose first three columns are
            user id, movie id and rating. Ids are 1-based.
        n_users: Number of user rows to produce (users ``1..n_users``).
            Defaults to the module-level ``nb_users``.
        n_movies: Length of every row (movies ``1..n_movies``).
            Defaults to the module-level ``nb_movies``.

    Returns:
        A list of ``n_users`` lists of ``n_movies`` floats, where
        ``result[u - 1][m - 1]`` is the rating user ``u`` gave movie ``m``
        and 0.0 marks a movie the user has not rated in ``data``.
    """
    # Fall back to the script-wide sizes so existing one-argument calls
    # behave exactly as before.
    if n_users is None:
        n_users = nb_users
    if n_movies is None:
        n_movies = nb_movies

    new_data = []
    for id_user in range(1, n_users + 1):
        # Build the row mask once and reuse it for both columns.
        rows_of_user = data[:, 0] == id_user
        id_movies = data[rows_of_user, 1]
        id_ratings = data[rows_of_user, 2]
        user_ratings = np.zeros(n_movies)
        # Movie ids are 1-based; shift to 0-based column positions.
        user_ratings[id_movies - 1] = id_ratings
        new_data.append(list(user_ratings))
    return new_data
# Pivot each split to one row per user and wrap it as a float tensor.
training_set = torch.FloatTensor(convert(training_set))
test_set = torch.FloatTensor(convert(test_set))

# Recode the ratings in place for a binary model:
#   0 (not rated) -> -1,  1 or 2 -> 0,  3 and above -> 1.
# The order matters: the "not rated" zeros must be moved to -1 before the
# low ratings are turned into 0, and the >= 3 step must come last so the
# freshly written 0/-1 values are not touched by it.
for _split in (training_set, test_set):
    _split[_split == 0] = -1
    _split[(_split == 1) | (_split == 2)] = 0
    _split[_split >= 3] = 1
class RBM:
    """Restricted Boltzmann machine with Bernoulli-sampled units.

    Parameters are plain tensors that ``train`` updates in place.
    """

    def __init__(self, nv, nh):
        """Draw all parameters from a standard normal distribution.

        Args:
            nv: Number of visible units.
            nh: Number of hidden units.
        """
        # W: weights, shape (nh, nv).
        self.W = torch.randn(nh, nv)
        # a: hidden-unit bias, shape (1, nh).
        self.a = torch.randn(1, nh)
        # b: visible-unit bias, shape (1, nv).
        self.b = torch.randn(1, nv)

    def sample_h(self, x):
        """Sample hidden units given visible rows ``x`` (batch, nv).

        Returns:
            ``(p_h_given_v, sample)``: activation probabilities of shape
            (batch, nh) and a Bernoulli draw from them.
        """
        hidden_input = torch.mm(x, self.W.t())
        hidden_input = hidden_input + self.a.expand_as(hidden_input)
        prob_hidden = torch.sigmoid(hidden_input)
        hidden_sample = torch.bernoulli(prob_hidden)
        return prob_hidden, hidden_sample

    def sample_v(self, y):
        """Sample visible units given hidden rows ``y`` (batch, nh).

        Returns:
            ``(p_v_given_h, sample)``: activation probabilities of shape
            (batch, nv) and a Bernoulli draw from them.
        """
        visible_input = torch.mm(y, self.W)
        visible_input = visible_input + self.b.expand_as(visible_input)
        prob_visible = torch.sigmoid(visible_input)
        visible_sample = torch.bernoulli(prob_visible)
        return prob_visible, visible_sample

    def train(self, v0, vk, ph0, phk):
        """Update W, b and a in place from a start/end pair of states.

        Args:
            v0: Initial visible batch, shape (batch, nv).
            vk: Visible batch after sampling, same shape as ``v0``.
            ph0: Hidden probabilities for ``v0``, shape (batch, nh).
            phk: Hidden probabilities for ``vk``, shape (batch, nh).
        """
        # Visible-hidden co-activations at the start and end of the chain;
        # both are (nv, nh), hence the transpose before adding to W.
        start_assoc = torch.mm(v0.t(), ph0)
        end_assoc = torch.mm(vk.t(), phk)
        self.W += (start_assoc - end_assoc).t()
        # Bias updates are summed over the batch dimension.
        self.b += torch.sum(v0 - vk, 0)
        self.a += torch.sum(ph0 - phk, 0)
# Model and training hyper-parameters.
nv = len(training_set[0])   # one visible unit per movie column
nh = 100                    # hidden units
batch_size = 100
nb_gibbs_steps = 10         # sampling steps per batch (the "k" in CD-k)
rbm = RBM(nv, nh)
nb_epoch = 10

for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    s = 0.  # number of batches processed, used to average the loss
    # The "+ 1" keeps the final *full* batch when nb_users is an exact
    # multiple of batch_size; the previous bound dropped it. A trailing
    # partial batch is still skipped, as before.
    for id_user in range(0, nb_users - batch_size + 1, batch_size):
        v0 = training_set[id_user:id_user + batch_size]
        # vk is rebound to a fresh tensor on the first sampling step, so
        # the in-place writes below never modify training_set.
        vk = v0
        unrated = v0 < 0    # -1 entries: no rating available
        rated = v0 >= 0
        ph0, _ = rbm.sample_h(v0)
        for _step in range(nb_gibbs_steps):
            _, hk = rbm.sample_h(vk)
            _, vk = rbm.sample_v(hk)
            # Keep unrated cells pinned at their original -1 so they do
            # not take part in the reconstruction.
            vk[unrated] = v0[unrated]
        phk, _ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        # Mean absolute reconstruction error over rated cells only.
        # train_loss becomes a 0-dim tensor after the first addition.
        train_loss += torch.mean(torch.abs(v0[rated] - vk[rated]))
        s += 1.
    print('epoch: ' + str(epoch) + ' loss: ' + str(train_loss / s))
# Evaluate: for each user, run one hidden/visible sampling pass from the
# user's training row and compare the result with the test ratings.
test_loss = 0
s = 0.  # number of users that contributed to the loss
for id_user in range(nb_users):
    v = training_set[id_user:id_user + 1]
    vt = test_set[id_user:id_user + 1]
    has_target = vt >= 0    # cells that hold a test rating (not -1)
    if len(vt[has_target]) == 0:
        # Nothing to score for this user in the test split.
        continue
    _, h = rbm.sample_h(v)
    _, v = rbm.sample_v(h)
    abs_error = torch.abs(vt[has_target] - v[has_target])
    test_loss += torch.mean(abs_error)
    s += 1.
print('test loss: ' + str(test_loss / s))