# main.py (forked from prakashpandey9/Text-Classification-Pytorch)
import torch
import torch.nn.functional as F
import numpy as np

import load_data
from models.LSTM import LSTMClassifier

# Load the dataset: the torchtext TEXT field, vocabulary size, pretrained word
# embeddings, and the train/validation/test BucketIterators.
TEXT, vocab_size, word_embeddings, train_iter, valid_iter, test_iter = load_data.load_dataset()


def clip_gradient(model, clip_value):
    """Clamp every parameter gradient to [-clip_value, clip_value] to limit exploding gradients."""
    params = list(filter(lambda p: p.grad is not None, model.parameters()))
    for p in params:
        p.grad.data.clamp_(-clip_value, clip_value)


def train_model(model, train_iter, epoch):
    total_epoch_loss = 0
    total_epoch_acc = 0
    if torch.cuda.is_available():
        model.cuda()
    # Only optimize parameters that require gradients (frozen embeddings are skipped).
    optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=learning_rate)
    steps = 0
    model.train()
    for idx, batch in enumerate(train_iter):
        text = batch.text[0]
        target = batch.label.long()
        if torch.cuda.is_available():
            text = text.cuda()
            target = target.cuda()
        if text.size()[0] != 32:
            # BucketIterator can return a final batch smaller than 32; skip it so the
            # model's fixed batch size still matches.
            continue
        optimizer.zero_grad()
        prediction = model(text)
        loss = loss_fn(prediction, target)
        num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
        acc = 100.0 * num_corrects / len(batch)
        loss.backward()
        clip_gradient(model, 1e-1)
        optimizer.step()
        steps += 1
        if steps % 100 == 0:
            print(f'Epoch: {epoch+1}, Idx: {idx+1}, Training Loss: {loss.item():.4f}, Training Accuracy: {acc.item():.2f}%')
        total_epoch_loss += loss.item()
        total_epoch_acc += acc.item()
    return total_epoch_loss / len(train_iter), total_epoch_acc / len(train_iter)


def eval_model(model, val_iter):
    total_epoch_loss = 0
    total_epoch_acc = 0
    model.eval()
    with torch.no_grad():
        for idx, batch in enumerate(val_iter):
            text = batch.text[0]
            if text.size()[0] != 32:
                # Skip the odd-sized final batch, as in train_model.
                continue
            target = batch.label.long()
            if torch.cuda.is_available():
                text = text.cuda()
                target = target.cuda()
            prediction = model(text)
            loss = loss_fn(prediction, target)
            num_corrects = (torch.max(prediction, 1)[1].view(target.size()).data == target.data).float().sum()
            acc = 100.0 * num_corrects / len(batch)
            total_epoch_loss += loss.item()
            total_epoch_acc += acc.item()
    return total_epoch_loss / len(val_iter), total_epoch_acc / len(val_iter)


# Hyperparameters
learning_rate = 2e-5
batch_size = 32
output_size = 2
hidden_size = 256
embedding_length = 300

model = LSTMClassifier(batch_size, output_size, hidden_size, vocab_size, embedding_length, word_embeddings)
loss_fn = F.cross_entropy

for epoch in range(10):
    train_loss, train_acc = train_model(model, train_iter, epoch)
    val_loss, val_acc = eval_model(model, valid_iter)
    print(f'Epoch: {epoch+1:02}, Train Loss: {train_loss:.3f}, Train Acc: {train_acc:.2f}%, Val. Loss: {val_loss:.3f}, Val. Acc: {val_acc:.2f}%')

test_loss, test_acc = eval_model(model, test_iter)
print(f'Test Loss: {test_loss:.3f}, Test Acc: {test_acc:.2f}%')

''' Let us now predict the sentiment of a single sentence, just for testing purposes. '''
test_sen1 = "This is one of the best creation of Nolan. I can say, it's his magnum opus. Loved the soundtrack and especially those creative dialogues."
test_sen2 = "Ohh, such a ridiculous movie. Not gonna recommend it to anyone. Complete waste of time and money."

# Tokenize with the TEXT field and map each token to its vocabulary index.
test_sen1 = TEXT.preprocess(test_sen1)
test_sen1 = [[TEXT.vocab.stoi[x] for x in test_sen1]]

test_sen2 = TEXT.preprocess(test_sen2)
test_sen2 = [[TEXT.vocab.stoi[x] for x in test_sen2]]

test_sen = np.asarray(test_sen1)
test_tensor = torch.LongTensor(test_sen)
if torch.cuda.is_available():
    test_tensor = test_tensor.cuda()
model.eval()
with torch.no_grad():
    output = model(test_tensor, 1)  # batch size of 1 for a single sentence
out = F.softmax(output, 1)
if torch.argmax(out[0]) == 1:
    print("Sentiment: Positive")
else:
    print("Sentiment: Negative")