forked from yixinL7/BRIO
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlabel_smoothing_loss.py
25 lines (21 loc) · 1.03 KB
/
label_smoothing_loss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import torch
import torch.nn as nn
class label_smoothing_loss(nn.Module):
    """Token-level cross-entropy with uniform label smoothing.

    The smoothed target distribution puts ``1 - epsilon + epsilon / k`` on the
    gold class and ``epsilon / k`` on every other class, where ``k`` is the
    vocabulary size. Positions whose target equals ``ignore_index`` do not
    contribute to the loss, and the result is averaged over the remaining
    positions.

    Args:
        ignore_index: Target id to exclude from the loss (e.g. padding).
        epsilon: Probability mass spread uniformly over all classes.
    """

    def __init__(self, ignore_index, epsilon=0.1):
        super().__init__()
        self.ignore_idx = ignore_index
        self.epsilon = epsilon

    def forward(self, input: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
        """Return the mean smoothed loss over non-ignored positions.

        Args:
            input: Unnormalized scores of shape [batch_size, word_num, seq_len].
            target: Gold class ids of shape [batch_size, seq_len].

        Returns:
            A scalar tensor. It is 0 (rather than NaN) when every position is
            ignored.
        """
        # [batch_size, seq_len, word_num]
        log_probs = torch.log_softmax(input.transpose(1, 2), dim=2)
        vocab_size = log_probs.size(2)

        # Broadcast comparison marks the gold class per position. A target id
        # outside [0, vocab_size) simply matches nothing, so an out-of-vocab
        # ignore_index such as -100 is safe here.
        class_ids = torch.arange(
            vocab_size, device=target.device, dtype=target.dtype
        )
        is_gold = target.unsqueeze(-1) == class_ids
        gold_log_prob = log_probs.masked_fill(~is_gold, 0).sum(2)

        # sum_c q(c) * log p(c) with q = (1 - eps) * one_hot + eps / k, split
        # into its two terms so the dense target distribution is never built.
        uniform_log_prob = log_probs.sum(2) / vocab_size
        token_loss = -(
            (1 - self.epsilon) * gold_log_prob
            + self.epsilon * uniform_log_prob
        )

        # Drop ignored positions with masked_fill (not multiplication) so an
        # inf loss at a padded position cannot turn into NaN via inf * 0.
        keep = target != self.ignore_idx
        total = token_loss.masked_fill(~keep, 0).sum()
        # Clamp the count so a fully ignored batch yields 0 instead of 0 / 0.
        count = keep.sum().clamp(min=1).to(total.dtype)
        return total / count