my_loss_function.py
import torch
import torch.nn as nn
import torch.nn.functional as F


def loss_kd(outputs, labels, teacher_outputs, params):
    """
    Loss function for Knowledge Distillation (KD).
    """
    alpha = params.alpha
    T = params.temperature
    loss_CE = F.cross_entropy(outputs, labels)
    # KL divergence between the temperature-softened student and teacher
    # distributions, scaled by T^2.
    D_KL = nn.KLDivLoss()(F.log_softmax(outputs / T, dim=1),
                          F.softmax(teacher_outputs / T, dim=1)) * (T * T)
    KD_loss = (1. - alpha) * loss_CE + alpha * D_KL
    return KD_loss
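
# Note (added): loss_kd is the standard Hinton-style KD objective,
#     L = (1 - alpha) * CE(student, labels)
#         + alpha * T^2 * KL(softmax(teacher / T) || softmax(student / T)).
# The T^2 factor compensates for the way soft targets shrink gradient
# magnitudes as the temperature grows, keeping the two terms on a comparable
# scale. With nn.KLDivLoss()'s default reduction, the KL term is averaged over
# all elements rather than per sample.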


def loss_kd_t(outputs, labels, teacher_outputs, params):
    """
    Loss function for Knowledge Distillation (KD), with alpha taken from the
    teacher's (heavily softened) maximum output probability.
    """
    # alpha = params.alpha
    T = params.temperature
    alpha = torch.max(F.softmax(teacher_outputs / 1000, dim=1))  # modified here
    loss_CE = F.cross_entropy(outputs, labels)
    D_KL = nn.KLDivLoss()(F.log_softmax(outputs / T, dim=1),
                          F.softmax(teacher_outputs / T, dim=1)) * (T * T)
    KD_loss = (1. - alpha) * loss_CE + alpha * D_KL
    return KD_loss
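
# Note (added): in loss_kd_t, alpha is a single scalar, the largest probability
# anywhere in the batch of teacher outputs softened with temperature 1000. For
# typical logit magnitudes that softmax is close to uniform, so alpha ends up
# only slightly above 1/num_classes and the loss is dominated by the
# cross-entropy term.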


def loss_kd_self(outputs, labels, teacher_outputs, params):
    """
    Loss function for self training: Tf-KD_{self}.
    """
    alpha = params.alpha
    # alpha = torch.max(F.softmax(teacher_outputs/T, dim=1))
    T = params.temperature
    loss_CE = F.cross_entropy(outputs, labels)
    # multiplier is 1.0 in most cases; some cases use 10 or 50
    D_KL = nn.KLDivLoss()(F.log_softmax(outputs / T, dim=1),
                          F.softmax(teacher_outputs / T, dim=1)) * (T * T) * params.multiplier
    KD_loss = (1. - alpha) * loss_CE + alpha * D_KL
    return KD_loss
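
# Note (added): loss_kd_self has the same form as loss_kd but scales the KL
# term by params.multiplier. In the Tf-KD_{self} setup the teacher_outputs are
# expected to come from a pre-trained copy of the model itself
# (self-distillation) rather than a separate, larger teacher.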


def loss_kd_regularization(outputs, labels, params):
    """
    Loss function for manually-designed regularization: Tf-KD_{reg}.
    """
    alpha = params.reg_alpha
    T = params.reg_temperature
    correct_prob = 0.99  # the probability assigned to the correct class in u(k)
    loss_CE = F.cross_entropy(outputs, labels)
    K = outputs.size(1)

    # Build the hand-designed teacher distribution p^d(k): `correct_prob` on
    # the true class, with the remaining mass spread uniformly over the other
    # K-1 classes.
    teacher_soft = torch.ones_like(outputs).cuda()
    teacher_soft = teacher_soft * (1 - correct_prob) / (K - 1)  # p^d(k)
    for i in range(outputs.shape[0]):
        teacher_soft[i, labels[i]] = correct_prob

    loss_soft_regu = nn.KLDivLoss()(F.log_softmax(outputs, dim=1),
                                    F.softmax(teacher_soft / T, dim=1)) * params.multiplier
    KD_loss = (1. - alpha) * loss_CE + alpha * loss_soft_regu
    return KD_loss
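
# Note (added): loss_kd_regularization needs no teacher network at all; the
# "teacher" is the hand-designed distribution built above from the labels
# alone. The torch.ones_like(outputs).cuda() call assumes the tensors live on
# a GPU, so this function will fail on a CPU-only setup as written.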


def loss_label_smoothing(outputs, labels):
    """
    Loss function for label smoothing regularization.
    """
    alpha = 0.1
    N = outputs.size(0)  # batch_size
    C = outputs.size(1)  # number of classes
    smoothed_labels = torch.full(size=(N, C), fill_value=alpha / (C - 1)).cuda()
    smoothed_labels.scatter_(dim=1, index=torch.unsqueeze(labels, dim=1), value=1 - alpha)

    log_prob = torch.nn.functional.log_softmax(outputs, dim=1)
    loss = -torch.sum(log_prob * smoothed_labels) / N
    return loss
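
# Note (added): worked example for loss_label_smoothing with alpha = 0.1 and
# C = 10 classes: each wrong class receives 0.1 / 9 ~= 0.0111 of the target
# mass and the true class receives 1 - 0.1 = 0.9, so the rows of
# smoothed_labels sum to 1. The loss is then the mean cross-entropy between
# the model's log-probabilities and these soft targets.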


class FocalLoss(nn.Module):
    def __init__(self, gamma=0, alpha=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.alpha = alpha
        if isinstance(alpha, (float, int)):
            self.alpha = torch.Tensor([alpha, 1 - alpha])
        if isinstance(alpha, list):
            self.alpha = torch.Tensor(alpha)
        self.size_average = size_average

    def forward(self, input, target):
        if input.dim() > 2:
            input = input.view(input.size(0), input.size(1), -1)  # N,C,H,W => N,C,H*W
            input = input.transpose(1, 2)                         # N,C,H*W => N,H*W,C
            input = input.contiguous().view(-1, input.size(2))    # N,H*W,C => N*H*W,C
        target = target.view(-1, 1)

        logpt = F.log_softmax(input, dim=1)
        logpt = logpt.gather(1, target)
        logpt = logpt.view(-1)
        pt = logpt.detach().exp()

        if self.alpha is not None:
            if self.alpha.type() != input.data.type():
                self.alpha = self.alpha.type_as(input.data)
            at = self.alpha.gather(0, target.data.view(-1))
            logpt = logpt * at

        # Focal loss: down-weight well-classified examples by (1 - p_t)^gamma.
        loss = -1 * (1 - pt) ** self.gamma * logpt
        if self.size_average:
            return loss.mean()
        return loss.sum()