-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathlinearGAE_contrastiveloss.py
103 lines (87 loc) · 3.1 KB
/
linearGAE_contrastiveloss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import numpy as np
import pandas as pd
import networkx as nx
from model import *
import torch
from sklearn.metrics import roc_auc_score
import torch.nn.functional as F
# Compute device shared by the script: the first CUDA GPU when one is
# available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def compute_loss_para(adj):
    """Compute the class-balancing terms for the weighted reconstruction loss.

    Args:
        adj: Square dense adjacency tensor. Entries equal to 1 are treated
            as positives; everything else counts as a negative.

    Returns:
        A ``(weight_tensor, norm)`` tuple:

        * ``weight_tensor`` -- flat tensor with one entry per element of
          ``adj`` (row-major), holding ``pos_weight`` where ``adj == 1`` and
          1.0 elsewhere. It lives on the same device as ``adj``.
        * ``norm`` -- Python float ``N*N / (2 * (N*N - adj.sum()))``.

    Side effects:
        Prints the computed positive weight.
    """
    num_entries = adj.shape[0] * adj.shape[0]
    num_pos = adj.sum()
    num_neg = num_entries - num_pos

    # Ratio of negatives to positives, used to up-weight the positive entries.
    pos_weight = num_neg / num_pos
    print('pos weight=', pos_weight)

    norm = num_entries / float(num_neg * 2)

    # reshape (rather than view) also accepts non-contiguous inputs.
    weight_mask = adj.reshape(-1) == 1
    # Allocate next to the input instead of on a module-level device so the
    # weights can always be combined with ``adj`` without a device mismatch.
    weight_tensor = torch.ones(weight_mask.size(0), device=adj.device)
    weight_tensor[weight_mask] = pos_weight
    return weight_tensor, norm
# Contrastive loss, as used in the Graph-MLP implementation.
def contrast_loss(x_dis, adj_label, tau=1):
    """Neighbourhood contrastive loss over a pairwise similarity matrix.

    For each row ``i`` the per-row term is::

        -log( sum_j exp(x_dis[i, j] / tau) * adj_label[i, j]
              / sum_j exp(x_dis[i, j] / tau) )

    and the returned loss is the mean of these terms over all rows.

    Args:
        x_dis: 2-D tensor of pairwise similarities.
        adj_label: Tensor broadcastable against ``x_dis`` that weights the
            positive pairs of each row.
        tau: Temperature dividing the similarities before exponentiation.

    Returns:
        Scalar tensor holding the mean loss.
    """
    scores = torch.exp(x_dis / tau)
    row_total = torch.sum(scores, 1)
    row_positive = torch.sum(scores * adj_label, 1)
    ratio = row_positive / row_total

    # A row without any positive pair has ratio 0; log(0) = -inf would turn
    # the mean into inf and the gradients into NaN. Clamping to the smallest
    # positive normal float keeps the loss finite, gives such rows a zero
    # gradient, and leaves every non-degenerate ratio untouched.
    ratio = torch.clamp(ratio, min=torch.finfo(ratio.dtype).tiny)
    return -torch.log(ratio).mean()
# def contrast_loss_mean(x_dis, adj_label, tau = 1.0): # the neighbors' weights are the same
# x_dis = torch.exp(x_dis/tau)
# x_dis_sum = torch.sum(x_dis, 1).reshape(-1,1)
# x_dis_pos = x_dis
# item = x_dis_pos * (x_dis_sum ** (-1))
# item = -torch.log(item)
# item = item*adj_label
# sigle_loss = torch.sum(item, dim=1)/torch.sum(adj_label, dim=1)
# loss = torch.mean(sigle_loss)
# return loss
def get_feature_dis(x):
    """Return the pairwise cosine similarity of the rows of ``x``.

    Args:
        x: 2-D tensor with one embedding per row.

    Returns:
        Square tensor whose ``(i, j)`` entry is the dot product of rows ``i``
        and ``j`` divided by the product of their L2 norms. The diagonal is
        multiplied by zero so self-similarity does not contribute.

    Note:
        Rows with zero norm divide by zero and produce non-finite entries;
        no epsilon is added here.
    """
    gram = x @ x.T

    # Outer product of the row norms gives the cosine denominator per pair.
    row_norm = torch.sqrt(torch.sum(x ** 2, 1)).reshape(-1, 1)
    norm_outer = row_norm @ row_norm.T
    cosine = gram * (norm_outer ** (-1))

    # Build the diagonal mask on the input's device so the function works on
    # CPU as well as on GPU tensors.
    diagonal = torch.eye(gram.shape[0], device=x.device)
    return (1 - diagonal) * cosine
# ---------------------------------------------------------------------------
# Experiment setup: load the edge list, build the dense adjacency matrix,
# and create the model and optimizer.
# ---------------------------------------------------------------------------
dataset = 'cora'
ratio = 0.1

# Each CSV row is read as (source node, target node, ..., label): the first
# two columns are used as edge endpoints and the last column as the label.
edges = pd.read_csv('data/' + dataset + '/ano_' + dataset + str(ratio) + '.csv').values

# NOTE(review): the maximum is taken over every column, label column included;
# this presumably equals the largest node id -- confirm against the data.
node_nums = int(edges.max()) + 1

g = nx.Graph()
# Register every id up front so nodes without edges still get a row/column.
g.add_nodes_from(range(node_nums))
labels = edges[:, -1]
g.add_edges_from(edges[:, :2].tolist())

adj = np.array(nx.adjacency_matrix(g).todense())
# Use the shared `device` instead of a hard-coded .cuda() so the CPU fallback
# declared at the top of the file is honoured.
adj = torch.from_numpy(adj).float().to(device)
weight_tensor, norm = compute_loss_para(adj)

model = GAElinear(node_nums, 128, 32).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 200
best_auc = 0
# ---------------------------------------------------------------------------
# Training loop: one full-graph optimisation step per epoch, followed by an
# evaluation pass that scores every listed edge by its squared reconstruction
# error and tracks the best ROC-AUC seen so far.
# NOTE(review): indentation was missing in the source this was recovered
# from; the nesting below (evaluation once per epoch, final summary print
# after the loop) is reconstructed -- confirm against the original file.
# ---------------------------------------------------------------------------
for epoch in range(epochs):
    # ---- optimisation step ----
    model.train()
    recovered, z = model(adj)
    x_dis = get_feature_dis(z)
    loss_Ncontrast = contrast_loss(x_dis, adj, tau=1)  # use tau=0.1 in Enron
    loss = norm * F.binary_cross_entropy(recovered.view(-1), adj.view(-1), weight=weight_tensor)
    total_loss = loss + 1 * loss_Ncontrast
    cur_loss = total_loss.item()
    print('epoch: ' + str(epoch) + ', loss = ' + str(cur_loss))
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # ---- evaluation ----
    model.eval()
    # No gradients are needed for scoring, so skip building the autograd graph.
    with torch.no_grad():
        res, z = model(adj)
        # Symmetrise the reconstruction, then take the squared error per entry.
        res = (res + res.t()) / 2
        res = (res - adj) ** 2
    res = res.cpu().numpy()

    # Score of each listed edge = reconstruction error at (source, target).
    pre = res[edges[:, 0], edges[:, 1]]
    auc = roc_auc_score(labels, pre)
    if auc > best_auc:
        # save embeddings
        np.save('data/' + dataset + '/ae_embed_nloss2' + str(ratio) + '.npy', z.cpu().numpy())
        best_auc = auc
    print('auc = ', auc)
print('best_auc = ', best_auc)