-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRGSE_without_embed.py
123 lines (106 loc) · 3.63 KB
/
RGSE_without_embed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import numpy as np
import pandas as pd
import networkx as nx
from sklearn.metrics import roc_auc_score
from model import MLP
import torch
import torch.nn as nn
import torch.optim as optim
def read_graph(edges, nodes):
    """Build an undirected weighted graph from an edge array.

    Nodes 0..nodes-1 are always present. Repeated (u, v) rows are
    collapsed into a single edge whose 'weight' counts the repetitions.
    """
    graph = nx.Graph()
    graph.add_nodes_from(range(nodes))
    for row in edges:
        u, v = int(row[0]), int(row[1])
        if graph.has_edge(u, v):
            # parallel occurrence of an existing pair: bump its multiplicity
            graph[u][v]['weight'] += 1
        else:
            graph.add_edge(u, v, weight=1)
    return graph
def extract_features(graph, x, y):
    """Compute three weighted-neighborhood features for the node pair (x, y).

    Let CN be the common neighbors of x and y. The features are:
      feat1 = sum of w(x,z) over z in CN, divided by the total weighted
              degree of all z in CN;
      feat2 = sum of w(x,z) over z in CN, divided by x's weighted degree;
      feat3 = sum of w(z,y) over z in CN, divided by y's weighted degree.

    Parameters
    ----------
    graph : networkx.Graph with a 'weight' attribute on every edge
    x, y : node ids present in *graph*

    Returns
    -------
    list[float] : [feat1, feat2, feat3]; a feature is 0 whenever its
    denominator is 0 (no common neighbors / isolated endpoint).
    """
    common_neighbors = list(nx.common_neighbors(graph, x, y))

    # Weighted degrees of the two endpoints.
    x_degree = sum(graph.get_edge_data(x, i)['weight']
                   for i in nx.neighbors(graph, x))
    y_degree = sum(graph.get_edge_data(y, i)['weight']
                   for i in nx.neighbors(graph, y))

    # Accumulate, over every common neighbor z: z's total weighted degree,
    # and the weights of the (x,z) and (z,y) edges.
    z_weight_sum = 0
    xz = zy = 0
    for z in common_neighbors:
        z_weight_sum += sum(graph.get_edge_data(i, z)['weight']
                            for i in nx.neighbors(graph, z))
        xz += graph.get_edge_data(x, z)['weight']
        zy += graph.get_edge_data(z, y)['weight']

    # Guard every denominator: the original guarded the empty-CN case only,
    # which would divide by zero if all of z's incident weights were 0.
    feat1 = xz / z_weight_sum if z_weight_sum > 0 else 0
    feat2 = xz / x_degree if x_degree > 0 else 0
    feat3 = zy / y_degree if y_degree > 0 else 0
    return [feat1, feat2, feat3]
def get_needed_data(g):
    """Turn every edge of *g* into one positive and two negative samples.

    For each edge (u, v) the result contains [u, v, 1], then [u, j, 0] and
    [v, k, 0] where j (resp. k) is a uniformly drawn node id that is not
    adjacent to u (resp. v). Returns an (3*|E|, 3) integer ndarray.
    """
    node_count = g.number_of_nodes()

    def draw_non_neighbor(anchor):
        # Rejection-sample a node id until it is not adjacent to *anchor*.
        candidate = np.random.randint(node_count)
        while g.has_edge(anchor, candidate):
            candidate = np.random.randint(node_count)
        return candidate

    samples = []
    for u, v in g.edges:
        samples.append([u, v, 1])                        # positive pair
        samples.append([u, draw_non_neighbor(u), 0])     # left-anchored negative
        samples.append([v, draw_non_neighbor(v), 0])     # right-anchored negative
    return np.array(samples)
# ---------------------------------------------------------------------------
# Script: build pair features from an anomaly-injected edge list, train an
# MLP to separate real edges from sampled non-edges, then use the trained
# model to score the original edges and report anomaly-detection AUC.
# ---------------------------------------------------------------------------
dataset = 'cora'
ratio = 0.1  # anomaly-injection ratio encoded in the input file name
# Load the anomaly-injected edge list; the last column is read as a label
# below (presumably columns are src, dst, anomaly_label — TODO confirm
# against the file that wrote this CSV).
edges = pd.read_csv('data/' + dataset + '/ano_'+dataset+str(ratio)+'.csv').values
# NOTE(review): .max() runs over ALL columns (including the label column),
# so this assumes node ids dominate the label values — verify.
nodes_nums = edges.max() + 1
g = read_graph(edges, nodes_nums)
# Rebind `edges` to training triples: [u, v, 1] positives plus sampled
# [u, j, 0] negatives from get_needed_data.
edges = get_needed_data(g)
res_label = edges[:, -1]  # 1 = real edge, 0 = sampled non-edge
edges_features = np.zeros((len(edges), 3))
print('extract features')
for i, edge in enumerate(edges):
    features = extract_features(g, int(edge[0]), int(edge[1]))
    edges_features[i] = features
print('extract features finished!')
# save features
# all_feats = np.concatenate([edges, edges_features], axis=1)
# np.save('data/' + dataset + '/allfeats'+str(ratio)+'.npy', all_feats)
# print('saved')
# Re-load the ORIGINAL anomaly-labelled edges and featurize them for scoring
# (the graph g, built from these same edges, is reused for the features).
edges = pd.read_csv('data/' + dataset + '/ano_'+dataset+str(ratio)+'.csv').values
ano_label = edges[:, -1]  # anomaly ground truth used only for evaluation
ori_edges_features = np.zeros((len(edges), 3))
for i, edge in enumerate(edges):
    features = extract_features(g, int(edge[0]), int(edge[1]))
    ori_edges_features[i] = features
# 3 input features -> MLP(3, 32, 16); the CrossEntropyLoss below implies the
# model emits at least 2 output classes — defined in model.py, TODO confirm.
model = MLP(3, 32, 16)
model = model.cuda()  # requires a CUDA device; no CPU fallback
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
all_data = torch.from_numpy(edges_features).float().cuda()
res_label = torch.from_numpy(res_label).long().cuda()
ori_data = torch.from_numpy(ori_edges_features).float().cuda()
# Full-batch training: the entire sample set is fed every epoch.
for epoch in range(100):
    model.train()
    out = model(all_data)
    loss = criterion(out, res_label)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print('epoch: '+str(epoch)+', loss = ' + str(loss.item()))
model.eval()
pre = model(ori_data)
# NOTE(review): the raw score of output column 0 is used as the anomaly
# score — this assumes class 0 ("non-edge") activation correlates with
# anomalousness; confirm against the model's output layout.
pre = pre[:,0]
auc = roc_auc_score(ano_label, pre.cpu().detach().numpy())
print('auc =', auc)