-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathadd_dist_anomaly.py
56 lines (53 loc) · 1.77 KB
/
add_dist_anomaly.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import numpy as np
import pandas as pd
import networkx as nx
# Inject distance-based anomalous edges into an undirected graph.
#
# The script loads an edge list, builds an nx.Graph whose original edges carry
# weight=0, then links randomly chosen source nodes to far-away candidate
# nodes with weight=1 edges until the number of added edges reaches `ratio`
# times the original edge count.  The resulting edge table is written to CSV.
#
# NOTE: np.random is not seeded here, so every run yields a different graph
# unless the caller seeds it beforehand.
dataset = 'cora'
edges = pd.read_csv('data/'+dataset+'/'+dataset+'.csv').values
# edges = pd.read_csv('data/BlogCatalog/BlogCatalog.edge',header=None,sep='\t').values
edge_nums = len(edges)  # raw row count of the edge table; not used below
node_nums = edges.max()+1  # node ids are taken to span 0..max id

g = nx.Graph()
g.add_nodes_from(range(node_nums))
# nx.Graph keeps at most one edge per node pair, so a repeated row simply
# re-sets weight=0 on the already existing edge.
g.add_edges_from(zip(edges[:, 0], edges[:, 1]), weight=0)
print(g.number_of_nodes())
ori_edges_num = g.number_of_edges()
print(ori_edges_num)

total_anomaly = 0  # anomalous edges added so far, over all source nodes
# random select M source nodes
M = int(node_nums*0.4)
# in our experiments, we fix T=20
T = 20
ratio = 0.1
total_thre = ori_edges_num*ratio  # global budget of anomalous edges
UNREACHABLE_DIST = 100000  # stand-in distance for nodes with no path to the source

# np.random.randint samples with replacement, so a source node may be drawn
# more than once.
source_nodes = np.random.randint(0, node_nums, M)
for u in source_nodes:
    neigh_nums = len(g.adj[u])
    thre = ratio*neigh_nums  # per-source budget, proportional to current degree
    candidate_nodes = np.random.randint(0, node_nums, int(T*thre))
    if len(candidate_nodes) == 0:
        # No candidates were drawn (e.g. an isolated node): nothing to add.
        continue

    # The graph is not modified while distances are collected, so a single
    # BFS from u provides the distance to every reachable candidate.
    reach = nx.single_source_shortest_path_length(g, u)
    dist = [[c, reach.get(c, UNREACHABLE_DIST)] for c in candidate_nodes]  # [node,dist]
    # Farthest candidates first; the sort is stable, so ties keep draw order.
    dist.sort(key=lambda x: x[1], reverse=True)

    cnt = 0  # anomalous edges added for this source node
    for v, _ in dist:
        # Skip self-loops and pairs that are already connected (this also
        # covers duplicate candidates and edges added earlier in this loop).
        if u == v or g.has_edge(u, v):
            continue
        g.add_edge(u, v, weight=1)
        cnt = cnt + 1
        total_anomaly = total_anomaly + 1
        if cnt >= thre or total_anomaly >= total_thre:
            break
    if total_anomaly >= total_thre:
        break

print('new graph edges = '+str(g.number_of_edges()))
print('anomaly ratio='+str((g.number_of_edges()-ori_edges_num)/ori_edges_num))
new_edge_table = nx.to_pandas_edgelist(g)
new_edge_table.to_csv('data/'+dataset+'/ano_'+dataset+str(ratio)+'.csv', index=False)