# preprocess.py
from __future__ import division
import os
import random
import networkx as nx
import numpy as np
from evalne.evaluation.evaluator import LPEvaluator
from evalne.evaluation.score import Scoresheet
from evalne.evaluation.split import LPEvalSplit
from evalne.utils import preprocess as pp
from evalne.utils import split_train_test as stt
# NOTE: As written, this script only preprocesses the input networks and stores train/test splits; the
# evaluation calls in `main` are commented out. To evaluate the OpenNE methods, PRUNE and Metapath2vec,
# these must first be installed and the correct paths must be set in the `commands_other` variable
# (NOTE(review): `commands_other` is not defined in the visible part of this file).
# Only then should the following parameter be set to True.
run_other_methods = False
def main():
    """
    Preprocess every input network and store one link-prediction train/test split for each.

    For each name in `nw_names` the edge list is read from ``./input/<name>.edges`` and passed to
    `preprocess`. The resulting graph is split into train and test edges with `train_frac=0.80`,
    false edges are generated with `stt.generate_false_edges_cwa`, and the split is stored under
    ``./output/lp_train_test_splits/<name>``. The train edges are additionally written to
    ``./output/<name>.train.edgelist`` as a comma-delimited edge list.
    """
    # Names of the networks to process; each input file is expected at ./input/<name>.edges
    nw_names = ['aves-weaver-social', 'bio-CE-LC', 'bio-DM-LC',
                'bn-cat-mixed-species_brain_1',
                'soc-wiki-Vote', 'fb-pages-food', 'soc-hamsterster', 'ego-Facebook',
                'bio-CE-HT', 'bio-celegans-dir', 'bio-WormNet-v3']

    # Delimiter used in the original edge lists: a single space unless the network is listed here
    default_delimiter = ' '
    delimiter_overrides = {'fb-pages-food': ','}

    outpath = "./output/"
    if not os.path.exists(outpath):
        os.makedirs(outpath)

    directed = False  # indicates if the graphs are directed or undirected

    # Create a scoresheet to store the results. Only the disabled evaluation stage at the end of this
    # function refers to it.
    scoresheet = Scoresheet(tr_te='test')

    for name in nw_names:
        # Derive the per-network settings from the name instead of keeping parallel lists in sync
        inpath = "./input/" + name + ".edges"
        delimiter = delimiter_overrides.get(name, default_delimiter)

        # Load and preprocess the graph
        G = preprocess(inpath, name, outpath, delimiter, directed)
        pp.get_stats(G)

        # Compute a single train/test split together with the corresponding false edges
        train_E, test_E = stt.split_train_test(G=G, train_frac=0.80)
        train_E_false, test_E_false = stt.generate_false_edges_cwa(G, train_E=train_E, test_E=test_E,
                                                                   num_fe_train=None,
                                                                   num_fe_test=None)
        stt.store_train_test_splits(os.path.join(outpath, "lp_train_test_splits", name),
                                    train_E=train_E, train_E_false=train_E_false, test_E=test_E,
                                    test_E_false=test_E_false, split_id=0)

        # Store the train edges as a plain edge list; honour `directed` so edge direction is not
        # lost if the flag is ever switched on.
        TG = nx.DiGraph() if directed else nx.Graph()
        TG.add_edges_from(train_E)
        nx.write_edgelist(TG, os.path.join(outpath, name + ".train.edgelist"), delimiter=",", data=False)

    # NOTE(review): disabled evaluation stage, kept verbatim from the original. `nee`, `eval_baselines`
    # and `eval_other` are not defined in the visible part of this file and must be provided before
    # re-enabling it. The original indentation is unknown; the baseline/other-method calls were
    # presumably meant to run once per network.
    # # Evaluate baselines
    # eval_baselines(nee, directed, scoresheet)
    #
    # # Evaluate other NE methods
    # if run_other_methods:
    # eval_other(nee, scoresheet)
    # print("\nEvaluation results:")
    # print("-------------------")
    #
    # # Print results averaged over exp repeats
    # scoresheet.print_tabular(metric='auroc')
    #
    # # Write results averaged over exp repeats to a single file
    # scoresheet.write_tabular(filename=os.path.join(outpath, 'eval_output.txt'), metric='auroc')
    #
    # # Store the Scoresheet object for later analysis
    # scoresheet.write_pickle(os.path.join(outpath, 'eval.pkl'))
    #
    # print("Evaluation results are also stored in a folder named `output` in the current directory.")
    # print("End of evaluation")
def preprocess(inpath, name, outpath, delimiter, directed):
    """
    Graph preprocessing routine.

    Loads the edge list stored at `inpath`, runs it through `pp.prep_graph` and writes the result
    to ``<outpath>/<name>.prep_graph.edgelist`` as a comma-delimited file.

    Parameters
    ----------
    inpath : str
        Path of the edge list file to load.
    name : str
        Name of the network; used as the prefix of the output file name.
    outpath : str
        Directory in which the preprocessed graph is stored. A trailing path separator is optional.
    delimiter : str
        Delimiter used in the input file.
    directed : bool
        Forwarded to `pp.load_graph` to indicate if the graph is directed or undirected.

    Returns
    -------
    G
        The preprocessed graph as returned by `pp.prep_graph`.
    """
    print('Preprocessing graph...')

    # Load a graph; '#' is passed as the comment marker of the input file
    G = pp.load_graph(inpath, delimiter=delimiter, comments='#', directed=directed)

    # Preprocess the graph. The second return value is not needed here.
    G, _ = pp.prep_graph(G, relabel=True, del_self_loops=True)

    # Store preprocessed graph to a file. os.path.join keeps the path correct whether or not
    # `outpath` ends with a separator.
    prep_path = os.path.join(outpath, name + ".prep_graph.edgelist")
    pp.save_graph(G, output_path=prep_path, delimiter=',', write_stats=True)

    # Return the preprocessed graph
    return G
if __name__ == "__main__":
    # Seed the stdlib RNG first and then NumPy's with the same fixed value, then run the script.
    for seed_rng in (random.seed, np.random.seed):
        seed_rng(42)
    main()