forked from toolleeo/special-section-processing
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SpecSecGraph.py
72 lines (56 loc) · 2.34 KB
/
SpecSecGraph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import csv
import argparse
import os, glob
import itertools
import matplotlib.pyplot as plt # for plotting the results
import networkx as nx
# Lettura del file csv con gli articoli postprocessati
# Reading the csv file with postprocessed articles
def read_csv(path):
    """Load the post-processed articles stored in a section directory.

    The file is looked up at ``<path>/<name>_postproc.csv``, where ``<name>``
    is the last component of ``path``, and parsed as tab-separated values
    with a header row.

    Args:
        path: Directory containing the ``*_postproc.csv`` file. A trailing
            path separator is tolerated.

    Returns:
        A list with one dict per data row, keyed by the header names.

    Raises:
        FileNotFoundError: If no matching file exists.
    """
    # normpath drops a trailing separator; without it the last component
    # would be empty and the lookup would target a bare "_postproc.csv".
    section = os.path.basename(os.path.normpath(path))
    pattern = os.path.join(path, section + '_postproc.csv')
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError('no post-processed csv found: ' + pattern)
    # Use the match verbatim: feeding it to str.format would interpret any
    # "{" or "}" in the file name as a replacement field.
    with open(matches[0], 'r', encoding='utf-8', newline='') as input_file:
        return list(csv.DictReader(input_file, delimiter='\t'))
# Calcolo dei lati e del loro peso (numero di parole in comune tra gli abstract_filtered di due articoli)
# Calculation of edges and their weight (number of words in common between the abstract_filtered of two articles)
def edges_calc(doc):
    """Compute the graph edges between articles and the weight of each edge.

    Two articles are linked when their ``abstract_filtered`` fields share at
    least one whitespace-separated word, compared case-insensitively. The
    weight of the edge is the number of distinct shared words.

    Args:
        doc: Iterable of mappings providing the ``'id'`` and
            ``'abstract_filtered'`` keys.

    Returns:
        A ``(graph_edges, edges_weights)`` tuple of parallel lists:
        ``graph_edges[i]`` is an ``(id1, id2)`` pair and ``edges_weights[i]``
        is the shared-word count for that pair.
    """
    # Tokenise every abstract once up front instead of once per pair.
    tokenised = [
        (paper, set(paper['abstract_filtered'].lower().split()))
        for paper in doc
    ]
    graph_edges = []
    edges_weights = []
    for (paper1, words1), (paper2, words2) in itertools.combinations(tokenised, 2):
        shared = len(words1 & words2)
        if shared > 0:
            graph_edges.append((paper1['id'], paper2['id']))
            edges_weights.append(shared)
    return graph_edges, edges_weights
# Inserimento dei nodi all'interno del grafo
# Insertion of nodes within the graph
def nodes_calc(doc):
    """Return the graph nodes: the ``'id'`` of every article, in input order.

    Args:
        doc: Iterable of mappings providing the ``'id'`` key.

    Returns:
        A list with one identifier per article.
    """
    return [article['id'] for article in doc]
# Creazione del grafo
# Creation of the graph
def create_graph(nodes, edges, weights, path, threshold=6):
    """Draw the article graph and save it as ``<path>/<name>_graph.png``.

    ``<name>`` is the last component of ``path``. Nodes are placed on a
    circle; every edge is labelled with its weight and drawn red when the
    weight is strictly greater than ``threshold``, blue otherwise.

    Args:
        nodes: Node identifiers to add to the graph.
        edges: ``(node1, node2)`` pairs.
        weights: Weights parallel to ``edges``.
        path: Directory in which the image is written. A trailing path
            separator is tolerated.
        threshold: Weight above which an edge is coloured red.
    """
    g = nx.Graph()
    g.add_nodes_from(nodes)
    # Store each weight on its edge so that colours and labels are looked up
    # per edge instead of relying on the graph iterating edges in input order.
    g.add_weighted_edges_from((u, v, w) for (u, v), w in zip(edges, weights))
    pos = nx.circular_layout(g)

    weighted_edges = list(g.edges(data='weight'))
    edge_list = [(u, v) for u, v, _ in weighted_edges]
    edge_colors = ['red' if w > threshold else 'blue' for _, _, w in weighted_edges]
    nx.draw(g, pos, with_labels=True, node_color='orange', node_size=900,
            edgelist=edge_list, edge_color=edge_colors)
    edge_labels = {(u, v): w for u, v, w in weighted_edges}
    nx.draw_networkx_edge_labels(g, pos, edge_labels=edge_labels)

    # normpath drops a trailing separator; without it the last component
    # would be empty and the image would be named just "_graph.png".
    section = os.path.basename(os.path.normpath(path))
    plt.savefig(os.path.join(path, section + '_graph.png'))
    # Clear the current figure so the next graph starts from a blank canvas.
    plt.clf()
def main():
    """Build and save the article graph for every directory on the command line.

    Each positional argument is a directory; for each one the post-processed
    csv is read, edges and nodes are computed, and the graph image is written
    back into that directory.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument(
        'directories',
        nargs='*',
        help='elenco di directories con gli articoli postprocessati',
    )
    options = cli.parse_args()
    for directory in options.directories:
        articles = read_csv(directory)
        links, link_weights = edges_calc(articles)
        create_graph(nodes_calc(articles), links, link_weights, directory)


# Run only when executed as a script, not when imported.
if __name__ == '__main__':
    main()