-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfcn_spatio_temporal_connectome.py
349 lines (261 loc) · 11.8 KB
/
fcn_spatio_temporal_connectome.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
#!/usr/bin/env python
import argparse
import sys
import os
import networkx as nx
import scipy.io
import numpy as np
import sklearn as sk
from sklearn import preprocessing
DESCRIPTION = """
This function outputs a spatio-temporal graph given a set of functional time series
and a structural connectivity matrix.
INPUTs
- SC_filename: csv file containing the subject structural
connectivity matrix. Note that the structural connectivity
matrix is assumed to be binary
[string]
- TS_filename: csv file containing the matrix of the functional
time series (each row corresopnds to a single region
time series). Note that the script expects the number of
rows of the time series matrix to be equal to the dimension
of the (square) structural conenctivity matrix
[string]
- pos_threshold: threshold for point-process analysis
[float]
- LABEL_filename = txt file containing the labels or identifiers of the
regions corresponding to the structural connectivity
matrix and funcitonal time series.
Each row of the file contains a single region identifier.
Note that the script expects the number of rows of the
LABEL_filename to be equal to the dimension of the (square)
structural connectivity matrix
- output_prefix: prefix for the output files, e.g., '/output_dir/subjID_'
[string]
OUTPUTs
- z-scored time series (2D matrix, saved as mat file)
["output_prefix_"ts_zscore.mat"]
- spatio-temporal connectome (gpickle format)
["output_prefix_"spatio-temporal_connectome.gpickle"]
- connected components of the spaito-temporal connectome (Matlab structure, saved as mat file)
["output_prefix_"CC.mat"]
- feature vectors (2D matrix, saved as mat file)
["output_prefix_"FM.mat"]
REQUIREs
Python networkx
Python numpy
Python scipy
Python ssklearn
Python sys
CREDITS A. Griffa, B. Ricaud, K. Benzi, X. Bresson, A. Daducci, P. Vandergheynst, J.P. Thiran, P Hagmann (2017)
Transient Networks of Spatio-temporal Connectivity Map Communication Pathways in Brain Functional Systems.
NeuroImage 155:490-502.
"""
def buildArgsParser():
p = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter,
description=DESCRIPTION)
p.add_argument('SC_filename', action='store', metavar='SC_FILENAME', type=str,
help='Binary structural connectivity matrix - csv file.')
p.add_argument('TS_filename', action='store', metavar='TS_FILENAME', type=str,
help='Functional time series matrix - csv file.')
p.add_argument('pos_threshold', action='store', metavar='POSITIVE_THRESHOLD', type=str,
help='Positive threshold value - number.')
p.add_argument('LABEL_filename', action='store', metavar='LABEL_FILENAME', type=str,
help='Binary structural connectivity matrix - txt file.')
p.add_argument('output_prefix', action='store', metavar='OUTPUT_PREFIX', type=str,
help='output prefix - e.g., ""output_directory/code_""')
return p
def main():
# Handel inputs
# ======================================================================
parser = buildArgsParser()
args = parser.parse_args()
if not os.path.isfile(args.SC_filename):
parser.error('"{0}" must be a file!'.format(args.SC_filename))
if not os.path.isfile(args.TS_filename):
parser.error('"{0}" must be a file!'.format(args.TS_filename))
if not os.path.isfile(args.LABEL_filename):
parser.error('"{0}" must be a file!'.format(args.LABEL_filename))
SC_filename = args.SC_filename
TS_filename = args.TS_filename
LABEL_filename = args.LABEL_filename
pos_threshold = float(args.pos_threshold)
output_prefix = args.output_prefix
# Load inputs
# [1] subject
# [2] SC
SC = np.loadtxt(open(SC_filename,"rb"), delimiter=',', skiprows=0)
# Remove diagonal elements (not meaningful for the construction of a spatio-temporal connectome)
SC = SC - np.diag(np.diag(SC))
# [3] ts
ts = np.loadtxt(open(TS_filename, 'rb'), delimiter=',', skiprows=0)
# [4] positive_threhsold value
if pos_threshold <= 0:
print >> sys.stderr, "A positive threshold value is expected!"
sys.exit(2)
# [5] ROI labels
labels = open(LABEL_filename,'r').read().split('\n')
if not labels[-1]:
labels = labels[0:-1]
# [6] output directory
# Number of ROIs in the structural connectivity graph
nROIs = ts.shape[0]
# Number of time points
ntp = ts.shape[1]
# Sanity check
if nROIs != SC.shape[0]:
print >> sys.stderr, "The dimensions of SC and ts are not consistent!"
sys.exit(3)
# Define output file names
# ======================================================================
TS_zscore_filename = os.path.join(output_prefix + "ts_zscore.mat")
G_filename = os.path.join(output_prefix + "spatio-temporal_connectome.gpickle")
CC_filename = os.path.join(output_prefix + "CC.mat")
FM_filename = os.path.join(output_prefix + "FM.mat")
# If ts contains time series (not point processes), z-score ROI-wise time series
# ======================================================================
print('..... z-score time series .....')
test = np.where(ts==0)
test = np.array(test[0])
testn = test.size
test = np.where(ts==1)
test = np.array(test[0])
testn = testn + test.size
if ts.size != testn:
for i in range(len(ts)):
ts[i] = np.nan_to_num(ts[i])
ts[i] = sk.preprocessing.scale(ts[i], with_mean=True, with_std=True, copy=True)
# Save z-scored time series
print('..... write file: ' + TS_zscore_filename)
scipy.io.savemat(TS_zscore_filename, dict(ts=ts))
else:
pos_threshold = 1
# Create static structural connectivity graph Gs from the input adjacency matrix SC
# ======================================================================
# Create the extended multilayer graph from the adjacency matrix
Gs = nx.from_numpy_array(SC)
# Build a spatio-temporal connectome from SC and ts information
# ======================================================================
print('..... build multilayer network (spatio-temporal connectome) .....')
# Initiate a new NetworkX graph
G = nx.Graph()
graph_data = {}
graph_data['subject'] = output_prefix # subject ID / output_prefix
graph_data['ntp'] = ntp # ntp, number of time points
graph_data['nROIs'] = nROIs # nROIs, number of anatomical ROIs
graph_data['activation_threshold'] = pos_threshold # nROIs, number of anatomical ROIs
G.graph['graph_data'] = graph_data
# Loop throughout all the time points (1 -> ntp-1)
for t in range(ntp):
# For current time point, find active ROIs
tsst = ts[:,t] # fMRI values for all the nodes, current time point (t)
active_nodes = np.where(tsst >= pos_threshold)[0]
# Loop throughout all the ROIs active at current time point
for i in active_nodes:
# Generate a new node ID (in the multilayer network)
# NOTE: each node in the multilayer network has a unique ID equal to layer_pos * nROIs + i,
# with i anatomical_id of the considered node (from 1 to nROIs), layer_pos node position in time (from 1 to ntp),
# and nROIs number of ROIs in the structural connectivity graph
node_id = t * nROIs + i + 1 # ROI IDs start from 1
# If node_id does not exist in G, add it
if ~G.has_node(node_id):
# Generate attributes for the new node in the multilayer network
node_attrs = {}
node_attrs = {
'anatomical_id' : i + 1, # ROIs ID start from 1
'weight' : tsst[i],
'tp': t + 1, # tp ID start from 1
'node_id': node_id,
'label': labels[i]
}
G.add_node(node_id, **node_attrs)
if t < (ntp-1):
# Extract the neighbors of anatomical_id in Gs
neighbor_nodes = list(Gs.neighbors(i))
# Consider as well the node itself in the following (t+1) time point
neighbor_nodes.extend([i])
# Extract brain regions that are active at the following (t+1) time point
tsstt = ts[:,t+1] # fMRI values for all the nodes, following (t+1) time point
active_nodes_tt = np.where(tsstt >= pos_threshold)[0]
# Intersect current region's neighbors, and regions active at the following (t+1) time point
new_nodes = np.intersect1d(neighbor_nodes, active_nodes_tt, assume_unique=True)
# Loop throughout all neighbor and active regions: add them to the multilayer network, together with the corresponding edges
for j in new_nodes:
node_id_new = ( (t + 1) * nROIs ) + j + 1
node_attrs = {
'anatomical_id': j + 1,
'weight': tsstt[j],
'tp': t + 2,
'node_id': node_id_new,
'label': labels[j]
}
G.add_node(node_id_new, **node_attrs)
G.add_edge(node_id, node_id_new)
# Add bi-directional links between nodes belonging to the same layer of Gs
# Loop throughout all the time points (1 -> ntp)
for t in range(ntp):
# For current time point, find active nodes
tsst = ts[:,t] # fMRI values for all the nodes, current time point
active_nodes = np.where(tsst >= pos_threshold)[0]
# Add links between active nodes at current time point, which are also neighbors in Gs
for i in active_nodes:
# Node ID in multilayer network
node_id = t * nROIs + i + 1
# Extract region neighbors in Gs
neighbor_nodes = Gs.neighbors(i)
# Intersect current node neighbors, and nodes active at the current (t) time point
new_nodes = np.intersect1d(list(neighbor_nodes), list(active_nodes), assume_unique=True)
# Add edges
for j in new_nodes:
node_id_new = t * nROIs + j + 1
if ~G.has_edge(node_id, node_id_new):
G.add_edge(node_id, node_id_new)
# Save spatio-temporal connectome (multilayer network) as gpickle file
print('..... write file: ' + G_filename)
#nx.write_gpickle(G, G_filename)
# Extract the connected components (CCs) of the multilayer network
# ======================================================================
print('..... extract connected components (CCs) of multilayer network .....')
# The output nx.connected_component_subgraphs() is a list of nx graphs,
# ordered from the largest to the smallest (in terms of number of nodes)
CC = [G.subgraph(c).copy() for c in nx.connected_components(G)]
print('..... (number of CCs: ' + str(len(CC)) + ')')
# Set attributes of CCs: width (temporal extension), height (spatial extension), subject ID
# And save the CCs to a Matlab array of structures
height = np.zeros(shape=(len(CC)))
width = np.zeros(shape=(len(CC)))
subjectCC = []
# Initialize Python list of dictionaries and feature matrix
dictlist_cc = [dict() for x in range(len(CC))]
FM = np.zeros(shape=(len(CC),nROIs))
# Loop throughout all the connected components
for i in range(0, len(CC)):
# Current connected component
cc = CC[i]
# Nodes anatomical_id and layer_pos
nodes = cc.nodes()
anatomical_id_dict = nx.get_node_attributes(cc, 'anatomical_id')
anatomical_id = [anatomical_id_dict[x] for x in nodes]
tp_dict = nx.get_node_attributes(cc,'tp')
tp = [tp_dict[x] for x in nodes]
# Spatial height and temporal width of current component
height[i] = len(set(anatomical_id))
width[i] = max(tp) - min(tp) + 1
# Feature vector
ids = np.array(anatomical_id) - 1
for j in range(0,len(ids)):
FM[i][ids[j]] += 1
# Fill-in Python dictionary
dictlist_cc[i] = {'height':height[i],'width':width[i],'nodes':nodes,'edges':cc.edges(),'anatomical_id':anatomical_id,'tp':tp}
# Save connected components list of dictionaries to Matlab format
print('..... write file: ' + CC_filename)
mdict = {'CC':dictlist_cc}
scipy.io.savemat(CC_filename, mdict)
# Normalize and save feature matrix
print('..... write file: ' + FM_filename)
FM_norms = np.apply_along_axis(np.linalg.norm, 1, FM)
FM = FM.astype(float) / FM_norms[:, np.newaxis]
scipy.io.savemat(FM_filename, mdict={'FM':FM})
if __name__ == "__main__":
main()