-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgetAllenStructureList.m
290 lines (234 loc) · 10.2 KB
/
getAllenStructureList.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
function [ARA_table,tableRowInds] = getAllenStructureList(varargin)
% Download the list of adult mouse structures from the Allen API.
%
% function [ARA_table,tableRowInds] = getAllenStructureList('param1',val1,...)
%
%
% Purpose
% Make an API query to read in the Allen Reference Atlas (ARA) brain area
% list. All areas and data are read. Data are cached in the system's temp
% directory and re-read from there if possible to improve speed.
%
%
% Inputs (all optional param/val pairs)
% 'downloadAgain' - [false] If true, the function downloads the structure
%                   list again and overwrites the cached data.
% 'ancestorsOf' - [empty] Returns only those areas that are ancestors of
%                 the named area. You may supply a string, numeric scalar,
%                 or a cell array that combines these to return a table that
%                 contains the ancestors of multiple areas. If the ID or
%                 structure name can not be found, the function returns an
%                 empty array and displays a warning on-screen.
% 'childrenOf' - [empty] Returns only those areas that are children of
%                the named area. As above, you may supply a string, numeric
%                scalar, or a cell array that combines these.
% 'excludeReferenceArea' - [false] If true, the areas supplied by the childrenOf
%                          and ancestorsOf arguments are removed from the
%                          outputs.
%
%
% Outputs
% ARA_table - a table containing the imported data.
% tableRowInds - The rows in the original table that are present in ARA_table.
%                If the user has selected a subset of data using the childrenOf
%                or ancestorsOf arguments, then tableRowInds is such that:
%                ARA_table = ORIGINAL_TABLE(tableRowInds,:);
%                If no subset was requested, tableRowInds lists every row.
%
%
% Examples
%
% a) Basic usage
% S=getAllenStructureList;
%
% b) Returning subsets of the data
% S=getAllenStructureList('ancestorsOf',{'Posterior auditory area, layer 1',1017})
% S=getAllenStructureList('ancestorsOf','Posterior auditory area, layer 1')
% S=getAllenStructureList('childrenOf','Cerebellum')
% S=getAllenStructureList('childrenOf','Cerebellum','ancestorsOf','Posterior auditory area, layer 1')
%
% c) Remove the cerebellum
% S=getAllenStructureList;
% [~,ind]=getAllenStructureList('childrenOf','Cerebellum');
% S(ind,:)=[];
%
% d) Remove the cerebellum's children but keep the cerebellum
% S=getAllenStructureList;
% [~,ind]=getAllenStructureList('childrenOf','Cerebellum', 'excludeReferenceArea',true);
% S(ind,:)=[];
%
%
%
% Rob Campbell - Basel 2015

if nargin==1
    % Alert anyone who might be using the old (single positional argument) scheme.
    % help(mfilename) is required: "help mfilename" would show the help text
    % of the built-in mfilename function rather than of this file.
    fprintf('\n\n')
    help(mfilename)
    error('You supplied only one input argument')
end

% Parse input arguments
params = inputParser;
params.CaseSensitive = false;
params.addParameter('downloadAgain', false, @(x) islogical(x) || x==0 || x==1);
params.addParameter('ancestorsOf', {}, @(x) ischar(x) || isnumeric(x) || iscell(x))
params.addParameter('childrenOf', {}, @(x) ischar(x) || isnumeric(x) || iscell(x))
params.addParameter('excludeReferenceArea', false, @(x) islogical(x) || x==0 || x==1);
params.parse(varargin{:})

downloadAgain = params.Results.downloadAgain;
% Ensure that ancestorsOf and childrenOf are cell arrays of IDs or names in
% order to simplify later code
ancestorsOf = checkFilteringInput(params.Results.ancestorsOf);
childrenOf = checkFilteringInput(params.Results.childrenOf);
excludeReferenceArea = params.Results.excludeReferenceArea;

% Cached files will be stored here
cachedCSV = fullfile(tempdir,sprintf('%s_CACHED.csv',mfilename));
cachedMAT = fullfile(tempdir,sprintf('%s_CACHED.mat',mfilename));

if ~exist(cachedMAT,'file') || downloadAgain
    % The data are to be re-read or we couldn't find any cached data.
    % The adult mouse structure graph has an id of 1.
    fprintf('Pulling ARA structure list from the web\n')
    url='http://api.brain-map.org/api/v2/data/Structure/query.csv?criteria=[graph_id$eq1]&num_rows=all';
    % NOTE: urlwrite is flagged as "not recommended" in recent MATLAB releases
    % (websave is the suggested replacement); it is kept for compatibility.
    [~,status] = urlwrite(url,cachedCSV,'Timeout',5);
    if ~status
        error('Failed to get CSV file from URL %s', url)
    end

    % Confirm that the downloaded file is readable before parsing it
    fid = fopen(cachedCSV);
    if fid<0
        error('Failed to open CSV file at %s\n', cachedCSV)
    end
    fclose(fid);

    % One conversion specifier per CSV column
    readParams={'%d%d%q%q%d%d%d%d%d%d%d%d%s%s%s%s%s%d%d%d%s\n','delimiter',','};
    ARA_table=readtable(cachedCSV,'format',readParams{:});

    % Cache to disk in temporary location
    save(cachedMAT,'ARA_table')
else
    % If the data have been imported before we can just return them. Load into
    % a struct so that nothing else stored in the file leaks into this workspace.
    cached = load(cachedMAT,'ARA_table');
    if ~isfield(cached,'ARA_table')
        error('Cached file %s does not contain ARA_table. Re-run with ''downloadAgain'',true', cachedMAT)
    end
    ARA_table = cached.ARA_table;
end

% Filter the structure list if needed
wantAncestors = ~isempty(ancestorsOf);
wantChildren = ~isempty(childrenOf);

if wantChildren && ~wantAncestors
    [ARA_table,tableRowInds] = returnChildrenOnly(ARA_table,childrenOf,excludeReferenceArea);
elseif wantAncestors && ~wantChildren
    [ARA_table,tableRowInds] = returnAncestorsOnly(ARA_table,ancestorsOf,excludeReferenceArea);
elseif wantAncestors && wantChildren
    % Return the union of both selections. The row indexes are merged first and
    % the table is then indexed once, which guarantees that
    % ARA_table == ORIGINAL_TABLE(tableRowInds,:). The (:) makes the merge
    % independent of the orientation of the vectors returned by the helpers.
    [~,tableRowIndsC] = returnChildrenOnly(ARA_table,childrenOf,excludeReferenceArea);
    [~,tableRowIndsA] = returnAncestorsOnly(ARA_table,ancestorsOf,excludeReferenceArea);
    tableRowInds = unique([tableRowIndsC(:);tableRowIndsA(:)])';
    if isempty(tableRowInds)
        % Neither search found anything: return empty arrays like the helpers do
        ARA_table = [];
        tableRowInds = [];
    else
        ARA_table = ARA_table(tableRowInds,:);
    end
else
    % No filtering requested: every row of the original table is returned
    tableRowInds = 1:height(ARA_table);
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
function fInput = checkFilteringInput(fInput)
% Ensure that ancestorsOf or childrenOf is a cell array of area names and/or IDs
%
% Inputs
% fInput - a cell array (returned unchanged), a character array, or a
%          numeric array.
%
% Outputs
% fInput - a cell array:
%          * cell input is returned as-is
%          * an empty character array becomes an empty cell array
%          * a character vector becomes a one-element cell array
%          * a character matrix becomes one cell per row (via cellstr)
%          * a numeric array becomes one cell per element (via num2cell)
%
% An error is raised for any other input type.

if iscell(fInput)
    return
end

if ischar(fInput)
    if isempty(fInput)
        % '' is not a vector according to isvector, so handle it explicitly:
        % an empty name means "no filtering"
        fInput = {};
    elseif isvector(fInput)
        fInput = {fInput};
    else
        % Each row of a character matrix is treated as one area name
        fInput = cellstr(fInput);
    end
elseif isnumeric(fInput)
    fInput = num2cell(fInput);
else
    % help(mfilename) rather than "help mfilename", which would show the help
    % text of the built-in mfilename function
    help(mfilename)
    error('%s - Unknown value of input variable', mfilename)
end
function [returnedTable,tableRowInds] = returnAncestorsOnly(ARA_table,ancestorsOf,excludeReferenceArea)
% If the user asked for only the ancestors of an area, we search for these here and
% return an empty array with an on-screen warning if nothing could be found.
%
% Inputs
% ARA_table - the full structure table. The columns id, name and
%             structure_id_path are used.
% ancestorsOf - cell array of structure IDs (numeric) and/or structure names
%               (character vectors). A name selects every structure whose
%               name begins with it (prefix match, case sensitive).
% excludeReferenceArea - if true, the areas selected by ancestorsOf are
%               themselves removed from the result.
%
% Outputs
% returnedTable - the table containing only the ancestors of the selected area(s).
%                 Each ancestor appears once, in the order in which it is first
%                 met in the structure ID paths (root first).
% tableRowInds - these are: returnedTable = ARA_table(tableRowInds,:)

if isempty(ancestorsOf)
    returnedTable=ARA_table;
    tableRowInds=[];
    return
end

% Work with double IDs so that comparisons do not depend on the integer class
% of the id column
allIDs = double(ARA_table.id(:));

% Rows of the table for which we will find ancestors
childRows=[];
for ii=1:length(ancestorsOf)
    tChild=ancestorsOf{ii}; % This child for which we will look for parents
    if isnumeric(tChild)
        childRows=[childRows; find(ismember(allIDs,double(tChild(:))))]; %#ok<AGROW>
    elseif ischar(tChild) && ~isempty(tChild)
        % Prefix match, replacing the deprecated strmatch
        childRows=[childRows; find(strncmp(tChild,ARA_table.name,length(tChild)))]; %#ok<AGROW>
    end
end

% Loop through childRows and collect the IDs of all ancestors. Each path looks
% like '/997/8/567/' and lists the IDs from the root down to the area itself.
% sscanf is used instead of str2num because str2num evaluates its input.
ancestors=[];
for ii=1:length(childRows)
    idPath = ARA_table.structure_id_path{childRows(ii)};
    idPath = sscanf(strrep(idPath,'/',' '),'%f')';
    ancestors = [ancestors, idPath]; %#ok<AGROW>
end

if excludeReferenceArea
    % Discard the reference areas themselves if the user asked for this
    ancestors(ismember(ancestors,allIDs(childRows)))=[];
end

% Convert the ancestor IDs to table rows. Areas supplied by the user may share
% ancestors, so each ID is kept only once. IDs that are absent from the table
% are skipped.
tableRowInds=[];
if ~isempty(ancestors)
    [isKnownID,rowOfID] = ismember(unique(ancestors,'stable'),allIDs);
    tableRowInds = rowOfID(isKnownID);
end

if isempty(tableRowInds)
    fprintf('\n\n *** NO ANCESTORS FOUND. RETURNING EMPTY ARRAY ***\n\n')
    returnedTable=[];
    tableRowInds=[];
    return
end

returnedTable = ARA_table(tableRowInds,:); % filter it
function [returnedTable,tableRowInds] = returnChildrenOnly(ARA_table,childrenOf,excludeReferenceArea)
% If the user asked for only the children of an area, we search for these here and
% return an empty array with an on-screen warning if nothing could be found.
%
% Inputs
% ARA_table - the full structure table. The columns id, name and
%             structure_id_path are used.
% childrenOf - cell array of structure IDs (numeric) and/or structure names
%              (character vectors). A name selects every structure whose
%              name begins with it (prefix match, case sensitive).
% excludeReferenceArea - if true, the areas selected by childrenOf are
%              themselves removed from the result.
%
% Outputs
% returnedTable - the table containing only the children of the selected area(s)
% tableRowInds - sorted row vector such that:
%                returnedTable = ARA_table(tableRowInds,:)

if isempty(childrenOf)
    returnedTable=ARA_table;
    tableRowInds=[];
    return
end

% Work with double IDs so that comparisons do not depend on the integer class
% of the id column
allIDs = double(ARA_table.id(:));

% Rows of the table that hold the reference areas whose children we want
refRows=[];
for ii=1:length(childrenOf)
    tArea=childrenOf{ii};
    if isnumeric(tArea)
        refRows=[refRows; find(ismember(allIDs,double(tArea(:))))]; %#ok<AGROW>
    elseif ischar(tArea) && ~isempty(tArea)
        % Prefix match, replacing the deprecated strmatch
        refRows=[refRows; find(strncmp(tArea,ARA_table.name,length(tArea)))]; %#ok<AGROW>
    end
end

% The structure IDs associated with these rows
refIDs = allIDs(unique(refRows));

% Go through the table once and flag every row that lists at least one of the
% reference IDs in its structure_id_path. Each path looks like '/997/8/567/'
% and ends with the ID of the row's own structure. sscanf is used instead of
% str2num because str2num evaluates its input.
nRows = height(ARA_table);
keepRow = false(nRows,1);
if ~isempty(refIDs)
    for thisRow = 1:nRows
        sID = sscanf(strrep(ARA_table.structure_id_path{thisRow},'/',' '),'%f')';
        if isempty(sID)
            continue
        end
        inPath = ismember(refIDs,sID);
        if excludeReferenceArea
            % A reference ID that is last in the path is the reference area
            % itself, so it does not count as a match
            inPath = inPath & (refIDs ~= sID(end));
        end
        keepRow(thisRow) = any(inPath);
    end
end

tableRowInds = find(keepRow)';

if isempty(tableRowInds)
    fprintf('\n\n *** NO CHILDREN FOUND. RETURNING EMPTY ARRAY ***\n\n')
    returnedTable=[];
    tableRowInds=[];
    return
end

returnedTable = ARA_table(tableRowInds,:); % filter it