-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path05-PLS_across_age_groups.m
169 lines (117 loc) · 5.4 KB
/
05-PLS_across_age_groups.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
% PLS code (pls_analysis.m) can be downloaded at
% http://pls.rotman-baycrest.on.ca/source/ ("Latest PLS Applications")
%% load
clear all
clc
cd '~/Documents/SickKids/abagen_analysis/'
load('GeneExpression.mat') % relevant node indices
load('SpinsTwirls.mat') % spin test indices
load('coordinates.mat') % (x,y,z) coordinates for brain regions
load('GeneNames.mat') % (x,y,z) coordinates for brain regions
corridinates= table2array(coordinates);
GeneExpression= table2array(GeneExpressionDestrieux);
SpinsTwrils=SpinsTwrils+1;
addpath('./natsortfiles')
topgene=readtable('~/Documents/SickKids/abagen_analysis/OverlapGeneLoadings.csv');
%% set up of PLS analysis
addpath(genpath('./Pls/'));
% set up PLS analysis
X = zscore(GeneExpression);
nnodes = 148; % number of nodes/ ROIs
ngenes = length(GeneExpression);
nterms= 4;
bootnnodes = 118; % bootstrapped number of ROIs
% behav pls
option.method = 3;
option.num_boot = 0;
option.num_perm = 0; % zero permutations because they will be run manually later to account for spatial autocorrelation
exp{1} = X;
filename= dir('/Users/jason/Documents/SickKids/ICCvalues2/ICC_group_mean_*_arrhythmic.csv');
sort_files=natsort({filename.name})';
%% get genes with entrezID
T = table2cell(readtable('gene_entrez_ids')); % load entrezID of genes
gene_name = GeneNames; % get relevant gene names
entrezIDs = zeros(size(gene_name));
idx = [];
for k = 1:length(gene_name) % for each gene
if ismember(gene_name{k}, T(:,1)) % if the gene has an entrezID
entrezIDs(k) = cell2mat(T(find(strcmp(gene_name{k}, T(:,1))),2)); % store the entrezID
idx = [idx;k]; % also store the index of the gene
end
end
%entrezIDs = entrezIDs(entrezIDs ~= 0); % remove all genes without entrezID
entrezIDsNONID = entrezIDs(entrezIDs ~= 0); % this will be our background genes to compare to in the enrichment analysis
%% iterate over age groups
for i=1:37
filename2PLS= [filename(i).folder, '/', sort_files{i}];
ICC=readmatrix(filename2PLS);
X = zscore(GeneExpression);
exp{1} = X;
Y = zscore(ICC);
option.stacked_behavdata = Y;
result = pls_analysis(exp, nnodes, 1, option); % this is the PLS result that is used in all other analyses
cb=(result.s.^2)/(sum(result.s.^2)); % calculate percent varaince explained
var_explained(i,:)=cb;
%% spin test
% this code comes from pls_analysis.m and is modified to account for a
% spatial autocorrelation-preserving permutation test
Y = zscore(ICC);
nspins = 1000; % number of permutations ("spins")
s_spins = zeros(nterms,nspins); % singular values
option.method = 3; % set up PLS
option.num_boot = 0;
option.num_perm = 0;
X = zscore(GeneExpression);
exp{1} = X;
for k = (1+ (i-1)*1000):(nspins*i)
option.stacked_behavdata = Y(SpinsTwrils(:,k),:); % permute neurosynth matrix
datamatsvd=rri_xcor(option.stacked_behavdata,exp{1},0); % refer to pls_analysis.m
[r,c] = size(datamatsvd);
if r <= c
[pu, sperm, pv] = svd(datamatsvd',0);
else
[pv, sperm, pu] = svd(datamatsvd,0);
end
% rotate pv to align with the original v
rotatemat = rri_bootprocrust(result.v,pv);
% rescale the vectors
pv = pv * sperm * rotatemat;
sperm = sqrt(sum(pv.^2));
s_spins(:,k) = sperm;
end
sprob = zeros(nterms,1); % p-value for each latent variable
for k = 1:nterms % get permuted (via spin test) p-values
sprob(k) = (1+(nnz(find(s_spins(k,:)>=result.s(k)))))/(1+nspins);
end
pvalues(i,:)=sprob;
var_CI=[];
for b=1:1000
bootind= randi(148,[1,bootnnodes]);
% set up PLS analysis
X = zscore(GeneExpression(bootind,:));
Y = zscore(ICC(bootind,:));
option.stacked_behavdata = Y;
exp{1} = X;
result_CI = pls_analysis(exp, bootnnodes, 1, option); % this is the PLS result that is used in all other analyses
cb_CI=(result_CI.s.^2)/(sum(result_CI.s.^2)); % calculate percent varaince explained
var_CI(b)=cb_CI(1);
end
var_explained_CI(i,:)=quantile(var_CI, [0.025, 0.975]);
% get gene sets
% compute the loading of each gene as the correlation between the original
% data and the gene scores
gload = zeros(ngenes,1);
for k = 1:ngenes
gload(k) = corr(GeneExpression(:,k),result.vsc(:,1));
end
overlap(i)=abs(corr(topgene.loadings, gload(topgene.index)));
if corr(topgene.loadings, gload(topgene.index)) > 0
neurophyscores(i,:)=-1*corr(ICC,result.usc(:,1));
else corr(topgene.loadings, gload(topgene.index)) < 0
neurophyscores(i,:)=corr(ICC,result.usc(:,1));
end
end
pvalFDR2= mafdr(pvalues(:,1),'BHFDR',true);
varNames = {'varexplained', 'CIlower','CIupper','pval','pvalFDR','overlapGeneLoad', 'NeuroPhysTheta', 'NeuroPhysAlpha', 'NeuroPhysBeta', 'NeuroPhysGamma'};
Tableoutput=table(var_explained(:,1), var_explained_CI(:,1),var_explained_CI(:,2),pvalues(:,1), pvalFDR2, overlap', neurophyscores(:,1),neurophyscores(:,2),neurophyscores(:,3),neurophyscores(:,4), 'VariableNames',varNames);
writetable(Tableoutput, './PLS_genes_across_lifespan_2.csv');