Skip to content

Commit

Permalink
local merge
Browse files Browse the repository at this point in the history
  • Loading branch information
rmtfleming committed Apr 22, 2021
2 parents 08cd2c3 + be52c22 commit da7e7a5
Show file tree
Hide file tree
Showing 48 changed files with 3,833 additions and 1,496 deletions.
12 changes: 11 additions & 1 deletion external/dataIntegration/mCADRE/pruningModel.m
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,17 @@
model_rem = removeRxns(tissueModel, r);
end
% Check for inactive reactions after removal of r
[fluxConsistentMetBool,fluxConsistentRxnBool] = findFluxConsistentSubset(model_rem,paramConsistency);
try
[fluxConsistentMetBool,fluxConsistentRxnBool] = findFluxConsistentSubset(model_rem,paramConsistency);
rStatus_and_not_error = true;
catch
rStatus_and_not_error = false;
end
else
rStatus_and_not_error = false;
end

if rStatus_and_not_error
inactive_G= [ r; model_rem.rxns(fluxConsistentRxnBool==0)];

inactiveCore = intersect(inactive_G, coreRxn);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ function analyzeMgPipeResults(infoFilePath,resPath,varargin)
cd(violinPath)

% create violin plots for net uptake and secretion files
if any(contains(fileList{i,1},{'net_uptake_fluxes.csv','net_secretion_fluxes.csv'}))
if any(strcmp(fileList{i,1},{'net_uptake_fluxes.csv','net_secretion_fluxes.csv'}))
makeViolinPlots(sampleData, infoFile, 'stratification',sampleGroupHeaders{j}, 'plottedFeature', filename, 'unit', 'mmol/person/day')
end
cd(currentDir)
Expand Down
795 changes: 357 additions & 438 deletions ...is/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m
100755 → 100644

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
% AUTHOR
% - Almut Heinken, 08/2020

reactionDatabase = readtable('reactionDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
reactionDatabase = readtable('ReactionDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
reactionDatabase=table2cell(reactionDatabase);

reactionAbundance = readtable(reactionAbundancePath, 'ReadVariableNames', false);
Expand Down Expand Up @@ -54,6 +54,4 @@
end
end

writetable(cell2table(subsystemAbundance),'SubsystemAbundance.txt','FileType','text','WriteVariableNames',false,'Delimiter','\t');

end
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
function ReactionAbundance = fastCalculateReactionAbundance(abundancePath, modelPath, rxnsList, numWorkers)
% Part of the Microbiome Modeling Toolbox. This function calculates and
% plots the total abundance of reactions of interest in a given microbiome
% sample based on the strain-level composition.
% Reaction presence or absence in each strain is derived from the reaction content
% of the respective AGORA model.
%
% USAGE
%
% ReactionAbundance = fastCalculateReactionAbundance(abundancePath, modelPath, rxnsList, numWorkers)
%
% INPUTS:
% abundancePath: Path to the .csv file with the abundance data.
% Example: 'cobratoolbox/papers/018_microbiomeModelingToolbox/examples/normCoverage.csv'
% modelPath: Folder containing the strain-specific AGORA models
% OPTIONAL INPUTS:
% rxnsList: List of reactions for which the abundance
% should be calculated (if left empty: all
% reactions in all models)
% numWorkers: Number of workers used for parallel pool. If
% left empty, the parallel pool will not be
% started. Parallellization is recommended if
% all reactions are computed.
%
% OUTPUT:
% ReactionAbundance Table with total abundance for each microbiome
% and reaction
%
% .. Author: - Almut Heinken, 04/2021

% read the csv file with the abundance data
abundance = readtable(abundancePath, 'ReadVariableNames', false);
abundance = table2cell(abundance);
if isnumeric(abundance{2, 1})
abundance(:, 1) = [];
end

% load the models
for i = 2:size(abundance, 1)
model = readCbModel([modelPath filesep abundance{i, 1} '.mat']);
modelsList{i, 1} = model;
end

if ~exist('rxnsList', 'var') || isempty(rxnsList) % define reaction list if not entered
fprintf('No reaction list entered. Abundances will be calculated for all reactions in all models. \n')
% get model list from abundance input file
for i = 2:size(abundance, 1)
model = modelsList{i, 1};
rxnsList = vertcat(model.rxns, rxnsList);
end
rxnsList = unique(rxnsList);
end

% load the models found in the individuals and extract which reactions are
% in which model
for i = 2:size(abundance, 1)
model = modelsList{i, 1};
ReactionPresence{i, 1} = abundance{i, 1};
for j = 1:length(rxnsList)
ReactionPresence{1, j + 1} = rxnsList{j};
if ~isempty(find(ismember(model.rxns, rxnsList{j})))
ReactionPresence{i, j + 1} = '1';
else
ReactionPresence{i, j + 1} = '0';
end
end
end
ReactionPresence{1,1}='Strains';


% prepare table for the total abundance
ReactionAbundance = {};
for i = 1:length(rxnsList)
ReactionAbundance{1, i + 1} = rxnsList{i};
end
for i = 2:size(abundance, 2)
ReactionAbundance{i, 1} = abundance{1, i};
end

% use parallel pool if workers specified as input
if exist('numWorkers', 'var') && numWorkers > 0
poolobj = gcp('nocreate');
if isempty(poolobj)
parpool(numWorkers)
end
end

clear abundance

totalAbun={};
parfor i = 2:size(ReactionAbundance, 1)
i
% reload the file to avoid running out of memory
abundance = readtable(abundancePath, 'ReadVariableNames', false);
abundance = table2cell(abundance);
if isnumeric(abundance{2, 1})
abundance(:, 1) = [];
end

% temporarily store reaction abundances
totalAbun{i} = zeros(length(rxnsList), 1);

for j = 2:size(abundance, 1)
% find all reactions present in the strain
presentRxns = find(strcmp(ReactionPresence(j,2:end),'1'));

for k = 1:length(presentRxns)
% summarize total abundance
totalAbun{i}(presentRxns(k),1) = totalAbun{i}(presentRxns(k),1) + str2double(abundance{j,i});
end
end
end

% collect the temporarily stored abundances to put together the table
for i = 2:size(ReactionAbundance, 1)
ReactionAbundance(i,2:end) = num2cell(totalAbun{i});
end

end
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@

% Define the list of metabolites required by at least one AGORA model for
% growth
essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_adpcbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'};
essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'};

% fix any exchange nomenclature issues
adaptedDietConstraints(:, 1) = strrep(adaptedDietConstraints(:, 1), '[e]', '(e)');
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
function [exch,modelStoragePath] = buildModelStorage(microbeNames,modPath)

currentDir=pwd;
mkdir('modelStorage')
cd('modelStorage')
modelStoragePath = pwd;

exch = {};
for j = 1:size(microbeNames, 1)
model = readCbModel([modPath filesep microbeNames{j,1} '.mat']);
%exch = union(exch, model.mets(find(sum(model.S(:, strncmp('EX_', model.rxns, 3)), 2) ~= 0)));
exStruct = findSExRxnInd(model);
new_exch = findMetsFromRxns(model,model.rxns(exStruct.ExchRxnBool & ~exStruct.biomassBool));
exch = union(exch,new_exch);
end

% get already built reconstructions
dInfo = dir(modelStoragePath);
modelList={dInfo.name};
modelList=modelList';
modelList=strrep(modelList,'.mat','');
microbesNames=setdiff(microbeNames,modelList);


if length(microbesNames)>0
%% create a new extracellular space [u] for microbes
for j = 1:size(microbeNames, 1)
model = readCbModel([modPath filesep microbeNames{j,1} '.mat']);
% temp fix
if isfield(model,'C')
model=rmfield(model,'C');
model=rmfield(model,'d');
end
%

% removing possible constraints of the bacs
selExc = findExcRxns(model);
Reactions2 = model.rxns(find(selExc));
allex = Reactions2(strmatch('EX', Reactions2));
biomass = allex(find(strncmp(allex,'bio',3)));
finrex = setdiff(allex, biomass);
model = changeRxnBounds(model, finrex, -1000, 'l');
model = changeRxnBounds(model, finrex, 1000, 'u');

% removing blocked reactions from the bacs
%BlockedRxns = identifyFastBlockedRxns(model,model.rxns, printLevel);
%model= removeRxns(model, BlockedRxns);
%BlockedReaction = findBlockedReaction(model,'L2')

model = convertOldStyleModel(model);
exmod = model.rxns(strncmp('EX_', model.rxns, 3)); % find exchange reactions
eMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]'))); % exchanged metabolites
dummyMicEU = createModel();
%dummyMicEU = makeDummyModel(2 * size(eMets, 1), size(eMets, 1));
dummyMicEUmets = [strcat(strcat(microbeNames{j, 1}, '_'), regexprep(eMets, '\[e\]', '\[u\]')); regexprep(eMets, '\[e\]', '\[u\]')];
dummyMicEU = addMultipleMetabolites(dummyMicEU,dummyMicEUmets);
nMets = numel(eMets);
S = [speye(nMets);-speye(nMets)];
lbs = repmat(-1000,nMets,1);
ubs = repmat(1000,nMets,1);
names = strcat(strcat(microbeNames{j, 1}, '_'), 'IEX_', regexprep(eMets, '\[e\]', '\[u\]'), 'tr');
dummyMicEU = addMultipleReactions(dummyMicEU,names,dummyMicEUmets,S,'lb',lbs,'ub',ubs);
model = removeRxns(model, exmod);
model.rxns = strcat(strcat(microbeNames{j, 1}, '_'), model.rxns);
model.mets = strcat(strcat(microbeNames{j, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]')); % replace [e] with [u]
[model] = mergeTwoModels(dummyMicEU, model, 2, false, false);

%finish up by A: removing duplicate reactions
%We will lose information here, but we will just remove the duplicates.
[model,rxnToRemove,rxnToKeep]= checkDuplicateRxn(model,'S',1,0,1);

writeCbModel(model,'format','mat','fileName',[microbeNames{j,1} '.mat']); % store model
end
end

cd(currentDir)

end
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
createdModels = {};

% use the setup model containing every strain in every sample
parfor k = 1:length(sampNames)
for k = 1:length(sampNames)
mgmodel = model;
abunRed = abundance(:,k+1);

Expand Down Expand Up @@ -64,7 +64,7 @@
% Coupling constraints for bacteria
for i = 1:length(presBac)
IndRxns=find(strncmp(mgmodel.rxns,[presBac{i,1} '_'],length(presBac{i,1})+1));%finding indixes of specific reactions
% find the name of biomass reacion in the microbe model
% find the name of biomass reaction in the microbe model
bioRxn=mgmodel.rxns{find(strncmp(mgmodel.rxns,strcat(presBac{i,1},'_bio'),length(char(strcat(presBac{i,1},'_bio')))))};
mgmodel=coupleRxnList2Rxn(mgmodel,mgmodel.rxns(IndRxns(1:length(mgmodel.rxns(IndRxns(:,1)))-1,1)),bioRxn,400,0); %couple the specific reactions
end
Expand Down
Loading

0 comments on commit da7e7a5

Please sign in to comment.