diff --git a/external/dataIntegration/mCADRE/pruningModel.m b/external/dataIntegration/mCADRE/pruningModel.m
index 5c4b7d76ec..33ad4e53de 100644
--- a/external/dataIntegration/mCADRE/pruningModel.m
+++ b/external/dataIntegration/mCADRE/pruningModel.m
@@ -66,7 +66,17 @@
                 model_rem = removeRxns(tissueModel, r);
             end
             % Check for inactive reactions after removal of r
-            [fluxConsistentMetBool,fluxConsistentRxnBool] = findFluxConsistentSubset(model_rem,paramConsistency);
+            try
+                [fluxConsistentMetBool,fluxConsistentRxnBool] = findFluxConsistentSubset(model_rem,paramConsistency);
+                rStatus_and_not_error = true;
+            catch
+                rStatus_and_not_error = false;
+            end
+        else
+            rStatus_and_not_error = false;
+        end
+        
+        if rStatus_and_not_error
             inactive_G= [ r; model_rem.rxns(fluxConsistentRxnBool==0)];
             
             inactiveCore = intersect(inactive_G, coreRxn);
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyzeMgPipeResults.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyzeMgPipeResults.m
index 0eaa701a12..8def2a7e93 100644
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyzeMgPipeResults.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/analyzeMgPipeResults.m
@@ -113,7 +113,7 @@ function analyzeMgPipeResults(infoFilePath,resPath,varargin)
             cd(violinPath)
             
             % create violin plots for net uptake and secretion files
-            if any(contains(fileList{i,1},{'net_uptake_fluxes.csv','net_secretion_fluxes.csv'}))
+            if any(strcmp(fileList{i,1},{'net_uptake_fluxes.csv','net_secretion_fluxes.csv'}))
                 makeViolinPlots(sampleData, infoFile, 'stratification',sampleGroupHeaders{j}, 'plottedFeature', filename, 'unit', 'mmol/person/day')
             end
             cd(currentDir)
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m
old mode 100755
new mode 100644
index 177534c0ee..e4a885e71a
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateReactionAbundance.m
@@ -1,438 +1,357 @@
-function [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, infoFilePath, rxnsList, numWorkers)
-% Part of the Microbiome Modeling Toolbox. This function calculates and
-% plots the total abundance of reactions of interest in a given microbiome
-% sample based on the strain-level composition.
-% Reaction presence or absence in each strain is derived from the reaction content
-% of the respective AGORA model. Two results are given: the total abundance,
-% and the abundance on different taxonomical levels.
-%
-% USAGE
-%
-%    [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, infoFilePath, rxnsList, numWorkers)
-%
-% INPUTS:
-%    abundancePath:          Path to the .csv file with the abundance data.
-%                            Example: 'cobratoolbox/papers/018_microbiomeModelingToolbox/examples/normCoverage.csv'
-%    modelPath:              Folder containing the strain-specific AGORA models
-% OPTIONAL INPUTS:
-%    infoFilePath:           Path to the spreadsheet with the taxonomy
-%                            information on organisms (default:
-%                            AGORA_infoFile.xlsx)
-%    rxnsList:               List of reactions for which the abundance
-%                            should be calculated (if left empty: all
-%                            reactions in all models)
-%    numWorkers:             Number of workers used for parallel pool. If
-%                            left empty, the parallel pool will not be
-%                            started. Parallellization is recommended if
-%                            all reactions are computed.
-%
-% OUTPUT:
-%    ReactionAbundance       Structure with abundance for each microbiome
-%                            and reaction in total and on taxon levels
-%     TaxonomyInfo:          Taxonomical information on each taxon level 
-%
-% .. Author: - Almut Heinken, 03/2018
-%                             10/2018:  changed input to location of the csv file with the
-%                                       abundance data
-%                           01/2020:    adapted to be suitable for pan-models
-
-% read the csv file with the abundance data
-abundance = readtable(abundancePath, 'ReadVariableNames', false);
-abundance = table2cell(abundance);
-if isnumeric(abundance{2, 1})
-    abundance(:, 1) = [];
-end
-
-% 
-
-% load the models
-for i = 2:size(abundance, 1)
-    model = readCbModel([modelPath filesep abundance{i, 1} '.mat']);
-    modelsList{i, 1} = model;
-end
-
-if ~exist('rxnsList', 'var') || isempty(rxnsList)  % define reaction list if not entered
-    fprintf('No reaction list entered. Abundances will be calculated for all reactions in all models. \n')
-    % get model list from abundance input file
-    for i = 2:size(abundance, 1)
-        model = modelsList{i, 1};
-        rxnsList = vertcat(model.rxns, rxnsList);
-    end
-    rxnsList = unique(rxnsList);
-end
-
-% Get the taxonomy information
-if exist('infoFilePath','var')
-    taxonomy = readtable(infoFilePath, 'ReadVariableNames', false);
-    taxonomy = table2cell(taxonomy);
-else
-    taxonomy = readtable('AGORA_infoFile.xlsx', 'ReadVariableNames', false);
-    taxonomy = table2cell(taxonomy);
-end
-
-% load the models found in the individuals and extract which reactions are
-% in which model
-for i = 2:size(abundance, 1)
-    model = modelsList{i, 1};
-    ReactionPresence{i, 1} = abundance{i, 1};
-    for j = 1:length(rxnsList)
-        ReactionPresence{1, j + 1} = rxnsList{j};
-        if ~isempty(find(ismember(model.rxns, rxnsList{j})))
-            ReactionPresence{i, j + 1} = 1;
-        else
-            ReactionPresence{i, j + 1} = 0;
-        end
-    end
-end
-
-% put together a Matlab structure of the results
-ReactionAbundance = struct;
-
-% prepare table for the total abundance
-for j = 1:length(rxnsList)
-    ReactionAbundance.('Total'){1, j + 1} = rxnsList{j};
-end
-
-TaxonomyLevels = {
-    'Phylum'
-    'Class'
-    'Order'
-    'Family'
-    'Genus'
-    'Species'
-};
-% extract the list of entries on each taxonomical level
-for t = 1:size(TaxonomyLevels, 1)
-    % find the columns corresponding to each taxonomy level and the list of
-    % unique taxa
-    taxonCol = find(strcmp(taxonomy(1, :), TaxonomyLevels{t}));
-    % find and save all entries
-    taxa = unique(taxonomy(2:end, taxonCol));
-    % exclude unclassified entries
-    taxa(strncmp('unclassified', taxa, taxonCol)) = [];
-    TaxonomyLevels{t, 2} = taxa;
-    % define the correct columns in taxonomy table
-    TaxonomyLevels{t, 3} = taxonCol;
-    % prepare table for the abundance on taxon levels
-    cnt = 2;
-    for j = 1:length(rxnsList)
-        for l = 1:length(TaxonomyLevels{t, 2})
-            ReactionAbundance.(TaxonomyLevels{t, 1}){1, cnt} = strcat(TaxonomyLevels{t, 2}{l}, '_', rxnsList{j});
-            cnt = cnt + 1;
-        end
-    end
-end
-
-% Find the right column for the input data (strains, species,..)
-taxa=taxonomy(2:end,1);
-if length(intersect(abundance(2:end,1),taxa))==size(abundance,1)-1
-    inputTaxa=taxa;
-    inputCol=1;
-else
-    abundance(:,1)=regexprep(abundance(:,1),'pan','','once');
-    inputTaxa={};
-    for i=2:size(taxonomy,2)
-        taxa=strrep(taxonomy(:,i),' ','_');
-        taxa=strrep(taxa,'.','_');
-        taxa=strrep(taxa,'/','_');
-        taxa=strrep(taxa,'-','_');
-        taxa=strrep(taxa,'__','_');
-        if length(intersect(abundance(2:end,1),taxa))==size(abundance,1)-1
-            inputTaxa=taxa;
-            inputCol=i;
-        end
-    end
-end
-if isempty(inputTaxa)
-    error('Some taxa in the abundance file are not found in the taxonomy file!')
-end
-
-for i = 2:size(abundance, 2)
-    %% calculate reaction abundance for the samples one by one
-    fprintf(['Calculating reaction abundance for sample ', num2str(i - 1), ' of ' num2str(size(abundance, 2) - 1) '.. \n'])
-    ReactionAbundance.('Total'){i, 1} = abundance{1, i};
-    if ~isempty(taxonomy)
-        for t = 1:size(TaxonomyLevels, 1)
-            ReactionAbundance.(TaxonomyLevels{t, 1}){i, 1} = abundance{1, i};
-        end
-    end
-    % use parallel pool if workers specified as input
-    if exist('numWorkers', 'var') && numWorkers > 0
-        poolobj = gcp('nocreate');
-        if isempty(poolobj)
-            parpool(numWorkers)
-        end
-        % create tables in which abundances for each individual for
-        % all reactions/taxa are stored
-        totalAbun = zeros(length(rxnsList), 1);
-        if ~isempty(taxonomy)
-            phylumAbun = zeros(length(rxnsList), length(TaxonomyLevels{1, 2}));
-            classAbun = zeros(length(rxnsList), length(TaxonomyLevels{2, 2}));
-            orderAbun = zeros(length(rxnsList), length(TaxonomyLevels{3, 2}));
-            familyAbun = zeros(length(rxnsList), length(TaxonomyLevels{4, 2}));
-            genusAbun = zeros(length(rxnsList), length(TaxonomyLevels{5, 2}));
-            speciesAbun = zeros(length(rxnsList), length(TaxonomyLevels{6, 2}));
-        end
-        parfor j = 1:length(rxnsList)
-        % store the abundance for each reaction and taxon separately in a
-        % temporary file to enable parallellization
-            if ~isempty(taxonomy)
-                tmpPhyl = zeros(length(rxnsList), length(TaxonomyLevels{1, 2}));
-                tmpClass = zeros(length(rxnsList), length(TaxonomyLevels{2, 2}));
-                tmpOrder = zeros(length(rxnsList), length(TaxonomyLevels{3, 2}));
-                tmpFamily = zeros(length(rxnsList), length(TaxonomyLevels{4, 2}));
-                tmpGenus = zeros(length(rxnsList), length(TaxonomyLevels{5, 2}));
-                tmpSpecies = zeros(length(rxnsList), length(TaxonomyLevels{6, 2}));
-            end
-            for k = 2:size(abundance, 1)
-                % check if the reaction is present in the strain
-                if ReactionPresence{k, j + 1} == 1
-                    % calculate total abundance
-                    totalAbun(j) = totalAbun(j) + str2double(abundance{k, i});
-                    if ~isempty(taxonomy)
-                        % calculate phylum abundance
-                        t = 1;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            tmpPhyl(1, taxonCol) = tmpPhyl(1, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate class abundance
-                        t = 2;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            tmpClass(1, taxonCol) = tmpClass(1, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate order abundance
-                        t = 3;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            tmpOrder(1, taxonCol) = tmpOrder(1, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate family abundance
-                        t = 4;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            tmpFamily(1, taxonCol) = tmpFamily(1, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate genus abundance
-                        t = 5;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            tmpGenus(1, taxonCol) = tmpGenus(1, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate species abundance
-                        t = 6;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            tmpSpecies(1, taxonCol) = tmpSpecies(1, taxonCol) + str2double(abundance{k, i});
-                        end
-                    end
-                end
-            end
-            if ~isempty(taxonomy)
-                phylumAbun(j, :) = tmpPhyl(1, :);
-                classAbun(j, :) = tmpClass(1, :);
-                orderAbun(j, :) = tmpOrder(1, :);
-                familyAbun(j, :) = tmpFamily(1, :);
-                genusAbun(j, :) = tmpGenus(1, :);
-                speciesAbun(j, :) = tmpSpecies(1, :);
-            end
-        end
-    else
-        % create tables in which abundances for each individual for
-        % all reactions/taxa are stored
-        % no parallellization-takes longer
-        totalAbun = zeros(length(rxnsList), 1);
-        if ~isempty(taxonomy)
-            phylumAbun = zeros(length(rxnsList), length(TaxonomyLevels{1, 2}));
-            classAbun = zeros(length(rxnsList), length(TaxonomyLevels{2, 2}));
-            orderAbun = zeros(length(rxnsList), length(TaxonomyLevels{3, 2}));
-            familyAbun = zeros(length(rxnsList), length(TaxonomyLevels{4, 2}));
-            genusAbun = zeros(length(rxnsList), length(TaxonomyLevels{5, 2}));
-            speciesAbun = zeros(length(rxnsList), length(TaxonomyLevels{6, 2}));
-        end
-        for j = 1:length(rxnsList)
-            for k = 2:size(abundance, 1)
-                % check if the reaction is present in the strain
-                if ReactionPresence{k, j + 1} == 1
-                    % calculate total abundance
-                    totalAbun(j) = totalAbun(j) + str2double(abundance{k, i});
-                    if ~isempty(taxonomy)
-                        % calculate phylum abundance
-                        t = 1;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            phylumAbun(j, taxonCol) = phylumAbun(j, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate class abundance
-                        t = 2;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            classAbun(j, taxonCol) = classAbun(j, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate order abundance
-                        t = 3;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            orderAbun(j, taxonCol) = orderAbun(j, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate family abundance
-                        t = 4;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            familyAbun(j, taxonCol) = familyAbun(j, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate genus abundance
-                        t = 5;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            genusAbun(j, taxonCol) = genusAbun(j, taxonCol) + str2double(abundance{k, i});
-                        end
-                        % calculate species abundance
-                        t = 6;
-                        findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
-                        if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
-                            taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
-                            speciesAbun(j, taxonCol) = speciesAbun(j, taxonCol) + str2double(abundance{k, i});
-                        end
-                    end
-                end
-            end
-        end
-    end
-    %% store the abundances total and on taxonomic levels calculated for the individual in the output structure
-    for j = 1:length(rxnsList)
-        ReactionAbundance.('Total'){i, j + 1} = totalAbun(j);
-        % abundance on taxon levels
-    end
-    if ~isempty(taxonomy)
-        % phylum abundance
-        t = 1;
-        cnt = 2;
-        for j = 1:length(rxnsList)
-            for l = 1:length(TaxonomyLevels{t, 2})
-                ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = phylumAbun(j, l);
-                cnt = cnt + 1;
-            end
-        end
-        % class abundance
-        t = 2;
-        cnt = 2;
-        for j = 1:length(rxnsList)
-            for l = 1:length(TaxonomyLevels{t, 2})
-                ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = classAbun(j, l);
-                cnt = cnt + 1;
-            end
-        end
-        % order abundance
-        t = 3;
-        cnt = 2;
-        for j = 1:length(rxnsList)
-            for l = 1:length(TaxonomyLevels{t, 2})
-                ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = orderAbun(j, l);
-                cnt = cnt + 1;
-            end
-        end
-        % family abundance
-        t = 4;
-        cnt = 2;
-        for j = 1:length(rxnsList)
-            for l = 1:length(TaxonomyLevels{t, 2})
-                ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = familyAbun(j, l);
-                cnt = cnt + 1;
-            end
-        end
-        % genus abundance
-        t = 5;
-        cnt = 2;
-        for j = 1:length(rxnsList)
-            for l = 1:length(TaxonomyLevels{t, 2})
-                ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = genusAbun(j, l);
-                cnt = cnt + 1;
-            end
-        end
-        % species abundance
-        t = 6;
-        cnt = 2;
-        for j = 1:length(rxnsList)
-            for l = 1:length(TaxonomyLevels{t, 2})
-                ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = speciesAbun(j, l);
-                cnt = cnt + 1;
-            end
-        end
-    end
-end
-
-% finally, delete empty columns to avoid unneccessarily big file sizes
-fprintf('Finalizing the output file... \n')
-
-fNames = fieldnames(ReactionAbundance);
-for i = 1:length(fNames)
-    cnt = 1;
-    delArray = [];
-    for j = 2:size(ReactionAbundance.(fNames{i}), 2)
-        cValues = string(ReactionAbundance.(fNames{i})(2:end, j));
-        cTotal = sum(str2double(cValues));
-        if cTotal < 0.000000001
-            delArray(1, cnt) = j;
-            cnt = cnt + 1;
-        end
-    end
-    if ~isempty(delArray)
-        ReactionAbundance.(fNames{i})(:, delArray) = [];
-    end
-end
-
-% export taxonomical information
-taxonCol = 'Phylum';
-% remove unnecessary columns
-taxonomy(:,taxonCol+1:end)=[];
-
-for t = 2:size(TaxonomyLevels, 1)
-    taxa=ReactionAbundance.(TaxonomyLevels{t})(2:end,1);
-    TaxonomyReduced=taxonomy;
-    taxonCol = find(strcmp(taxonomy(1, :), TaxonomyLevels{t}));
-    TaxonomyReduced(:,1:taxonCol-1)=[];
-    % remove duplicate entries
-    [C,IA] = unique(TaxonomyReduced(:,1),'stable');
-    % remove unclassified taxa
-    findUncl=find(contains(C,'unclassified'));
-    IA(findUncl,:)=[];
-    TaxonomyInfo.(TaxonomyLevels{t})=TaxonomyReduced(IA,:);
-end
-
-% Plot the calculated reaction abundances.
-for i = 1:length(fNames)
-    xlabels = ReactionAbundance.(fNames{i})(1, 2:end);
-    ylabels = ReactionAbundance.(fNames{i})(2:end, 1);
-    data = string(ReactionAbundance.(fNames{i})(2:end, 2:end));
-    data = str2double(data);
-    figure;
-    imagesc(data)
-    colormap('hot')
-    colorbar
-    if length(xlabels) < 50
-        set(gca, 'xtick', 1:length(xlabels));
-        xticklabels(xlabels);
-        xtickangle(90)
-    end
-    if length(ylabels) < 50
-        set(gca, 'ytick', 1:length(ylabels));
-        yticklabels(ylabels);
-    end
-    set(gca, 'TickLabelInterpreter', 'none');
-    title(fNames{i})
-end
-
-end
+function [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, infoFilePath, rxnsList, numWorkers)
+% Part of the Microbiome Modeling Toolbox. This function calculates and
+% plots the total abundance of reactions of interest in a given microbiome
+% sample based on the strain-level composition.
+% Reaction presence or absence in each strain is derived from the reaction content
+% of the respective AGORA model. Two results are given: the total abundance,
+% and the abundance on different taxonomical levels.
+%
+% USAGE
+%
+%    [ReactionAbundance,TaxonomyInfo] = calculateReactionAbundance(abundancePath, modelPath, infoFilePath, rxnsList, numWorkers)
+%
+% INPUTS:
+%    abundancePath:          Path to the .csv file with the abundance data.
+%                            Example: 'cobratoolbox/papers/018_microbiomeModelingToolbox/examples/normCoverage.csv'
+%    modelPath:              Folder containing the strain-specific AGORA models
+% OPTIONAL INPUTS:
+%    infoFilePath:           Path to the spreadsheet with the taxonomy
+%                            information on organisms (default:
+%                            AGORA_infoFile.xlsx)
+%    rxnsList:               List of reactions for which the abundance
+%                            should be calculated (if left empty: all
+%                            reactions in all models)
+%    numWorkers:             Number of workers used for parallel pool. If
+%                            left empty, the parallel pool will not be
+%                            started. Parallellization is recommended if
+%                            all reactions are computed.
+%
+% OUTPUT:
+%    ReactionAbundance       Structure with abundance for each microbiome
+%                            and reaction in total and on taxon levels
+%     TaxonomyInfo:          Taxonomical information on each taxon level
+%
+% .. Author: - Almut Heinken, 03/2018
+%                             10/2018:  changed input to location of the csv file with the
+%                                       abundance data
+%                           01/2020:    adapted to be suitable for pan-models
+
+% read the csv file with the abundance data
+abundance = readtable(abundancePath, 'ReadVariableNames', false);
+abundance = table2cell(abundance);
+if isnumeric(abundance{2, 1})
+    abundance(:, 1) = [];
+end
+
+%
+
+% load the models
+for i = 2:size(abundance, 1)
+    model = readCbModel([modelPath filesep abundance{i, 1} '.mat']);
+    modelsList{i, 1} = model;
+end
+
+if ~exist('rxnsList', 'var') || isempty(rxnsList)  % define reaction list if not entered
+    fprintf('No reaction list entered. Abundances will be calculated for all reactions in all models. \n')
+    % get model list from abundance input file
+    for i = 2:size(abundance, 1)
+        model = modelsList{i, 1};
+        rxnsList = vertcat(model.rxns, rxnsList);
+    end
+    rxnsList = unique(rxnsList);
+end
+
+% Get the taxonomy information
+if exist('infoFilePath','var') && ~isempty(infoFilePath)
+    taxonomy = readtable(infoFilePath, 'ReadVariableNames', false);
+    taxonomy = table2cell(taxonomy);
+else
+    taxonomy = readtable('AGORA_infoFile.xlsx', 'ReadVariableNames', false);
+    taxonomy = table2cell(taxonomy);
+end
+
+% load the models found in the individuals and extract which reactions are
+% in which model
+for i = 2:size(abundance, 1)
+    model = modelsList{i, 1};
+    ReactionPresence{i, 1} = abundance{i, 1};
+    for j = 1:length(rxnsList)
+        ReactionPresence{1, j + 1} = rxnsList{j};
+        if ~isempty(find(ismember(model.rxns, rxnsList{j})))
+            ReactionPresence{i, j + 1} = 1;
+        else
+            ReactionPresence{i, j + 1} = 0;
+        end
+    end
+end
+
+% put together a Matlab structure of the results
+ReactionAbundance = struct;
+
+% prepare table for the total abundance
+for j = 1:length(rxnsList)
+    ReactionAbundance.('Total'){1, j + 1} = rxnsList{j};
+end
+
+TaxonomyLevels = {
+    'Phylum'
+    'Class'
+    'Order'
+    'Family'
+    'Genus'
+    'Species'
+    };
+% extract the list of entries on each taxonomical level
+for t = 1:size(TaxonomyLevels, 1)
+    % find the columns corresponding to each taxonomy level and the list of
+    % unique taxa
+    taxonCol = find(strcmp(taxonomy(1, :), TaxonomyLevels{t}));
+    % find and save all entries
+    taxa = unique(taxonomy(2:end, taxonCol));
+    % exclude unclassified entries
+    taxa(strncmp('unclassified', taxa, taxonCol)) = [];
+    TaxonomyLevels{t, 2} = taxa;
+    % define the correct columns in taxonomy table
+    TaxonomyLevels{t, 3} = taxonCol;
+    % prepare table for the abundance on taxon levels
+    cnt = 2;
+    for j = 1:length(rxnsList)
+        for l = 1:length(TaxonomyLevels{t, 2})
+            ReactionAbundance.(TaxonomyLevels{t, 1}){1, cnt} = strcat(TaxonomyLevels{t, 2}{l}, '_', rxnsList{j});
+            cnt = cnt + 1;
+        end
+    end
+end
+
+% Find the right column for the input data (strains, species,..)
+taxa=taxonomy(2:end,1);
+if length(intersect(abundance(2:end,1),taxa))==size(abundance,1)-1
+    inputTaxa=taxa;
+    inputCol=1;
+else
+    abundance(:,1)=regexprep(abundance(:,1),'pan','','once');
+    inputTaxa={};
+    for i=2:size(taxonomy,2)
+        taxa=strrep(taxonomy(:,i),' ','_');
+        taxa=strrep(taxa,'.','_');
+        taxa=strrep(taxa,'/','_');
+        taxa=strrep(taxa,'-','_');
+        taxa=strrep(taxa,'__','_');
+        if length(intersect(abundance(2:end,1),taxa))==size(abundance,1)-1
+            inputTaxa=taxa;
+            inputCol=i;
+        end
+    end
+end
+if isempty(inputTaxa)
+    error('Some taxa in the abundance file are not found in the taxonomy file!')
+end
+
+for i = 2:size(abundance, 2)
+    %% calculate reaction abundance for the samples one by one
+    fprintf(['Calculating reaction abundance for sample ', num2str(i - 1), ' of ' num2str(size(abundance, 2) - 1) '.. \n'])
+    ReactionAbundance.('Total'){i, 1} = abundance{1, i};
+    for t = 1:size(TaxonomyLevels, 1)
+        ReactionAbundance.(TaxonomyLevels{t, 1}){i, 1} = abundance{1, i};
+    end
+    % use parallel pool if workers specified as input
+    if exist('numWorkers', 'var') && numWorkers > 0
+        poolobj = gcp('nocreate');
+        if isempty(poolobj)
+            parpool(numWorkers)
+        end
+    end
+    % create tables in which abundances for each individual for
+    % all reactions/taxa are stored
+    totalAbun = zeros(length(rxnsList), 1);
+    phylumAbun = zeros(length(rxnsList), length(TaxonomyLevels{1, 2}));
+    classAbun = zeros(length(rxnsList), length(TaxonomyLevels{2, 2}));
+    orderAbun = zeros(length(rxnsList), length(TaxonomyLevels{3, 2}));
+    familyAbun = zeros(length(rxnsList), length(TaxonomyLevels{4, 2}));
+    genusAbun = zeros(length(rxnsList), length(TaxonomyLevels{5, 2}));
+    speciesAbun = zeros(length(rxnsList), length(TaxonomyLevels{6, 2}));
+    
+    parfor j = 1:length(rxnsList)
+        
+        % store the abundance for each reaction and taxon separately in a
+        % temporary file to enable parallellization
+        tmpPhyl = zeros(length(rxnsList), length(TaxonomyLevels{1, 2}));
+        tmpClass = zeros(length(rxnsList), length(TaxonomyLevels{2, 2}));
+        tmpOrder = zeros(length(rxnsList), length(TaxonomyLevels{3, 2}));
+        tmpFamily = zeros(length(rxnsList), length(TaxonomyLevels{4, 2}));
+        tmpGenus = zeros(length(rxnsList), length(TaxonomyLevels{5, 2}));
+        tmpSpecies = zeros(length(rxnsList), length(TaxonomyLevels{6, 2}));
+        
+        for k = 2:size(abundance, 1)
+            % check if the reaction is present in the strain
+            if ReactionPresence{k, j + 1} == 1
+                % calculate total abundance
+                totalAbun(j) = totalAbun(j) + str2double(abundance{k, i});
+                % calculate phylum abundance
+                t = 1;
+                findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
+                if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
+                    taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
+                    tmpPhyl(1, taxonCol) = tmpPhyl(1, taxonCol) + str2double(abundance{k, i});
+                end
+                % calculate class abundance
+                t = 2;
+                findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
+                if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
+                    taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
+                    tmpClass(1, taxonCol) = tmpClass(1, taxonCol) + str2double(abundance{k, i});
+                end
+                % calculate order abundance
+                t = 3;
+                findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
+                if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
+                    taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
+                    tmpOrder(1, taxonCol) = tmpOrder(1, taxonCol) + str2double(abundance{k, i});
+                end
+                % calculate family abundance
+                t = 4;
+                findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
+                if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
+                    taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
+                    tmpFamily(1, taxonCol) = tmpFamily(1, taxonCol) + str2double(abundance{k, i});
+                end
+                % calculate genus abundance
+                t = 5;
+                findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
+                if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
+                    taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
+                    tmpGenus(1, taxonCol) = tmpGenus(1, taxonCol) + str2double(abundance{k, i});
+                end
+                % calculate species abundance
+                t = 6;
+                findTax = taxonomy(find(strcmp(abundance{k, 1}, inputTaxa)), TaxonomyLevels{t, 3});
+                if any(strcmp(findTax{1}, TaxonomyLevels{t, 2}))
+                    taxonCol = find(strcmp(findTax{1}, TaxonomyLevels{t, 2}));
+                    tmpSpecies(1, taxonCol) = tmpSpecies(1, taxonCol) + str2double(abundance{k, i});
+                end
+            end
+        end
+        phylumAbun(j, :) = tmpPhyl(1, :);
+        classAbun(j, :) = tmpClass(1, :);
+        orderAbun(j, :) = tmpOrder(1, :);
+        familyAbun(j, :) = tmpFamily(1, :);
+        genusAbun(j, :) = tmpGenus(1, :);
+        speciesAbun(j, :) = tmpSpecies(1, :);
+    end
+    %% store the abundances total and on taxonomic levels calculated for the individual in the output structure
+    for j = 1:length(rxnsList)
+        ReactionAbundance.('Total'){i, j + 1} = totalAbun(j);
+        % abundance on taxon levels
+    end
+    % phylum abundance
+    t = 1;
+    cnt = 2;
+    for j = 1:length(rxnsList)
+        for l = 1:length(TaxonomyLevels{t, 2})
+            ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = phylumAbun(j, l);
+            cnt = cnt + 1;
+        end
+    end
+    % class abundance
+    t = 2;
+    cnt = 2;
+    for j = 1:length(rxnsList)
+        for l = 1:length(TaxonomyLevels{t, 2})
+            ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = classAbun(j, l);
+            cnt = cnt + 1;
+        end
+    end
+    % order abundance
+    t = 3;
+    cnt = 2;
+    for j = 1:length(rxnsList)
+        for l = 1:length(TaxonomyLevels{t, 2})
+            ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = orderAbun(j, l);
+            cnt = cnt + 1;
+        end
+    end
+    % family abundance
+    t = 4;
+    cnt = 2;
+    for j = 1:length(rxnsList)
+        for l = 1:length(TaxonomyLevels{t, 2})
+            ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = familyAbun(j, l);
+            cnt = cnt + 1;
+        end
+    end
+    % genus abundance
+    t = 5;
+    cnt = 2;
+    for j = 1:length(rxnsList)
+        for l = 1:length(TaxonomyLevels{t, 2})
+            ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = genusAbun(j, l);
+            cnt = cnt + 1;
+        end
+    end
+    % species abundance
+    t = 6;
+    cnt = 2;
+    for j = 1:length(rxnsList)
+        for l = 1:length(TaxonomyLevels{t, 2})
+            ReactionAbundance.(TaxonomyLevels{t}){i, cnt} = speciesAbun(j, l);
+            cnt = cnt + 1;
+        end
+    end
+end
+
+% finally, delete empty columns to avoid unneccessarily big file sizes
+fprintf('Finalizing the output file... \n')
+
+fNames = fieldnames(ReactionAbundance);
+for i = 1:length(fNames)
+    cValues = string(ReactionAbundance.(fNames{i})(2:end, 2:end));
+    rownames=ReactionAbundance.(fNames{i})(:,1);
+    ReactionAbundance.(fNames{i})(:,1)=[];
+    cTotal = sum(str2double(cValues),1);
+    ReactionAbundance.(fNames{i})(:,find(cTotal<0.000000001))=[];
+    ReactionAbundance.(fNames{i})=[rownames,ReactionAbundance.(fNames{i})];
+    ReactionAbundance.(fNames{i}){1,1}='Samples';
+end
+
+% export taxonomical information
+taxonCol = 'Phylum';
+% remove unnecessary columns
+taxonomy(:,taxonCol+1:end)=[];
+
+for t = 2:size(TaxonomyLevels, 1)
+    taxa=ReactionAbundance.(TaxonomyLevels{t})(2:end,1);
+    TaxonomyReduced=taxonomy;
+    taxonCol = find(strcmp(taxonomy(1, :), TaxonomyLevels{t}));
+    TaxonomyReduced(:,1:taxonCol-1)=[];
+    % remove duplicate entries
+    [C,IA] = unique(TaxonomyReduced(:,1),'stable');
+    % remove unclassified taxa
+    findUncl=find(contains(C,'unclassified'));
+    IA(findUncl,:)=[];
+    TaxonomyInfo.(TaxonomyLevels{t})=TaxonomyReduced(IA,:);
+end
+
+% Plot the calculated reaction abundances.
+for i = 1:length(fNames)
+    xlabels = ReactionAbundance.(fNames{i})(2:end,1);
+    ylabels = ReactionAbundance.(fNames{i})(1,2:end);
+    data = string(ReactionAbundance.(fNames{i})(2:end, 2:end));
+    data = str2double(data);
+    figure;
+    imagesc(data')
+    colormap('hot')
+    colorbar
+    if length(xlabels) < 50
+        set(gca, 'xtick', 1:length(xlabels));
+        xticklabels(xlabels);
+        xtickangle(90)
+    end
+    if length(ylabels) < 50
+        set(gca, 'ytick', 1:length(ylabels));
+        yticklabels(ylabels);
+    end
+    set(gca, 'TickLabelInterpreter', 'none');
+    title(fNames{i})
+end
+
+end
\ No newline at end of file
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateSubsystemAbundance.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateSubsystemAbundance.m
index ae3a5b1083..090b5bd7f1 100644
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateSubsystemAbundance.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/calculateSubsystemAbundance.m
@@ -17,7 +17,7 @@
 % AUTHOR
 %       - Almut Heinken, 08/2020
 
-reactionDatabase = readtable('reactionDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
+reactionDatabase = readtable('ReactionDatabase.txt', 'Delimiter', 'tab','TreatAsEmpty',['UND. -60001','UND. -2011','UND. -62011'], 'ReadVariableNames', false);
 reactionDatabase=table2cell(reactionDatabase);
 
 reactionAbundance = readtable(reactionAbundancePath, 'ReadVariableNames', false);
@@ -54,6 +54,4 @@
     end
 end
 
-writetable(cell2table(subsystemAbundance),'SubsystemAbundance.txt','FileType','text','WriteVariableNames',false,'Delimiter','\t');
-
 end
\ No newline at end of file
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/fastCalculateReactionAbundance.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/fastCalculateReactionAbundance.m
new file mode 100644
index 0000000000..48e4acdcc6
--- /dev/null
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/additionalAnalysis/fastCalculateReactionAbundance.m
@@ -0,0 +1,119 @@
+function ReactionAbundance = fastCalculateReactionAbundance(abundancePath, modelPath, rxnsList, numWorkers)
+% Part of the Microbiome Modeling Toolbox. This function calculates and
+% plots the total abundance of reactions of interest in a given microbiome
+% sample based on the strain-level composition.
+% Reaction presence or absence in each strain is derived from the reaction content
+% of the respective AGORA model.
+%
+% USAGE
+%
+%    ReactionAbundance = fastCalculateReactionAbundance(abundancePath, modelPath, rxnsList, numWorkers)
+%
+% INPUTS:
+%    abundancePath:          Path to the .csv file with the abundance data.
+%                            Example: 'cobratoolbox/papers/018_microbiomeModelingToolbox/examples/normCoverage.csv'
+%    modelPath:              Folder containing the strain-specific AGORA models
+% OPTIONAL INPUTS:
+%    rxnsList:               List of reactions for which the abundance
+%                            should be calculated (if left empty: all
+%                            reactions in all models)
+%    numWorkers:             Number of workers used for parallel pool. If
+%                            left empty, the parallel pool will not be
+%                            started. Parallellization is recommended if
+%                            all reactions are computed.
+%
+% OUTPUT:
+%    ReactionAbundance       Table with total abundance for each microbiome
+%                            and reaction
+%
+% .. Author: - Almut Heinken, 04/2021
+
+% read the csv file with the abundance data
+abundance = readtable(abundancePath, 'ReadVariableNames', false);
+abundance = table2cell(abundance);
+if isnumeric(abundance{2, 1})
+    abundance(:, 1) = [];
+end
+
+% load the models
+for i = 2:size(abundance, 1)
+    model = readCbModel([modelPath filesep abundance{i, 1} '.mat']);
+    modelsList{i, 1} = model;
+end
+
+if ~exist('rxnsList', 'var') || isempty(rxnsList)  % define reaction list if not entered
+    fprintf('No reaction list entered. Abundances will be calculated for all reactions in all models. \n')
+    % get model list from abundance input file
+    for i = 2:size(abundance, 1)
+        model = modelsList{i, 1};
+        rxnsList = vertcat(model.rxns, rxnsList);
+    end
+    rxnsList = unique(rxnsList);
+end
+
+% load the models found in the individuals and extract which reactions are
+% in which model
+for i = 2:size(abundance, 1)
+    model = modelsList{i, 1};
+    ReactionPresence{i, 1} = abundance{i, 1};
+    for j = 1:length(rxnsList)
+        ReactionPresence{1, j + 1} = rxnsList{j};
+        if ~isempty(find(ismember(model.rxns, rxnsList{j})))
+            ReactionPresence{i, j + 1} = '1';
+        else
+            ReactionPresence{i, j + 1} = '0';
+        end
+    end
+end
+ReactionPresence{1,1}='Strains';
+
+
+% prepare table for the total abundance
+ReactionAbundance = {};
+for i = 1:length(rxnsList)
+    ReactionAbundance{1, i + 1} = rxnsList{i};
+end
+for i = 2:size(abundance, 2)
+    ReactionAbundance{i, 1} = abundance{1, i};
+end
+
+% use parallel pool if workers specified as input
+if exist('numWorkers', 'var') && numWorkers > 0
+    poolobj = gcp('nocreate');
+    if isempty(poolobj)
+        parpool(numWorkers)
+    end
+end
+
+clear abundance
+
+totalAbun={};
+parfor i = 2:size(ReactionAbundance, 1)
+    i
+    % reload the file to avoid running out of memory
+    abundance = readtable(abundancePath, 'ReadVariableNames', false);
+    abundance = table2cell(abundance);
+    if isnumeric(abundance{2, 1})
+        abundance(:, 1) = [];
+    end
+    
+    % temporarily store reaction abundances
+    totalAbun{i} = zeros(length(rxnsList), 1);
+    
+    for j = 2:size(abundance, 1)
+        % find all reactions present in the strain
+        presentRxns = find(strcmp(ReactionPresence(j,2:end),'1'));
+        
+        for k = 1:length(presentRxns)
+            % summarize total abundance
+            totalAbun{i}(presentRxns(k),1) = totalAbun{i}(presentRxns(k),1) + str2double(abundance{j,i});
+        end
+    end
+end
+
+% collect the temporarily stored abundances to put together the table
+for i = 2:size(ReactionAbundance, 1)
+    ReactionAbundance(i,2:end) = num2cell(totalAbun{i});
+end
+
+end
\ No newline at end of file
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m
index cd991c1355..a8bcf706da 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/adaptVMHDietToAGORA.m
@@ -51,7 +51,7 @@
 
 % Define the list of metabolites required by at least one AGORA model for
 % growth
-essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_adpcbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'};
+essentialMetabolites = {'EX_12dgr180(e)'; 'EX_26dap_M(e)'; 'EX_2dmmq8(e)'; 'EX_2obut(e)'; 'EX_3mop(e)'; 'EX_4abz(e)'; 'EX_4hbz(e)'; 'EX_ac(e)'; 'EX_acgam(e)'; 'EX_acmana(e)'; 'EX_acnam(e)'; 'EX_ade(e)'; 'EX_adn(e)'; 'EX_adocbl(e)'; 'EX_ala_D(e)'; 'EX_ala_L(e)'; 'EX_amet(e)'; 'EX_amp(e)'; 'EX_arab_D(e)'; 'EX_arab_L(e)'; 'EX_arg_L(e)'; 'EX_asn_L(e)'; 'EX_btn(e)'; 'EX_ca2(e)'; 'EX_cbl1(e)'; 'EX_cgly(e)'; 'EX_chor(e)'; 'EX_chsterol(e)'; 'EX_cit(e)'; 'EX_cl(e)'; 'EX_cobalt2(e)'; 'EX_csn(e)'; 'EX_cu2(e)'; 'EX_cys_L(e)'; 'EX_cytd(e)'; 'EX_dad_2(e)'; 'EX_dcyt(e)'; 'EX_ddca(e)'; 'EX_dgsn(e)'; 'EX_fald(e)'; 'EX_fe2(e)'; 'EX_fe3(e)'; 'EX_fol(e)'; 'EX_for(e)'; 'EX_gal(e)'; 'EX_glc_D(e)'; 'EX_gln_L(e)'; 'EX_glu_L(e)'; 'EX_gly(e)'; 'EX_glyc(e)'; 'EX_glyc3p(e)'; 'EX_gsn(e)'; 'EX_gthox(e)'; 'EX_gthrd(e)'; 'EX_gua(e)'; 'EX_h(e)'; 'EX_h2o(e)'; 'EX_h2s(e)'; 'EX_his_L(e)'; 'EX_hxan(e)'; 'EX_ile_L(e)'; 'EX_k(e)'; 'EX_lanost(e)'; 'EX_leu_L(e)'; 'EX_lys_L(e)'; 'EX_malt(e)'; 'EX_met_L(e)'; 'EX_mg2(e)'; 'EX_mn2(e)'; 'EX_mqn7(e)'; 'EX_mqn8(e)'; 'EX_nac(e)'; 'EX_ncam(e)'; 'EX_nmn(e)'; 'EX_no2(e)'; 'EX_ocdca(e)'; 'EX_ocdcea(e)'; 'EX_orn(e)'; 'EX_phe_L(e)'; 'EX_pheme(e)'; 'EX_pi(e)'; 'EX_pnto_R(e)'; 'EX_pro_L(e)'; 'EX_ptrc(e)'; 'EX_pydx(e)'; 'EX_pydxn(e)'; 'EX_q8(e)'; 'EX_rib_D(e)'; 'EX_ribflv(e)'; 'EX_ser_L(e)'; 'EX_sheme(e)'; 'EX_so4(e)'; 'EX_spmd(e)'; 'EX_thm(e)'; 'EX_thr_L(e)'; 'EX_thymd(e)'; 'EX_trp_L(e)'; 'EX_ttdca(e)'; 'EX_tyr_L(e)'; 'EX_ura(e)'; 'EX_val_L(e)'; 'EX_xan(e)'; 'EX_xyl_D(e)'; 'EX_zn2(e)'; 'EX_glu_D(e)'; 'EX_melib(e)'; 'EX_chtbs(e)'; 'EX_metsox_S_L(e)'; 'EX_hdca(e)'; 'EX_gam(e)'; 'EX_indole(e)'; 'EX_glcn(e)'; 'EX_coa(e)'; 'EX_man(e)'; 'EX_fum(e)'; 'EX_succ(e)'; 'EX_no3(e)'; 'EX_ins(e)'; 'EX_uri(e)'; 'EX_drib(e)'; 'EX_pime(e)'; 'EX_lac_L(e)'; 'EX_glypro(e)'; 'EX_urea(e)'; 'EX_duri(e)'; 'EX_h2(e)'; 'EX_mal_L(e)'; 'EX_tre(e)'; 'EX_orot(e)'};
 
 % fix any exchange nomenclature issues
 adaptedDietConstraints(:, 1) = strrep(adaptedDietConstraints(:, 1), '[e]', '(e)');
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m
new file mode 100644
index 0000000000..5ae37cfa92
--- /dev/null
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/buildModelStorage.m
@@ -0,0 +1,78 @@
+function [exch,modelStoragePath] = buildModelStorage(microbeNames,modPath)
+
+currentDir=pwd;
+mkdir('modelStorage')
+cd('modelStorage')
+modelStoragePath = pwd;
+
+exch = {};
+for j = 1:size(microbeNames, 1)
+    model = readCbModel([modPath filesep microbeNames{j,1} '.mat']);
+    %exch = union(exch, model.mets(find(sum(model.S(:, strncmp('EX_', model.rxns, 3)), 2) ~= 0)));
+    exStruct = findSExRxnInd(model);
+    new_exch = findMetsFromRxns(model,model.rxns(exStruct.ExchRxnBool & ~exStruct.biomassBool));
+    exch = union(exch,new_exch);
+end
+
+% get already built reconstructions
+dInfo = dir(modelStoragePath);
+modelList={dInfo.name};
+modelList=modelList';
+modelList=strrep(modelList,'.mat','');
+microbesNames=setdiff(microbeNames,modelList);
+
+
+if length(microbesNames)>0
+    %% create a new extracellular space [u] for microbes
+    for j = 1:size(microbeNames, 1)
+        model = readCbModel([modPath filesep microbeNames{j,1} '.mat']);
+        % temp fix
+        if isfield(model,'C')
+            model=rmfield(model,'C');
+            model=rmfield(model,'d');
+        end
+        %
+        
+        % removing possible constraints of the bacs
+        selExc = findExcRxns(model);
+        Reactions2 = model.rxns(find(selExc));
+        allex = Reactions2(strmatch('EX', Reactions2));
+        biomass = allex(find(strncmp(allex,'bio',3)));
+        finrex = setdiff(allex, biomass);
+        model = changeRxnBounds(model, finrex, -1000, 'l');
+        model = changeRxnBounds(model, finrex, 1000, 'u');
+        
+        % removing blocked reactions from the bacs
+        %BlockedRxns = identifyFastBlockedRxns(model,model.rxns, printLevel);
+        %model= removeRxns(model, BlockedRxns);
+        %BlockedReaction = findBlockedReaction(model,'L2')
+        
+        model = convertOldStyleModel(model);
+        exmod = model.rxns(strncmp('EX_', model.rxns, 3));  % find exchange reactions
+        eMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]')));  % exchanged metabolites
+        dummyMicEU = createModel();
+        %dummyMicEU = makeDummyModel(2 * size(eMets, 1), size(eMets, 1));
+        dummyMicEUmets = [strcat(strcat(microbeNames{j, 1}, '_'), regexprep(eMets, '\[e\]', '\[u\]')); regexprep(eMets, '\[e\]', '\[u\]')];
+        dummyMicEU = addMultipleMetabolites(dummyMicEU,dummyMicEUmets);
+        nMets = numel(eMets);
+        S = [speye(nMets);-speye(nMets)];
+        lbs = repmat(-1000,nMets,1);
+        ubs = repmat(1000,nMets,1);
+        names = strcat(strcat(microbeNames{j, 1}, '_'), 'IEX_', regexprep(eMets, '\[e\]', '\[u\]'), 'tr');
+        dummyMicEU = addMultipleReactions(dummyMicEU,names,dummyMicEUmets,S,'lb',lbs,'ub',ubs);
+        model = removeRxns(model, exmod);
+        model.rxns = strcat(strcat(microbeNames{j, 1}, '_'), model.rxns);
+        model.mets = strcat(strcat(microbeNames{j, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]'));  % replace [e] with [u]
+        [model] = mergeTwoModels(dummyMicEU, model, 2, false, false);
+        
+        %finish up by A: removing duplicate reactions
+        %We will lose information here, but we will just remove the duplicates.
+        [model,rxnToRemove,rxnToKeep]= checkDuplicateRxn(model,'S',1,0,1);
+        
+        writeCbModel(model,'format','mat','fileName',[microbeNames{j,1} '.mat']);  % store model
+    end
+end
+
+cd(currentDir)
+
+end
\ No newline at end of file
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m
index 27cddfe691..b3db602737 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/createPersonalizedModel.m
@@ -28,7 +28,7 @@
 createdModels = {};
 
 % use the setup model containing every strain in every sample
-parfor k = 1:length(sampNames)
+for k = 1:length(sampNames)
     mgmodel = model;
     abunRed = abundance(:,k+1);
     
@@ -64,7 +64,7 @@
         % Coupling constraints for bacteria
         for i = 1:length(presBac)
             IndRxns=find(strncmp(mgmodel.rxns,[presBac{i,1} '_'],length(presBac{i,1})+1));%finding indixes of specific reactions
-            % find the name of biomass reacion in the microbe model
+            % find the name of biomass reaction in the microbe model
             bioRxn=mgmodel.rxns{find(strncmp(mgmodel.rxns,strcat(presBac{i,1},'_bio'),length(char(strcat(presBac{i,1},'_bio')))))};
             mgmodel=coupleRxnList2Rxn(mgmodel,mgmodel.rxns(IndRxns(1:length(mgmodel.rxns(IndRxns(:,1)))-1,1)),bioRxn,400,0); %couple the specific reactions
         end
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/fastSetupCreator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/fastSetupCreator.m
index a25377e0e6..750de3eb47 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/fastSetupCreator.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/fastSetupCreator.m
@@ -1,4 +1,4 @@
-function model = fastSetupCreator(modPath, microbeNames, host, objre, numWorkers)
+function model = fastSetupCreator(exch, modelStoragePath, microbeNames, host, objre, buildSetupAll)
 % creates a microbiota model (min 1 microbe) that can be coupled with a host
 % model. Microbes and host are connected with a lumen compartment [u], host
 % can secrete metabolites into body fluids [b]. Diet is simulated as uptake
@@ -25,35 +25,22 @@
 %    host:                Host COBRA model structure, can be left empty if
 %                         there is no host model
 %    objre:               char with reaction name of objective function of microbeNames
-%    numWorkers:          integer indicating the number of cores to use for parallelization
+%    buildSetupAll:       boolean indicating the strategy that should be used to
+%                         build personalized models: if true, build a global setup model
+%                         containing all organisms in at least model (default), false: create
+%                         models one by one (recommended for more than ~500 organisms total)
 %
 % OUTPUT:
 %    model:               COBRA model structure with all models combined
 %
 % .. Author: Stefania Magnusdottir and Federico Baldini 2016-2018
-
-% set parallel pool
-if numWorkers > 1
-    poolobj = gcp('nocreate');
-    if isempty(poolobj)
-        parpool(numWorkers)
-    end
-end
+%           - Almut Heinken, 04/2021: adapted strategy for improved speed.
 
 if ~isempty(host)  % Get list of all exchanged metabolites
     %exch = host.mets(find(sum(host.S(:, strncmp('EX_', host.rxns, 3)), 2) ~= 0));
     exStruct = findSExRxnInd(host);
-    exch = findMetsFromRxns(host,host.rxns(exStruct.ExchRxnBool & ~exStruct.biomassBool));
-else
-    exch = {};
+    exch = union(exch,findMetsFromRxns(host,host.rxns(exStruct.ExchRxnBool & ~exStruct.biomassBool)));
 end
- for j = 1:size(microbeNames, 1)
-    model = readCbModel([modPath filesep microbeNames{j,1} '.mat']);
-    %exch = union(exch, model.mets(find(sum(model.S(:, strncmp('EX_', model.rxns, 3)), 2) ~= 0)));
-    exStruct = findSExRxnInd(model);
-    new_exch = findMetsFromRxns(model,model.rxns(exStruct.ExchRxnBool & ~exStruct.biomassBool));
-    exch = union(exch,new_exch);
- end
 
 % The biomass 'biomass[c]' should not be inserted in the list of exchanges.
 % Hence it will be removed.
@@ -71,20 +58,20 @@
 
 nMets = numel(umets);
 stoich = [-speye(nMets),-speye(nMets),sparse(nMets,nMets),sparse(nMets,nMets);...
-          sparse(nMets,nMets),speye(nMets),-speye(nMets),sparse(nMets,nMets);...
-          sparse(nMets,nMets),sparse(nMets,nMets),speye(nMets),-speye(nMets)];
+    sparse(nMets,nMets),speye(nMets),-speye(nMets),sparse(nMets,nMets);...
+    sparse(nMets,nMets),sparse(nMets,nMets),speye(nMets),-speye(nMets)];
 lbs = [repmat(-1000,nMets,1);zeros(nMets,1);zeros(nMets,1);repmat(-1000,nMets,1)];
 ubs = repmat(1000,4*nMets,1);
 rxnNames = [strcat('EX_',mets(1:nMets));...
-            strcat('DUt_',strrep(umets,'[e]',''));...
-            strcat('UFEt_',strrep(umets,'[e]',''));...
-            strcat('EX_',strrep(umets, '[e]', '[fe]'))];
+    strcat('DUt_',strrep(umets,'[e]',''));...
+    strcat('UFEt_',strrep(umets,'[e]',''));...
+    strcat('EX_',strrep(umets, '[e]', '[fe]'))];
 dummy = addMultipleReactions(dummy,rxnNames,mets,stoich,'lb',lbs,'ub',ubs');
 order = [1:nMets;nMets+1:2*nMets;2*nMets+1:3*nMets;3*nMets+1:4*nMets];
 order = order(:);
 dummy = updateFieldOrderForType(dummy,'rxns',order);
 %Now, we could 'reorder' this reaction list but I'm not sure its necessary.
-% 
+%
 % cnt = 0;
 % for j = 1:size(exch, 1)
 %     mdInd = find(ismember(dummy.mets, strrep(exch{j, 1}, '[e]', '[d]')));
@@ -119,175 +106,155 @@
 
 %% create a new extracellular space [b] for host
 if ~isempty(host)
-exMets = find(~cellfun(@isempty, strfind(host.mets, '[e]')));  % find all mets that appear in [e]
-exRxns = host.rxns(strncmp('EX_', host.rxns, 3));  % find exchanges in host
-exMetRxns = find(sum(abs(host.S(exMets, :)), 1) ~= 0);  % find reactions that contain mets from [e]
-exMetRxns = exMetRxns';
-exMetRxnsMets = find(sum(abs(host.S(:, exMetRxns)), 2) ~= 0);  % get all metabolites of [e] containing rxns
-dummyHostB = createModel(); %makeDummyModel(size(exMetRxnsMets, 1), size(exMetRxns, 1));
-dummyHostB = addMultipleMetabolites(dummyHostB,strcat({'Host_'}, regexprep(host.mets(exMetRxnsMets), '\[e\]', '\[b\]')));
-dummyHostB = addMultipleReactions(dummyHostB,strcat({'Host_'}, host.rxns(exMetRxns), {'b'}),dummyHostB.mets,host.S(exMetRxnsMets, exMetRxns),'c',host.c(exMetRxns),'lb',host.lb(exMetRxns),'ub',host.ub(exMetRxns));
-%dummyHostB.rxns = ;
-%dummyHostB.mets = strcat({'Host_'}, regexprep(host.mets(exMetRxnsMets), '\[e\]', '\[b\]'));  % replace [e] with [b]
-%dummyHostB.S = host.S(exMetRxnsMets, exMetRxns);
-%dummyHostB.c = host.c(exMetRxns);
-%dummyHostB.lb = host.lb(exMetRxns);
-%dummyHostB.ub = host.ub(exMetRxns);
-
-
-% remove exchange reactions from host while leaving demand and sink
-% reactions
-host = removeRxns(host, exRxns);
-host.mets = strcat({'Host_'}, host.mets);
-host.rxns = strcat({'Host_'}, host.rxns);
-
-% use mergeToModels without combining genes
-[host] = mergeTwoModels(dummyHostB, host, 2, false, false);
-
-% Change remaining [e] (transporters) to [u] to transport diet metabolites
-exMets2 = ~cellfun(@isempty, strfind(host.mets, '[e]'));  % again, find all mets that appear in [e]
-% exMetRxns2=find(sum(host.S(exMets2,:),1)~=0);%find reactions that contain mets from [e]
-% exMetRxns2=exMetRxns2';
-% exMetRxnsMets2=find(sum(host.S(:,exMetRxns2),2)~=0);%get all metabolites of [e] containing rxns
-% host.mets=regexprep(host.mets,'\[e\]','\[u\]');%replace [e] with [u]
-dummyHostEU = createModel();
-%makeDummyModel(2 * size(exMets2, 1), size(exMets2, 1));
-hostmets = host.mets(exMets2);
-dummyHostEUmets = [strrep(strrep(hostmets, 'Host_', ''), '[e]', '[u]'); hostmets];
-dummyHostEU = addMultipleMetabolites(dummyHostEU,dummyHostEUmets);
-nMets = numel(hostmets);
-S = [-speye(nMets),speye(nMets)];
-lbs = repmat(-1000,nMets,1);
-ubs = repmat(1000,nMets,1);
-names = strrep(strcat('Host_IEX_', strrep(hostmets, 'Host_', ''), 'tr'), '[e]', '[u]');
-dummyHostEU = addMultipleReactions(dummyHostEU,names,dummyHostEUmets,S','lb',lbs,'ub',ubs);
-% for j = 1:size(exMets2, 1)
-%     dummyHostEU.rxns{j, 1} = strrep(strcat('Host_IEX_', strrep(host.mets{exMets2(j), 1}, 'Host_', ''), 'tr'), '[e]', '[u]');
-%     metU = find(ismember(dummyHostEU.mets, strrep(strrep(host.mets{exMets2(j)}, 'Host_', ''), '[e]', '[u]')));
-%     metE = find(ismember(dummyHostEU.mets, host.mets{exMets2(j)}));
-%     dummyHostEU.S(metU, j) = 1;
-%     dummyHostEU.S(metE, j) = -1;
-%     dummyHostEU.lb(j) = -1000;
-%     dummyHostEU.ub(j) = 1000;
-% end
-[host] = mergeTwoModels(dummyHostEU, host, 2, false, false);
-end
-
-
-%% create a new extracellular space [u] for microbes, code runs in parallel
-modelStorage = cell(size(microbeNames));
-% MexGJoined=MexGHost;
-parfor j = 1:size(microbeNames, 1)
-    model = readCbModel([modPath filesep microbeNames{j,1} '.mat']);
+    exMets = find(~cellfun(@isempty, strfind(host.mets, '[e]')));  % find all mets that appear in [e]
+    exRxns = host.rxns(strncmp('EX_', host.rxns, 3));  % find exchanges in host
+    exMetRxns = find(sum(abs(host.S(exMets, :)), 1) ~= 0);  % find reactions that contain mets from [e]
+    exMetRxns = exMetRxns';
+    exMetRxnsMets = find(sum(abs(host.S(:, exMetRxns)), 2) ~= 0);  % get all metabolites of [e] containing rxns
+    dummyHostB = createModel(); %makeDummyModel(size(exMetRxnsMets, 1), size(exMetRxns, 1));
+    dummyHostB = addMultipleMetabolites(dummyHostB,strcat({'Host_'}, regexprep(host.mets(exMetRxnsMets), '\[e\]', '\[b\]')));
+    dummyHostB = addMultipleReactions(dummyHostB,strcat({'Host_'}, host.rxns(exMetRxns), {'b'}),dummyHostB.mets,host.S(exMetRxnsMets, exMetRxns),'c',host.c(exMetRxns),'lb',host.lb(exMetRxns),'ub',host.ub(exMetRxns));
+    %dummyHostB.rxns = ;
+    %dummyHostB.mets = strcat({'Host_'}, regexprep(host.mets(exMetRxnsMets), '\[e\]', '\[b\]'));  % replace [e] with [b]
+    %dummyHostB.S = host.S(exMetRxnsMets, exMetRxns);
+    %dummyHostB.c = host.c(exMetRxns);
+    %dummyHostB.lb = host.lb(exMetRxns);
+    %dummyHostB.ub = host.ub(exMetRxns);
     
-    % removing possible constraints of the bacs
-    selExc = findExcRxns(model);
-    Reactions2 = model.rxns(find(selExc));
-    allex = Reactions2(strmatch('EX', Reactions2));
-    biomass = allex(strmatch(objre, allex));
-    finrex = setdiff(allex, biomass);
-    model = changeRxnBounds(model, finrex, -1000, 'l');
-    model = changeRxnBounds(model, finrex, 1000, 'u');
     
-    % removing blocked reactions from the bacs
-    %BlockedRxns = identifyFastBlockedRxns(model,model.rxns, printLevel);
-    %model= removeRxns(model, BlockedRxns);
-    %BlockedReaction = findBlockedReaction(model,'L2')
+    % remove exchange reactions from host while leaving demand and sink
+    % reactions
+    host = removeRxns(host, exRxns);
+    host.mets = strcat({'Host_'}, host.mets);
+    host.rxns = strcat({'Host_'}, host.rxns);
     
-    model = convertOldStyleModel(model);
-    exmod = model.rxns(strncmp('EX_', model.rxns, 3));  % find exchange reactions
-    eMets = model.mets(~cellfun(@isempty, strfind(model.mets, '[e]')));  % exchanged metabolites
-    dummyMicEU = createModel();    
-    %dummyMicEU = makeDummyModel(2 * size(eMets, 1), size(eMets, 1));
-    dummyMicEUmets = [strcat(strcat(microbeNames{j, 1}, '_'), regexprep(eMets, '\[e\]', '\[u\]')); regexprep(eMets, '\[e\]', '\[u\]')];
-    dummyMicEU = addMultipleMetabolites(dummyMicEU,dummyMicEUmets);
-    nMets = numel(eMets);
-    S = [speye(nMets);-speye(nMets)];
+    % use mergeToModels without combining genes
+    [host] = mergeTwoModels(dummyHostB, host, 2, false, false);
+    
+    % Change remaining [e] (transporters) to [u] to transport diet metabolites
+    exMets2 = ~cellfun(@isempty, strfind(host.mets, '[e]'));  % again, find all mets that appear in [e]
+    % exMetRxns2=find(sum(host.S(exMets2,:),1)~=0);%find reactions that contain mets from [e]
+    % exMetRxns2=exMetRxns2';
+    % exMetRxnsMets2=find(sum(host.S(:,exMetRxns2),2)~=0);%get all metabolites of [e] containing rxns
+    % host.mets=regexprep(host.mets,'\[e\]','\[u\]');%replace [e] with [u]
+    dummyHostEU = createModel();
+    %makeDummyModel(2 * size(exMets2, 1), size(exMets2, 1));
+    hostmets = host.mets(exMets2);
+    dummyHostEUmets = [strrep(strrep(hostmets, 'Host_', ''), '[e]', '[u]'); hostmets];
+    dummyHostEU = addMultipleMetabolites(dummyHostEU,dummyHostEUmets);
+    nMets = numel(hostmets);
+    S = [-speye(nMets),speye(nMets)];
     lbs = repmat(-1000,nMets,1);
     ubs = repmat(1000,nMets,1);
-    names = strcat(strcat(microbeNames{j, 1}, '_'), 'IEX_', regexprep(eMets, '\[e\]', '\[u\]'), 'tr');
-    dummyMicEU = addMultipleReactions(dummyMicEU,names,dummyMicEUmets,S,'lb',lbs,'ub',ubs);
-    model = removeRxns(model, exmod);
-    model.rxns = strcat(strcat(microbeNames{j, 1}, '_'), model.rxns);
-    model.mets = strcat(strcat(microbeNames{j, 1}, '_'), regexprep(model.mets, '\[e\]', '\[u\]'));  % replace [e] with [u]
-    [model] = mergeTwoModels(dummyMicEU, model, 2, false, false);
-    modelStorage{j, 1} = model;  % store model
+    names = strrep(strcat('Host_IEX_', strrep(hostmets, 'Host_', ''), 'tr'), '[e]', '[u]');
+    dummyHostEU = addMultipleReactions(dummyHostEU,names,dummyHostEUmets,S','lb',lbs,'ub',ubs);
+    % for j = 1:size(exMets2, 1)
+    %     dummyHostEU.rxns{j, 1} = strrep(strcat('Host_IEX_', strrep(host.mets{exMets2(j), 1}, 'Host_', ''), 'tr'), '[e]', '[u]');
+    %     metU = find(ismember(dummyHostEU.mets, strrep(strrep(host.mets{exMets2(j)}, 'Host_', ''), '[e]', '[u]')));
+    %     metE = find(ismember(dummyHostEU.mets, host.mets{exMets2(j)}));
+    %     dummyHostEU.S(metU, j) = 1;
+    %     dummyHostEU.S(metE, j) = -1;
+    %     dummyHostEU.lb(j) = -1000;
+    %     dummyHostEU.ub(j) = 1000;
+    % end
+    [host] = mergeTwoModels(dummyHostEU, host, 2, false, false);
 end
 
-%% Merge the models in a parallel way
-
-% Find the base 2 log of the number of models (how many branches are needed), and merge the models two by two:
-% In each column of model storage the number of models decreases of half
-%(because they have been pairwise merged) till the last column where only
-% one big model is contained. The models that are not pairwise merged
-%(because number of rows is not even ) are stored and then merged
-% sequentially to the big model.
-
-
-pos = {};  % array where the position of models that cannot be merged pairwise (because their number in that iter is not
-% even) in the original modelStorage vector is stored
-dim = size(microbeNames, 1);
-for j = 2:(floor(log2(size(microbeNames, 1))) + 1)  % +1 because it starts with one column shifted
-	if mod(dim, 2) == 1  % check if number is even or not
-		halfdim = dim - 1;  % approximated half dimension (needed to find how many iters to do
-        % for the pairwise merging
-		pos{1, j} = halfdim + 1;  % find index of extramodel
-		halfdim = halfdim / 2;
-	else
-		halfdim = dim / 2;  % no need for approximation
+if buildSetupAll
+    % Merge the models in a parallel way
+    % First load the stored models with lumen compartment in place
+    modelStorage = {};
+    for i = 1:size(microbeNames, 1)
+        loadedModel = readCbModel([modelStoragePath filesep microbeNames{i,1} '.mat']);
+        modelStorage{i, 1} = loadedModel;
     end
-    FirstSaveStore=modelStorage(:,(j-1));
-    % SecondSaveStore=modelStorage(:,(j-1)); %changes 010318
-    modelStorage(1:(dim-1),(j-1))={[]}; %this line will erase all the models from the container 
-    %with the only exception of the last one that might be needed to be
-    %merged separately. This prevents a dramatic increase in ram usage in
-    %each iteration as result of stoaring all the merging three. 
     
-    parfor k=1:halfdim
-        parind = k;	
-        parind=parind+(k-1);
-        FirstMod=FirstSaveStore(parind);
-        % SecondMod=SecondSaveStore(parind+1);%changes 010318
-        SecondMod=FirstSaveStore(parind+1);%changes 010318
-        % modelStorage{k,j} = mergeTwoModels(FirstMod{1},SecondMod{1},1,false,false)%changes 010318
-        modelStorage{k,j} = mergeTwoModels(FirstMod{1},SecondMod{1},1,false,false)	
+    % Find the base 2 log of the number of models (how many branches are needed), and merge the models two by two:
+    % In each column of model storage the number of models decreases of half
+    %(because they have been pairwise merged) till the last column where only
+    % one big model is contained. The models that are not pairwise merged
+    %(because number of rows is not even ) are stored and then merged
+    % sequentially to the big model.
+    
+    pos = {};  % array where the position of models that cannot be merged pairwise (because their number in that iter is not
+    % even) in the original modelStorage vector is stored
+    dim = size(microbeNames, 1);
+    for j = 2:(floor(log2(size(microbeNames, 1))) + 1)  % +1 because it starts with one column shifted
+        if mod(dim, 2) == 1  % check if number is even or not
+            halfdim = dim - 1;  % approximated half dimension (needed to find how many iters to do
+            % for the pairwise merging
+            pos{1, j} = halfdim + 1;  % find index of extramodel
+            halfdim = halfdim / 2;
+        else
+            halfdim = dim / 2;  % no need for approximation
+        end
+        FirstSaveStore=modelStorage(:,(j-1));
+        % SecondSaveStore=modelStorage(:,(j-1)); %changes 010318
+        modelStorage(1:(dim-1),(j-1))={[]}; %this line will erase all the models from the container
+        %with the only exception of the last one that might be needed to be
+        %merged separately. This prevents a dramatic increase in ram usage in
+        %each iteration as result of stoaring all the merging three.
+        
+        for k=1:halfdim
+            parind = k;
+            parind=parind+(k-1);
+            FirstMod=FirstSaveStore(parind);
+            % SecondMod=SecondSaveStore(parind+1);%changes 010318
+            SecondMod=FirstSaveStore(parind+1);%changes 010318
+            % modelStorage{k,j} = mergeTwoModels(FirstMod{1},SecondMod{1},1,false,false)%changes 010318
+            modelStorage{k,j} = mergeTwoModels(FirstMod{1},SecondMod{1},1,false,false);
+        end
+        dim = halfdim;
     end
-	dim = halfdim;
-end
-
-% Merging the models remained alone and non-pairwise matched
-if isempty(pos)== 1 %all the models were pairwise-merged
-[model] = modelStorage{1,(floor(log2(size(microbeNames,1)))+1)};
-else
-    position = pos(1,:); %finding positions of non merged models
-    nexmod = find(~cellfun(@isempty,pos(1,:)));
-    toMerge = cell2mat(position(nexmod));%list of models still to merge 
-    if (length(toMerge)) > 1 %more than 1 model was not pairwise merged
-        for k=2:(length(toMerge)+1)
-            if k==2
-               [model] = mergeTwoModels(modelStorage{toMerge(1,k-1),(nexmod(k-1))-1},modelStorage{toMerge(1,k),(nexmod(k))-1},1,false,false);                   
-            elseif k > 3       
-               [model] = mergeTwoModels(modelStorage{toMerge(1,k-1),(nexmod(k-1))-1},model,1,false,false);
+    
+    % Merging the models remained alone and non-pairwise matched
+    if isempty(pos)== 1 %all the models were pairwise-merged
+        [model] = modelStorage{1,(floor(log2(size(microbeNames,1)))+1)};
+    else
+        position = pos(1,:); %finding positions of non merged models
+        nexmod = find(~cellfun(@isempty,pos(1,:)));
+        toMerge = cell2mat(position(nexmod));%list of models still to merge
+        if (length(toMerge)) > 1 %more than 1 model was not pairwise merged
+            for k=2:(length(toMerge)+1)
+                if k==2
+                    [model] = mergeTwoModels(modelStorage{toMerge(1,k-1),(nexmod(k-1))-1},modelStorage{toMerge(1,k),(nexmod(k))-1},1,false,false);
+                elseif k > 3
+                    [model] = mergeTwoModels(modelStorage{toMerge(1,k-1),(nexmod(k-1))-1},model,1,false,false);
+                end
             end
+            [model] = mergeTwoModels(modelStorage{1,(floor(log2(size(microbeNames,1)))+1)},model,1,false,false);
+        end
+        if (length(toMerge)) == 1 %1 model was not pairwise merged
+            [model] = mergeTwoModels(modelStorage{1,(floor(log2(size(microbeNames,1)))+1)},modelStorage{toMerge(1,1),(nexmod-1)},1,false,false);
         end
-      [model] = mergeTwoModels(modelStorage{1,(floor(log2(size(microbeNames,1)))+1)},model,1,false,false);
     end
-    if (length(toMerge)) == 1 %1 model was not pairwise merged
-        [model] = mergeTwoModels(modelStorage{1,(floor(log2(size(microbeNames,1)))+1)},modelStorage{toMerge(1,1),(nexmod-1)},1,false,false);
+    
+else
+    % merge in non-parallel way
+    for i = 2:size(microbeNames, 1)
+        if i==2
+            model1 = readCbModel([modelStoragePath filesep microbeNames{1,1} '.mat']);
+            modelNew = readCbModel([modelStoragePath filesep microbeNames{i,1} '.mat']);
+            model = mergeTwoModels(model1,modelNew,1,false,false);
+        else
+            modelNew = readCbModel([modelStoragePath filesep microbeNames{i,1} '.mat']);
+            model = mergeTwoModels(model,modelNew,1,false,false);
+        end
     end
 end
 
+% Merging with host if present
+
+% temp fix
+if isfield(model,'C')
+    model=rmfield(model,'C');
+end
+%
 
-% Merging with host if present 
 if ~isempty(host)
     [model] = mergeTwoModels(host,model,1,false,false);
 end
 [model] = mergeTwoModels(dummy,model,2,false,false);
 
-%finish up by A: removing duplicate reactions
-%We will lose information here, but we will just remove the duplicates.
-[model,rxnToRemove,rxnToKeep]= checkDuplicateRxn(model,'S',1,0,1);
-
 end
 
-
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/getIndividualSizeName.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/getIndividualSizeName.m
index f0e4397669..3c84985fa8 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/getIndividualSizeName.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/getIndividualSizeName.m
@@ -1,17 +1,20 @@
-function [sampNames, organisms] = getIndividualSizeName(abunFilePath)
+function [sampNames, organisms, exMets] = getIndividualSizeName(abunFilePath,modPath)
 % This function automatically detects organisms, names and number of individuals present
 % in the study.
 %
 % USAGE:
 %
-%   [sampNames, organisms] = getIndividualSizeName(abunFilePath)
+%   [sampNames, organisms, exMets] = getIndividualSizeName(abunFilePath)
 %
 % INPUTS:
-%   abunFilePath:        char with path and name of file from which to retrieve information
+%   abunFilePath:      char with path and name of file from which to retrieve information
+%   modPath:           char with path of directory where models are stored
 %
 % OUTPUTS:
-%   sampNamess:          nx1 cell array cell array with names of individuals in the study
-%   organisms:           nx1 cell array cell array with names of organisms in the study
+%   sampNames:         nx1 cell array cell array with names of individuals in the study
+%   organisms:         nx1 cell array cell array with names of organisms in the study
+%   exMets:            cell array with all unique extracellular metabolites
+%                      contained in the models
 %
 % .. Author: Federico Baldini 2017-2018
 %            Almut Heinken, 03/2021: simplified inputs
@@ -65,4 +68,17 @@
 % getting info on present strains
 organisms = oldsampNames(2:height(oldsampNames), 2);  
 organisms = table2cell(organisms);  % extracted names of models
+
+parfor i = 1:length(organisms) % find the unique set of all the reactions contained in the models
+    model =readCbModel([modPath filesep organisms{i,1} '.mat']);
+    models{i, 1} = model;
+end
+
+exMets = {};
+for i = 1:length(organisms) % find the unique set of all the reactions contained in the models
+    model = models{i, 1};
+    findmets = model.mets(find(contains(model.mets,'[e]')));
+    exMets = union(exMets,findmets);
+end
+
 end
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/getMappingInfo.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/getMappingInfo.m
index 40ec164c02..60052bed07 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/getMappingInfo.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/getMappingInfo.m
@@ -31,7 +31,6 @@
 %
 % .. Author: Federico Baldini 2017-2018
 
-
 reac = {}; % array with unique set of all the reactions present in the models
 exMets = {}; % array with unique set of all the extracellular metabolites present in the models
 
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m
index eb1fa8ddca..fbb6176b4c 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/initMgPipe.m
@@ -1,9 +1,10 @@
 function [init, netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = initMgPipe(modPath, abunFilePath, computeProfiles, varargin)
-% This function is called from the MgPipe driver `StartMgPipe` takes care of saving some variables
-% in the environment (in case that the function is called without a driver), does some checks on the
-% inputs, and automatically launches MgPipe. As matter of fact, if all the inputs are properly inserted
-% in the function it can replace the driver.
-
+% This function initializes the mgPipe pipeline and sets the optional input 
+% variables if not defined.
+%
+% USAGE
+%       [init, netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = initMgPipe(modPath, abunFilePath, computeProfiles, varargin)
+%
 % INPUTS:
 %    modPath:                char with path of directory where models are stored
 %    abunFilePath:           char with path and name of file from which to retrieve abundance information
@@ -12,7 +13,10 @@
 %
 % OPTIONAL INPUTS:
 %    resPath:                char with path of directory where results are saved
-%    dietFilePath:           char with path of directory where the diet is saved
+%    dietFilePath:           char with path of directory where the diet is saved.
+%                            Can also be a character array with a separate diet for
+%                            each individual, in that case, size(dietFilePath,1) 
+%                            needs to equal the length of samples.
 %    infoFilePath:           char with path to stratification criteria if available
 %    hostPath:               char with path to host model, e.g., Recon3D (default: empty)
 %    hostBiomassRxn:         char with name of biomass reaction in host (default: empty)
@@ -55,6 +59,8 @@
 %               - Almut Heinken 02/2021: added option for creation of each 
 %                                        personalized model separately and
 %                                        output of model stats
+%               - Almut Heinken 03/2021: inserted error message if 
+%                                        abundances are not normalized.
 
 
 % Define default input parameters if not specified
@@ -128,6 +134,13 @@
     error('Path to file with dietary information is incorrect!');
 end
 
+% test if abundances are normalized
+abundance = table2cell(readtable(abunFilePath,'ReadVariableNames',false));
+totalAbun=sum(str2double(abundance(2:end,2:end)),1);
+if any(totalAbun > 1.01)
+    error('Abundances are not normalized. Please run the function normalizeCoverage!')
+end
+
 if strcmp(infoFilePath, '')
     patStat = false;
 else
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m
index ca6be02951..c88d0ef576 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/mgPipe.m
@@ -1,5 +1,5 @@
 function [netSecretionFluxes, netUptakeFluxes, Y, modelStats, summary, statistics] = mgPipe(modPath, abunFilePath, computeProfiles, resPath, dietFilePath, infoFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, objre, buildSetupAll, saveConstrModels, figForm, numWorkers, rDiet, pDiet, includeHumanMets, lowerBMBound, repeatSim, adaptMedium)
-% MgPipe is a MATLAB based pipeline to integrate microbial abundances
+% mgPipe is a MATLAB based pipeline to integrate microbial abundances
 % (coming from metagenomic data) with constraint based modeling, creating
 % individuals' personalized models.
 % The pipeline is divided in 3 parts:
@@ -9,7 +9,7 @@
 % integrating abundance data retrieved from metagenomics. For each organism,
 % reactions are coupled to the objective function.
 % [PART 3] Simulations under different diet regimes.
-% MgPipe was created (and tested) for AGORA 1.0 please first download AGORA
+% mgPipe was created (and tested) for AGORA 1.0 please first download AGORA
 % version 1.0 from https://www.vmh.life/#downloadview and place the mat files
 % into a folder.
 %
@@ -28,7 +28,7 @@
 %    hostBiomassRxn:         char with name of biomass reaction in host (default: empty)
 %    hostBiomassRxnFlux:     double with the desired flux through the host
 %                            biomass reaction (default: zero)
-%    objre:                  char with reaction name of objective function of microbeNames
+%    objre:                  char with reaction name of objective function
 %    buildSetupAll:       	 boolean indicating the strategy that should be used to
 %                            build personalized models: if true, build a global setup model
 %                            containing all organisms in at least model (default), false: create
@@ -65,11 +65,6 @@
 %   - Almut Heinken, 01/21: added option for creation of each personalized model separately
 
 %% PIPELINE: [PART 1]
-% The number of microbeNames, their names, the number of samples and their identifiers
-% are automatically detected from the input file.
-
-[sampNames,microbeNames]=getIndividualSizeName(abunFilePath);
-%%
 % If PART1 was already
 % computed: if the associated file is already present in the results folder its
 % execution is skipped else its execution starts
@@ -93,20 +88,61 @@
         end
     end
     
-    % Computing genetic information
-    [reac,exMets,micRea,binOrg,patOrg,reacPat,reacNumb,reacSet,reacTab,reacAbun,reacNumber]=getMappingInfo(modPath,microbeNames,abunFilePath);
-    writetable(cell2table(reacAbun,'VariableNames',['Reactions';sampNames]'),strcat(resPath,'reactions.csv'));
+    abundance = table2cell(readtable(abunFilePath));
     
-    %Create tables and save all the created variables
-    reacTab=[array2table(reac),array2table(reacTab,'VariableNames',sampNames')],[resPath 'compfile' filesep 'ReacTab.csv'];
-    reacSet=cell2table(reacSet,'VariableNames',sampNames');
-    reacPat=[array2table(microbeNames),array2table(reacPat,'VariableNames',sampNames')];
-end
+    % The number of microbeNames, their names, the number of samples and their identifiers
+    % are automatically detected from the input file.
+    [sampNames,microbeNames,exMets]=getIndividualSizeName(abunFilePath,modPath);
+    
+    % remove rows of organisms that are not present in any sample
+    microbeNames(sum(cell2mat(abundance(:,2:end)),2)<0.0000001,:)=[];
+    abundance(sum(cell2mat(abundance(:,2:end)),2)<0.0000001,:)=[];
+    
+    % Extracellular spaces simulating the lumen are built and stored for
+    % each microbe.
+    [exch,modelStoragePath]=buildModelStorage(microbeNames,modPath);
+    
+    % Computing reaction presence
+    ReactionPresence=calculateReactionPresence(abunFilePath, modPath, {});
+    writetable(cell2table(ReactionPresence),[resPath filesep 'ReactionPresence.csv'], 'WriteVariableNames', false);
 
-% Plotting genetic information
-[PCoA]=plotMappingInfo(resPath,patOrg,reacPat,reacTab,reacNumber,infoFilePath,figForm,sampNames,microbeNames);
+    % Computing reaction abundance
+    ReactionAbundance = fastCalculateReactionAbundance(abunFilePath, modPath, {}, numWorkers);
+    writetable(cell2table(ReactionAbundance'),[resPath filesep 'ReactionAbundance.csv'], 'WriteVariableNames', false);
+    
+    % Computing subsystem abundance
+    subsystemAbundance = calculateSubsystemAbundance([resPath filesep 'ReactionAbundance.csv']);
+    writetable(cell2table(subsystemAbundance),[resPath filesep 'SubsystemAbundance.csv'], 'WriteVariableNames', false);
 
-save([resPath filesep 'mapInfo.mat'],'binOrg', 'mapP', 'exMets', 'micRea', 'patOrg', 'PCoA', 'reac', 'reacAbun', 'reacNumb', 'reacNumber', 'reacPat', 'reacSet', 'reacTab', 'sampNames', 'microbeNames')
+    % plot subsystem abundance
+    data=cell2mat(subsystemAbundance(2:end,2:end));
+    xlabels=subsystemAbundance(1,2:end);
+    ylabels=subsystemAbundance(2:end,1);
+    figure;
+    imagesc(data)
+    colormap('hot')
+    colorbar
+    if length(xlabels)<30
+        set(gca,'xtick',1:length(xlabels));
+        xticklabels(xlabels);
+        xtickangle(90)
+    end
+    set(gca,'ytick',1:length(ylabels));
+    yticklabels(ylabels);
+    ax=gca;
+    
+    if length(ylabels)<50
+        ax.YAxis.FontSize = 8;
+    else
+        ax.YAxis.FontSize = 6;
+    end
+    set(gca,'TickLabelInterpreter', 'none');
+    title('Relative reaction abundances summarized by subsystem')
+    print(strcat(resPath, 'Subsystem_abundances'), figForm)
+    
+    % save mapping info
+    save([resPath filesep 'mapInfo.mat'], 'mapP', 'exMets', 'exch', 'sampNames', 'microbeNames', 'modelStoragePath','abundance','-v7.3')
+end
 
 %end of trigger for Autoload
 %% PIPELINE: [PART 2.1]
@@ -158,16 +194,19 @@
 % are also added. Models that are already existent will not be recreated, and
 % new microbiota models will be saved in the results folder.
 
-abundance = table2cell(readtable(abunFilePath));
-
-% remove rows of organisms that are not present in any sample
-abundance(sum(cell2mat(abundance(:,2:end)),2)<0.0000001,:)=[];
+% set parallel pool
+if numWorkers > 1
+    poolobj = gcp('nocreate');
+    if isempty(poolobj)
+        parpool(numWorkers)
+    end
+end
 
 % if there is 500 reconstruction total or less, use fast setup creator to
 % carve each personalized model from one large setup model.
 if buildSetupAll
     if modbuild == 1
-        setup=fastSetupCreator(modPath, microbeNames, host, objre, numWorkers);
+        setup=fastSetupCreator(exch, modelStoragePath, microbeNames, host, objre, buildSetupAll);
         setup.name='Global reconstruction with lumen / fecal compartments no host';
         setup.recon=0;
         if ~isempty(host)
@@ -178,7 +217,11 @@
     end
     
     if modbuild==0
-        load(strcat(resPath,'Setup_allbacs.mat'))
+        if ~isempty(host)
+            load(strcat(resPath,'Setup_host_allbacs.mat'))
+        else
+            load(strcat(resPath,'Setup_allbacs.mat'))
+        end
     end
     
     [createdModels]=createPersonalizedModel(abundance,resPath,setup,sampNames,microbeNames,host,hostBiomassRxn);
@@ -188,28 +231,31 @@
     % define what counts as zero abundance
     tol=0.0000001;
     
-    for i=1:length(sampNames)
-        % Here, we will not be starting from one joined model containing all
-        % reconstructions. Instead, each personalized model will be created separately.)
-        % get the list of models for each sample and remove the ones not in
-        % this sample
-        
-        % retrieving current model ID
-        if ~isempty(host)
-            mId = ['host_microbiota_model_samp_', sampNames{i,1}, '.mat'];
+    clear('microbeNames','exMets','abundance')
+    
+    steps=50;
+    % proceed in batches for improved effiency
+    for j=1:steps:length(sampNames)
+        if length(sampNames)-j>=steps-1
+            endPnt=steps-1;
         else
-            mId = ['microbiota_model_samp_', sampNames{i,1}, '.mat'];
+            endPnt=length(sampNames)-j;
         end
         
-        % if the model doesn't exist yet
-        mapP = detectOutput(resPath, mId);
-        if isempty(mapP)
-            microbeNamesSample = microbeNames;
-            abunRed=abundance(:,i+1);
-            abunRed=[abundance(:,1),abunRed];
+        parfor i=j:j+endPnt
+            % Here, we will not be starting from one joined model containing all
+            % reconstructions. Instead, each personalized model will be created separately.)
+            % get the list of models for each sample and remove the ones not in
+            % this sample
+            mappingData=load([resPath filesep 'mapInfo.mat'])
+            microbeNamesSample = mappingData.microbeNames;
+            abunRed=mappingData.abundance(:,i+1);
+            abunRed=[mappingData.abundance(:,1),abunRed];
             microbeNamesSample(cell2mat(abunRed(:,2)) < tol,:)=[];
             abunRed(cell2mat(abunRed(:,2)) < tol,:)=[];
-            setupModel = fastSetupCreator(modPath, microbeNamesSample, host, objre, numWorkers);
+            setupModel = fastSetupCreator(exch, modelStoragePath, microbeNamesSample, host, objre, buildSetupAll);
+            
+            % create personalized models for the batch
             createdModel=createPersonalizedModel(abunRed,resPath,setupModel,sampNames(i,1),microbeNamesSample,host,hostBiomassRxn);
         end
     end
@@ -222,13 +268,16 @@
 % analysis for all the exchange reactions of the diet and fecal compartment is
 % also computed and saved in a file called "simRes".
 
-[exchanges, netProduction, netUptake, presol, inFesMat] = microbiotaModelSimulator(resPath, exMets, sampNames, dietFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, numWorkers, rDiet, pDiet, saveConstrModels, computeProfiles, includeHumanMets, lowerBMBound, repeatSim, adaptMedium);
-% Finally, NMPCs (net maximal production capability) are computed in a metabolite
-% resolved manner and saved in a comma delimited file in the results folder. NMPCs
-% indicate the maximal production of each metabolite and are computing summing
-% the maximal secretion flux with the maximal uptake flux. Similarity of metabolic
-% profiles (using the different NMPCs as features) between individuals are also
-% evaluated with classical multidimensional scaling.
+load([resPath filesep 'mapInfo.mat'])
+if computeProfiles || saveConstrModels
+    [exchanges, netProduction, netUptake, presol, inFesMat] = microbiotaModelSimulator(resPath, exMets, sampNames, dietFilePath, hostPath, hostBiomassRxn, hostBiomassRxnFlux, numWorkers, rDiet, pDiet, saveConstrModels, computeProfiles, includeHumanMets, lowerBMBound, repeatSim, adaptMedium);
+    % Finally, NMPCs (net maximal production capability) are computed in a metabolite
+    % resolved manner and saved in a comma delimited file in the results folder. NMPCs
+    % indicate the maximal production of each metabolite and are computing summing
+    % the maximal secretion flux with the maximal uptake flux. Similarity of metabolic
+    % profiles (using the different NMPCs as features) between individuals are also
+    % evaluated with classical multidimensional scaling.
+end
 
 if computeProfiles
     [netSecretionFluxes, netUptakeFluxes, Y] = mgSimResCollect(resPath, sampNames, exchanges, rDiet, pDiet, infoFilePath, netProduction, netUptake, figForm);
@@ -237,6 +286,7 @@
     netUptakeFluxes={};
     Y=[];
     delete('simRes.mat','intRes.mat')
+    rmdir([resPath filesep 'modelStorage'],'s')
 end
 
 % get stats on microbiome models-number of reactions and metabolites
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m
index d7bdb1ca09..676a5d6132 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/microbiotaModelSimulator.m
@@ -50,6 +50,13 @@
 % .. Author: Federico Baldini, 2017-2018
 %            Almut Heinken, 03/2021: simplified inputs
 
+% set a solver if not done yet
+global CBT_LP_SOLVER
+solver = CBT_LP_SOLVER;
+if isempty(solver)
+    initCobraToolbox(false); %Don't update the toolbox automatically
+end
+
 for i=1:length(exMets)
     exchanges{i,1} = ['EX_' exMets{i}];
 end
@@ -219,12 +226,6 @@
                 end
             end
             
-            % set a solver if not done yet
-            global CBT_LP_SOLVER
-            solver = CBT_LP_SOLVER;
-            if isempty(solver)
-                initCobraToolbox(false); %Don't update the toolbox automatically
-            end
             solution_allOpen = solveCobraLP(buildLPproblemFromModel(model));
             % solution_allOpen=solveCobraLPCPLEX(model,2,0,0,[],0);
             if solution_allOpen.stat==0
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/normalizeCoverage.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/normalizeCoverage.m
index 738a86b684..a595266b9b 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/normalizeCoverage.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/normalizeCoverage.m
@@ -31,6 +31,17 @@
 
 coverage = table2cell(readtable(abunFilePath,'ReadVariableNames',false));
 coverage{1,1}='ID';
+
+% delete samples that are all zeros (if applies)
+totalAbun=sum(str2double(coverage(2:end,2:end)),1);
+allzero=find(totalAbun<0.0000001);
+if ~isempty(allzero)
+    for i=1:length(allzero)
+        allzero(i)=allzero(i)+1;
+    end
+    coverage(:,allzero)=[];
+end
+
 abundanceNew = coverage;
 
 for i=2:size(coverage,2)
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/plotMappingInfo.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/plotMappingInfo.m
index 84aa5b8f32..0c46a12561 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/plotMappingInfo.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/mgPipe/plotMappingInfo.m
@@ -185,7 +185,7 @@
 %         hold on
 %     end
 
-    print(strcat(resPath, 'PCoA reactions'), figForm)
+    print(strcat(resPath, 'PCoA_reactions'), figForm)
 else
     disp('noPcoA will be plotted')
 end
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/createMultipleSpeciesModel.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/createMultipleSpeciesModel.m
index 474466e5b3..70aa2e5b99 100644
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/createMultipleSpeciesModel.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/createMultipleSpeciesModel.m
@@ -162,7 +162,7 @@
 for i = 1:modelNumber
     model=models{i, 1};
     biomassReaction=model.rxns(find(strncmp(model.rxns, 'bio', 3)));
-    model = removeBalancedCycles(model, biomassReaction, database,unionRxns);
+    model = removeFutileCycles(model, biomassReaction, database,unionRxns);
     models{i, 1} = model;
 end
 %% define some variables
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/joinModelsPairwiseFromList.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/joinModelsPairwiseFromList.m
index 4918a7cd0a..8e743d2ace 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/joinModelsPairwiseFromList.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/joinModelsPairwiseFromList.m
@@ -56,6 +56,22 @@ function joinModelsPairwiseFromList(modelList, modelFolder, varargin)
 mergeGenesFlag = parser.Results.mergeGenesFlag;
 pairwiseModelFolder = parser.Results.pairwiseModelFolder;
 
+% initialize COBRA Toolbox and parallel pool
+global CBT_LP_SOLVER
+if isempty(CBT_LP_SOLVER)
+    initCobraToolbox
+end
+solver = CBT_LP_SOLVER;
+
+if numWorkers>0 && ~isempty(ver('parallel'))
+    % with parallelization
+    poolobj = gcp('nocreate');
+    if isempty(poolobj)
+        parpool(numWorkers)
+    end
+end
+environment = getEnvironment();
+
 pairedModelInfo = {};
 cnt = 1;
 
@@ -67,104 +83,66 @@ function joinModelsPairwiseFromList(modelList, modelFolder, varargin)
 existingModels(find(strcmp(existingModels(:,1),'..')),:)=[];
 
 % then join all models in modelList
+inputModels={};
 for i = 1:size(modelList, 1)
-    if numWorkers > 0
-        % with parallelization
-        poolobj = gcp('nocreate');
-        if isempty(poolobj)
-            parpool(numWorkers)
-        end
-        pairedModelsTemp = {};
-        
-        % Load the reconstructions to be joined
-        inputModels={};
-        model=readCbModel([modelFolder filesep modelList{i,1} '.mat']);
-        inputModels{i}=model;
-        for k = i + 1:size(modelList, 1)
-            model=readCbModel([modelFolder filesep modelList{k,1} '.mat']);
-            inputModels{k}=model;
-        end
+    
+    % Load the reconstructions to be joined
+    model=readCbModel([modelFolder filesep modelList{i,1} '.mat']);
+    inputModels{i}=model;
+    for k = i + 1:size(modelList, 1)
+        model=readCbModel([modelFolder filesep modelList{k,1} '.mat']);
+        inputModels{k}=model;
+    end
+    
+    pairedModelsTemp = {};
+    
+    parfor k = i + 1:size(modelList, 1)
+        restoreEnvironment(environment);
+        changeCobraSolver(solver, 'LP', 0, -1);
         
-        parfor k = i + 1:size(modelList, 1)
-            model1 = inputModels{i};
-            model2 = inputModels{k};
-            models = {
-                model1
-                model2
-                };
-            nameTagsModels = {
-                strcat(modelList{i}, '_')
-                strcat(modelList{k}, '_')
-                };
-            if ~contains(existingModels,['pairedModel', '_', modelList{i}, '_', modelList{k}, '.mat'])
-                [pairedModel] = createMultipleSpeciesModel(models, 'nameTagsModels', nameTagsModels,'mergeGenesFlag', mergeGenesFlag);
-                [pairedModel] = coupleRxnList2Rxn(pairedModel, pairedModel.rxns(strmatch(nameTagsModels{1, 1}, pairedModel.rxns)), strcat(nameTagsModels{1, 1}, model1.rxns(find(strncmp(model1.rxns, 'bio', 3)))), c, u);
-                [pairedModel] = coupleRxnList2Rxn(pairedModel, pairedModel.rxns(strmatch(nameTagsModels{2, 1}, pairedModel.rxns)), strcat(nameTagsModels{2, 1}, model2.rxns(find(strncmp(model2.rxns, 'bio', 3)))), c, u);
-                pairedModelsTemp{k} = pairedModel;
-            else
-                pairedModelsTemp{k} = {};
-            end
+        model1 = inputModels{i};
+        model2 = inputModels{k};
+        models = {
+            model1
+            model2
+            };
+        nameTagsModels = {
+            strcat(modelList{i}, '_')
+            strcat(modelList{k}, '_')
+            };
+        if isempty(contains(existingModels,['pairedModel', '_', modelList{i}, '_', modelList{k}, '.mat']))
+            [pairedModel] = createMultipleSpeciesModel(models, 'nameTagsModels', nameTagsModels,'mergeGenesFlag', mergeGenesFlag);
+            [pairedModel] = coupleRxnList2Rxn(pairedModel, pairedModel.rxns(strmatch(nameTagsModels{1, 1}, pairedModel.rxns)), strcat(nameTagsModels{1, 1}, model1.rxns(find(strncmp(model1.rxns, 'bio', 3)))), c, u);
+            [pairedModel] = coupleRxnList2Rxn(pairedModel, pairedModel.rxns(strmatch(nameTagsModels{2, 1}, pairedModel.rxns)), strcat(nameTagsModels{2, 1}, model2.rxns(find(strncmp(model2.rxns, 'bio', 3)))), c, u);
+            pairedModelsTemp{k} = pairedModel;
+        else
+            pairedModelsTemp{k} = {};
         end
-        for k = i + 1:size(modelList, 1)
-            % keep track of the generated models and populate the output file with
-            % information on joined models
-            model1 = inputModels{i};
-            pairedModelInfo{cnt, 1} = ['pairedModel', '_', modelList{i}, '_', modelList{k}, '.mat'];
-            pairedModelInfo{cnt, 2} = modelList{i};
-            pairedModelInfo{cnt, 3} = model1.rxns(find(strncmp(model1.rxns, 'bio', 3)));
-            model2 = inputModels{k};
-            pairedModelInfo{cnt, 4} = modelList{k};
-            pairedModelInfo{cnt, 5} = model2.rxns(find(strncmp(model2.rxns, 'bio', 3)));
-            % save file regularly
-            if floor(cnt/1000) == cnt/1000
+    end
+    
+    for k = i + 1:size(modelList, 1)
+        % keep track of the generated models and populate the output file with
+        % information on joined models
+        model1 = inputModels{i};
+        pairedModelInfo{cnt, 1} = ['pairedModel', '_', modelList{i}, '_', modelList{k}, '.mat'];
+        pairedModelInfo{cnt, 2} = modelList{i};
+        pairedModelInfo{cnt, 3} = model1.rxns(find(strncmp(model1.rxns, 'bio', 3)));
+        model2 = inputModels{k};
+        pairedModelInfo{cnt, 4} = modelList{k};
+        pairedModelInfo{cnt, 5} = model2.rxns(find(strncmp(model2.rxns, 'bio', 3)));
+        % save file regularly
+        if floor(cnt/1000) == cnt/1000
             save([pairwiseModelFolder filesep 'pairedModelInfo'],'pairedModelInfo');
-            end
-            
-            if ~contains(existingModels,['pairedModel', '_', modelList{i}, '_', modelList{k}, '.mat'])
-                pairedModel=pairedModelsTemp{k};
-                save([pairwiseModelFolder filesep pairedModelInfo{cnt,1}],'pairedModel');
-            end
-            cnt = cnt + 1;
         end
-    else
-        % without parallelization
-        for j = i + 1:size(modelList, 1)
-            model1 = inputModels{i};
-            model2 = inputModels{j};
-            models = {
-                model1
-                model2
-                };
-            nameTagsModels = {
-                strcat(modelList{i}, '_')
-                strcat(modelList{j}, '_')
-                };
-            % keep track of the generated models and populate the output file with
-            % information on joined models
-            pairedModelInfo{cnt, 1} = ['pairedModel', '_', modelList{i}, '_', modelList{j}, '.mat'];
-            pairedModelInfo{cnt, 2} = modelList{i};
-            pairedModelInfo{cnt, 3} = model1.rxns(find(strncmp(model1.rxns, 'bio', 3)));
-            pairedModelInfo{cnt, 4} = modelList{j};
-            pairedModelInfo{cnt, 5} = model2.rxns(find(strncmp(model2.rxns, 'bio', 3)));
-            % save file regularly
-            if floor(cnt/1000) == cnt/1000
-                save([pairwiseModelFolder filesep 'pairedModelInfo'],'pairedModelInfo');
-            end
-            
-            if ~contains(existingModels,['pairedModel', '_', modelList{i}, '_', modelList{j}, '.mat'])
-                [pairedModel] = createMultipleSpeciesModel(models, 'nameTagsModels', nameTagsModels,'mergeGenesFlag', mergeGenesFlag);
-                [pairedModel] = coupleRxnList2Rxn(pairedModel, pairedModel.rxns(strmatch(nameTagsModels{1, 1}, pairedModel.rxns)), strcat(nameTagsModels{1, 1}, model1.rxns(find(strncmp(model1.rxns, 'bio', 3)))), c, u);
-                [pairedModel] = coupleRxnList2Rxn(pairedModel, pairedModel.rxns(strmatch(nameTagsModels{2, 1}, pairedModel.rxns)), strcat(nameTagsModels{2, 1}, model2.rxns(find(strncmp(model2.rxns, 'bio', 3)))), c, u);
-                pairedModels{cnt, 1} = pairedModel;
-                save([pairwiseModelFolder filesep pairedModelInfo{cnt,1}],'pairedModel');
-            else
-                pairedModels{cnt, 1} = {};
-            end
-            cnt = cnt + 1;
+        
+        if isempty(contains(existingModels,['pairedModel', '_', modelList{i}, '_', modelList{k}, '.mat']))
+            pairedModel=pairedModelsTemp{k};
+            save([pairwiseModelFolder filesep pairedModelInfo{cnt,1}],'pairedModel');
         end
+        cnt = cnt + 1;
     end
 end
 
 save([pairwiseModelFolder filesep 'pairedModelInfo'],'pairedModelInfo');
 
-end
+end
\ No newline at end of file
diff --git a/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/simulatePairwiseInteractions.m b/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/simulatePairwiseInteractions.m
index cad80b01ae..afede59287 100755
--- a/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/simulatePairwiseInteractions.m
+++ b/src/analysis/multiSpecies/microbiomeModelingToolbox/pairwiseInteractionModeling/simulatePairwiseInteractions.m
@@ -118,20 +118,20 @@
 solutionsTmp={};
 
 if isfile([pairwiseModelFolder filesep 'pairedModelInfo.mat'])
-load([pairwiseModelFolder filesep 'pairedModelInfo.mat'])
+load([pairwiseModelFolder filesep 'pairedModelInfo.mat'],'pairedModelInfo')
 else
     error('Pairwise models have not been created. Please run joinModelsPairwiseFromList first.')
 end
 
-parfor i = 1:size(pairedModelInfo, 1)
+info=pairedModelInfo;
+parfor i = 1:size(info, 1)
     restoreEnvironment(environment);
     changeCobraSolver(solver, 'LP', 0, -1);
     changeCobraSolverParams('LP', 'logFile', 0);
     
     % load the model
-    fileToLoad=load([pairwiseModelFolder filesep pairedModelInfo{i,1}]);
-    toLoad=fieldnames(fileToLoad);
-    pairedModel=fileToLoad.(toLoad{1});
+    filename=[pairwiseModelFolder filesep info{i,1}];
+    pairedModel=readCbModel(filename);
     pairedModelOrg=pairedModel;
     % if a diet was input
     if ~isempty(inputDiet)
@@ -139,8 +139,8 @@
     end
     % for each paired model, set both biomass objective functions as
     % objectives
-    biomass1 = strcat(pairedModelInfo{i, 2}, '_', pairedModelInfo{i, 3});
-    biomass2 = strcat(pairedModelInfo{i, 4}, '_', pairedModelInfo{i, 5});
+    biomass1 = strcat(info{i, 2}, '_', info{i, 3});
+    biomass2 = strcat(info{i, 4}, '_', info{i, 5});
     model1biomass = find(ismember(pairedModel.rxns, biomass1));
     pairedModel.c(model1biomass, 1) = 1;
     model2biomass = find(ismember(pairedModel.rxns, biomass2));
@@ -160,7 +160,7 @@
     end
     pairedModel = changeObjective(pairedModel, biomass1);
     % disable flux through the second model
-    pairedModel = changeRxnBounds(pairedModel, pairedModel.rxns(strmatch(strcat(pairedModelInfo{i, 4}, '_'), pairedModel.rxns)), 0, 'b');
+    pairedModel = changeRxnBounds(pairedModel, pairedModel.rxns(strmatch(strcat(info{i, 4}, '_'), pairedModel.rxns)), 0, 'b');
     % calculate single biomass
     solutionSingle1 = solveCobraLP(buildLPproblemFromModel(pairedModel,false));
     % silence model 1 and optimize model 2
@@ -172,7 +172,7 @@
     end
     pairedModel = changeObjective(pairedModel, biomass2);
     % disable flux through the first model
-    pairedModel = changeRxnBounds(pairedModel, pairedModel.rxns(strmatch(strcat(pairedModelInfo{i, 2}, '_'), pairedModel.rxns)), 0, 'b');
+    pairedModel = changeRxnBounds(pairedModel, pairedModel.rxns(strmatch(strcat(info{i, 2}, '_'), pairedModel.rxns)), 0, 'b');
     % calculate single biomass
     solutionSingle2 = solveCobraLP(buildLPproblemFromModel(pairedModel,false));
     
diff --git a/src/analysis/thermo/componentContribution/old/invertProjection.m b/src/analysis/thermo/componentContribution/old/invertProjection.m
index 335354a566..3cf4dce6b4 100644
--- a/src/analysis/thermo/componentContribution/old/invertProjection.m
+++ b/src/analysis/thermo/componentContribution/old/invertProjection.m
@@ -1,43 +1,27 @@
 function [inv_A, r, P_R, P_N] = invertProjection(A, epsilon)
-% Inverts a general matrix A using the pseudoinverse
-%
-% USAGE:
-%
-%    [inv_A, r, P_R, P_N] = invertProjection(A, epsilon)
-% INPUTS:
-%    A:          general matrix
-%    epsilon:    default = 1e-10
-%
-% OUTPUTS:
-%    inv_A:      the pseudoinverse of `A`
-%    r:          the rank of `A`
-%    P_R:        the projection matrix onto the `range(A)`
-%    P_N:        the projection matrix onto the `null(A')`
+% invert a general matrix A using the pseudoinverse
+% return values are:
+%     inv_A - the pseudoinverse of A
+%     r     - the rank of A
+%     P_R   - the projection matrix onto the range(A)
+%     P_N   - the projection matrix onto the null(A')
 
 if nargin < 2
     epsilon = 1e-10;
 end
 [m, n] = size(A);
 
-if 1
-    %[U, S, V] = svd(A); % not working, uncommented line below - Lemmer
-    %[U, S, V] = svds(A,min(size(A))); % Bugfix due to svd convergence problems
-    [U, S, V] = svd(full(A),'econ'); %from Michael Saunders code
-    r = sum(sum(abs(S) > epsilon));
-    inv_S = diag(1 ./ S(abs(S) > epsilon));
-    inv_A = V(:, 1:r) * inv_S(1:r, 1:r) * U(:, 1:r)';
-    P_R = U(:, 1:r)       * U(:, 1:r)';
-    P_N = U(:, (r+1):end) * U(:, (r+1):end)';
-        
-else
-    %Michael Saunders code -TODO integrate this properly
-    [U1,D1,V1,r] = subspaceSVD(A);
-    P_R=U1*U1';%projection matrix onto the range(A)
-    P_N=eye(m) - U1*U1';%projection matrix onto null(A')
-    inv_A=pinv(A,1e-12);
-end
-    % Michael Saunders code
-    % [U1,D1,V1,r] = subspaceSVD(A);
-    % P_R=U1*U1';%projection matrix onto the range(A)
-    % P_N=eye(m) - U1*U1';%projection matrix onto the null(A')
-    % inv_A=pinv(A,1e-12);
\ No newline at end of file
+%[U, S, V] = svd(A); % not working, uncommented line below - Lemmer
+[U, S, V] = svds(A,min(size(A))); % Bugfix due to svd convergence problems
+%[U, S, V] = svd(full(A),'econ'); %from Michael Saunders code
+r = sum(sum(abs(S) > epsilon));
+inv_S = diag(1 ./ S(abs(S) > epsilon));
+inv_A = V(:, 1:r) * inv_S(1:r, 1:r) * U(:, 1:r)';
+P_R = U(:, 1:r)       * U(:, 1:r)';
+P_N = U(:, (r+1):end) * U(:, (r+1):end)';
+
+% Michael Saunders code
+% [U1,D1,V1,r] = subspaceSVD(A);
+% P_R=U1*U1';%projection matrix onto the range(A)
+% P_N=eye(m) - U1*U1';%projection matrix onto the null(A')
+% inv_A=pinv(A,1e-12);
diff --git a/src/analysis/thermo/componentContribution/setupComponentContribution.m b/src/analysis/thermo/componentContribution/setupComponentContribution.m
new file mode 100644
index 0000000000..60f500c6a1
--- /dev/null
+++ b/src/analysis/thermo/componentContribution/setupComponentContribution.m
@@ -0,0 +1,132 @@
+function model = setupComponentContribution(model,molFileDir,cid,printLevel)
+% Estimates standard transformed reaction Gibbs energy and directionality
+% at in vivo conditions in multicompartmental metabolic reconstructions.
+% Has external dependencies on the COBRA toolbox, the component
+% contribution method, Python (with numpy and Open Babel bindings),
+% ChemAxon's Calculator Plugins, and Open Babel. See details on
+% availability at the end of help text. 
+% 
+% modelT = setupThermoModel(model,molfileDir,cid,T,cellCompartments,ph,...
+%                           is,chi,concMin,concMax,confidenceLevel) 
+% 
+% INPUTS
+% model             Model structure with following fields:
+% .S                m x n stoichiometric matrix.
+% .mets             m x 1 array of metabolite identifiers.
+% .rxns             n x 1 array of reaction identifiers.
+% .metFormulas      m x 1 cell array of metabolite formulas. Formulas for
+%                   protons should be H, and formulas for water should be
+%                   H2O.
+% .metCharges       m x 1 numerical array of metabolite charges.
+% 
+% OPTIONAL INPUTS
+% molFileDir                Path to a directory containing molfiles for the
+%                           major tautomer of the major microspecies of
+%                           each metabolite at pH 7. Molfiles should be
+%                           named with the metabolite identifiers in
+%                           model.mets (without compartment assignments).
+%                           Not required if cid are specified.
+% cid                       m x 1 cell array of KEGG Compound identifiers.
+%                           Not required if molfiledir is specified.
+% model.metCompartments     m x 1 array of metabolite compartment
+%                           assignments. Not required if metabolite
+%                           identifiers are strings of the format ID[*]
+%                           where * is the appropriate compartment
+%                           identifier.
+% 
+% OUTPUTS
+% model                 Model structure with following additional fields:
+% .inchi                Structure containing four m x 1 cell array's of
+%                       IUPAC InChI strings for metabolites, with varying
+%                       levels of structural detail.
+% .pKa                  m x 1 structure containing metabolite pKa values
+%                       estimated with ChemAxon's Calculator Plugins.
+% .pseudoisomers        p x 4 matrix with the following columns:
+%                       1. Metabolite index.
+%                       2. Estimated pseudoisomer standard Gibbs energy.
+%                       3. Number of hydrogen atoms in pseudoisomer
+%                       chemical formula.
+%                       4. Charge on pseudoisomer.
+% 
+% WRITTEN OUTPUTS
+% MetStructures.sdf     An SDF containing all structures input to the
+%                       component contribution method for estimation of
+%                       standard Gibbs energies. 
+% 
+% 
+% Ronan M. T. Fleming, Sept. 2012   Version 1.0
+% Hulda S. H., Dec. 2012            Version 2.0
+
+
+%% Configure inputs
+% Retreive molfiles from KEGG if KEGG ID are given. Otherwise use molfiles
+% in molfileDir.
+if ~exist('cid','var')
+    cid = [];
+end
+if ~exist('printLevel','var')
+    printLevel = 1;
+end
+%% Get metabolite structures
+if ~isempty(cid)
+    molFileDir = 'molfilesFromKegg';
+    fprintf('\nRetreiving molfiles from KEGG.\n');
+    takeMajorMS = true; % Convert molfile from KEGG to major tautomer of major microspecies at pH 7
+    pH = 7;
+    takeMajorTaut = true;
+    kegg2mol(cid,molFileDir,model.mets,takeMajorMS,pH,takeMajorTaut); % Retreive mol files
+end
+
+if printLevel>0
+fprintf('Creating MetStructures.sdf from molfiles.\n')
+end
+
+sdfFileName = 'MetStructures.sdf';
+includeRs = 0; % Do not include structures with R groups in SDF
+[sdfMetList,noMolMetList] = mol2sdf(model.mets,molFileDir,sdfFileName,includeRs);
+
+if printLevel>0
+fprintf('Converting SDF to InChI strings.\n')
+end
+model.inchi = createInChIStruct(model.mets,sdfFileName);
+compositeBool = ~cellfun('isempty',regexp(model.inchi.nonstandard,'\.')); % Remove InChI for composite compounds as they cause problems later.
+model.inchi.standard(compositeBool) = cell(sum(compositeBool),1);
+model.inchi.standardWithStereo(compositeBool) = cell(sum(compositeBool),1);
+model.inchi.standardWithStereoAndCharge(compositeBool) = cell(sum(compositeBool),1);
+model.inchi.nonstandard(compositeBool) = cell(sum(compositeBool),1);
+
+%% Estimate metabolite pKa values with ChemAxon calculator plugins and determine all relevant pseudoisomers.
+if printLevel>0
+fprintf('Estimating metabolite pKa values.\n');
+end
+npKas = 20; % Number of acidic and basic pKa values to estimate
+takeMajorTaut = false; % Estimate pKa for input tautomer. Input tautomer is assumed to be the major tautomer for the major microspecies at pH 7.
+model.pseudoisomers = estimate_pKa(model.mets,model.inchi.nonstandard,npKas,takeMajorTaut); % Estimate pKa and determine pseudoisomers
+model.pseudoisomers = rmfield(model.pseudoisomers,'met');
+
+% Add number of hydrogens and charge for metabolites with no InChI
+if any(~[model.pseudoisomers.success]) && printLevel>0
+    fprintf('Assuming that metabolite species in model.metFormulas are representative for metabolites where pKa could not be estimated.\n');
+end
+nonphysicalMetSpecies = {};
+for i = 1:length(model.mets)
+    model_z = model.metCharges(i); % Get charge from model
+    model_nH = numAtomsOfElementInFormula(model.metFormulas{i},'H'); % Get number of hydrogens from metabolite formula in model
+    if ~model.pseudoisomers(i).success
+        model.pseudoisomers(i).zs = model_z;
+        model.pseudoisomers(i).nHs = model_nH;
+        model.pseudoisomers(i).majorMSpH7 = true; % Assume species in model is the major (and only) metabolite species %RF: this seems dubious
+    end
+    if ~any(model.pseudoisomers(i).nHs == model_nH)
+        nonphysicalMetSpecies = [nonphysicalMetSpecies; model.mets(i)];
+    end
+end
+if ~isempty(nonphysicalMetSpecies)
+    nonphysicalMetSpecies = unique(regexprep(nonphysicalMetSpecies,'\[\w\]',''));
+    if printLevel>1
+        fprintf('%s\n','#H in model.metFormulas does not match any of the species calculated mol file for metabolites:')
+        for n=1:length(nonphysicalMetSpecies)
+            fprintf('%s\n',nonphysicalMetSpecies{n});%,model.metFormulas{m});
+        end
+    end
+end
diff --git a/src/analysis/thermo/groupContribution/old/createGroupIncidenceMatrix_old.m b/src/analysis/thermo/groupContribution/jankowski/createGroupIncidenceMatrix_jankowski.m
similarity index 100%
rename from src/analysis/thermo/groupContribution/old/createGroupIncidenceMatrix_old.m
rename to src/analysis/thermo/groupContribution/jankowski/createGroupIncidenceMatrix_jankowski.m
diff --git a/src/analysis/thermo/groupContribution/new/createGroupIncidenceMatrix.m b/src/analysis/thermo/groupContribution/new/createGroupIncidenceMatrix.m
index 665bf5bc1f..7e116e5638 100644
--- a/src/analysis/thermo/groupContribution/new/createGroupIncidenceMatrix.m
+++ b/src/analysis/thermo/groupContribution/new/createGroupIncidenceMatrix.m
@@ -1,4 +1,6 @@
-function combinedModel = createGroupIncidenceMatrix(model, trainingModel, param)
+function trainingModel = createGroupIncidenceMatrix(model, trainingModel, mappingScore, printLevel)
+% Initialize `G` matrix, and then use the python script "inchi2gv.py" to decompose each of the
+% compounds that has an 'InChI' and save the decomposition as a row in the `G` matrix.
 %
 % USAGE:
 %
@@ -13,306 +15,19 @@
 % trainingModel.S:                          p x n stoichiometric matrix of training data
 % trainingModel.metKEGGID:                  p x 1 cell array of metabolite KEGGID
 % trainingModel.inchi.nonstandard:          p x 1 cell array of nonstandard InChI
-% trainingModel.test2CombinedModelMap:          m x 1 mapping of model.mets to training data metabolites
-% trainingModel.mappingScore
+% trainingModel.Model2TrainingMap:          m x 1 mapping of model.mets to training data metabolites
+% trainingModel.mappingScore                
 %
 % OUTPUT:
-% combinedModel:
-% combinedModel.S:                          k x n stoichiometric matrix of training padded with zero rows for metabolites exclusive to test data
-% combinedModel.G:                          k x g group incicence matrix
-% combinedModel.groups:                     g x 1 cell array of group definitions
-% combinedModel.trainingMetBool             k x 1 boolean indicating training metabolites in G
-% combinedModel.testMetBool                 k x 1 boolean indicating test metabolites in G
-% combinedModel.groupDecomposableBool:      k x 1 boolean indicating metabolites with group decomposition
-% combinedModel.inchiBool                   k x 1 boolean indicating metabolites with inchi
-
-% combinedModel.cids_that_dont_decompose:   z x 1 ids of compounds that do not decomopose
-
-%
-
-if isempty(model)
-    model.inchi.nonstandard=[];
-end
-if isempty(trainingModel)
-    trainingModel.inchi.nonstandard=[];
-end
-
-if ~exist('param','var')
-    param=struct();
-end
-if ~isfield(param,'printLevel')
-    param.printLevel=0;
-end
-
-%parameters for auto fragmentation
-if ~isfield(param,'fragmentationMethod')
-    param.fragmentationMethod='abinito';
-end
-if ~isfield(param,'modelCache')
-    param.modelCache=[];
-end
-if ~isfield(param,'printLevel')
-    param.printLevel=0;
-end
-if ~isfield(param,'radius')
-    param.radius=1;
-end
-if ~isfield(param,'dGPredictorPath')
-    param.dGPredictorPath='/home/rfleming/work/sbgCloud/code/dGPredictor';
-end
-if ~isfield(param,'canonicalise')
-    param.canonicalise=0;
-end
-        
-fprintf('Creating group incidence matrix\n');
-
-switch param.fragmentationMethod
-    case 'abinito'
-        %function model = createFragmentIncidenceMatrix(inchi,radius,dGPredictorPath,canonicalise)
-        % model.G:    k x g  fragment incidence matrix
-        
-
-
-        %fragment each of the trainingModel inchi
-        if param.printLevel>0
-            fprintf('%s\n','Ab inito fragmentation of each of the trainingModel inchi...')
-        end
-        trainingModelInchi=trainingModel.inchi.nonstandard;
-        [trainingModelFragmentedMol,trainingModelDecomposableBool,trainingModelInchiExistBool] = autoFragment(trainingModelInchi,param.radius,param.dGPredictorPath,param.canonicalise,'autoFragment_trainingModel',param.printLevel-1);
-        if param.printLevel>0
-            fprintf('%s\n','...done.')
-        end
-        
-        %fragment each of the model inchi
-        if param.printLevel>0
-            fprintf('%s\n','Ab inito fragmentation of each of the model inchi...')
-        end
-        modelInchi=model.inchi.nonstandard;
-        [modelFragmentedMol,modelDecomposableBool,modelInchiExistBool] = autoFragment(modelInchi,param.radius,param.dGPredictorPath,param.canonicalise,param.modelCache,param.printLevel-1);
-        if param.printLevel>0
-            fprintf('%s\n','...done.')
-        end
-
-               
-
-        nTrainingModelMets=length(trainingModelFragmentedMol);
-        nModelMets=length(modelFragmentedMol);
-        
-        trainingFragmentSmiles = cell(0); 
-        %collate the fragments in the training model
-        for i = 1:nTrainingModelMets
-            if trainingModelDecomposableBool(i)
-                if param.printLevel>1
-                    disp(trainingModelFragmentedMol(i).inchi)
-                    trainingModelFragmentedMol(i).smilesCounts
-                end
-                if trainingModelInchiExistBool(i)
-                    if isempty(trainingModelFragmentedMol(i).inchi)
-                        trainingModelDecomposableBool(i)=0;
-                        trainingModelInchiExistBool(i)=0;
-                    else
-                        trainingFragmentSmiles = [trainingFragmentSmiles;trainingModelFragmentedMol(i).smilesCounts.keys'];
-                    end
-                else
-                    trainingModelDecomposableBool(i)=0;
-                end
-            end
-        end
-        %set of unique fragments in the training model
-        uniqueTrainingFragmentSmiles = unique(trainingFragmentSmiles);
-        
-        testFragmentSmiles = cell(0); 
-        %now the model mol
-        for i = 1:nModelMets
-            if modelDecomposableBool(i)
-                if param.printLevel>1
-                    disp(modelFragmentedMol(i).inchi)
-                    modelFragmentedMol(i).smilesCounts
-                end
-                if modelInchiExistBool(i)
-                    if isempty(modelFragmentedMol(i).inchi)
-                        modelDecomposableBool(i)=0;
-                        modelInchiExistBool(i)=0;
-                    else
-                        testFragmentSmiles = [testFragmentSmiles;modelFragmentedMol(i).smilesCounts.keys'];
-                    end
-                else
-                    modelDecomposableBool(i)=0;
-                end
-            end
-        end
-        %set of unique fragments in the test model
-        uniqueTestFragmentSmiles = unique(testFragmentSmiles);
-        
-        fragmentSmilesUniqueToTraining = setdiff(uniqueTrainingFragmentSmiles,uniqueTestFragmentSmiles);
-        if ~isempty(fragmentSmilesUniqueToTraining)
-            fprintf('%s\n',['There are ' int2str(length(fragmentSmilesUniqueToTraining)) ' fragments unique to the training model.'])
-        end
-        fragmentSmilesInCommon = intersect(uniqueTestFragmentSmiles,uniqueTrainingFragmentSmiles);
-        if ~isempty(fragmentSmilesInCommon)
-            fprintf('%s\n',['There are ' int2str(length(fragmentSmilesInCommon)) ' fragments in common between the training and test models.'])
-        end
-        fragmentSmilesUniqueToTest = setdiff(uniqueTestFragmentSmiles,uniqueTrainingFragmentSmiles);
-        if ~isempty(fragmentSmilesUniqueToTest)
-            fprintf('%s\n',['There are ' int2str(length(fragmentSmilesUniqueToTest)) ' fragments unique to the test model.'])
-        end
-        
-        %start based on training model
-        combinedModel = trainingModel;
-        
-        % combinedModel.groupDecomposableBool:      k x 1 boolean indicating metabolites with group decomposition
-        combinedModel.groupDecomposableBool=[trainingModelDecomposableBool;modelDecomposableBool];
-        combinedModel.inchiBool = [trainingModelInchiExistBool;modelInchiExistBool];
-        % combinedModel.trainingMetBool             k x 1 boolean indicating training metabolites in G
-        combinedModel.trainingMetBool = [true(nTrainingModelMets,1);false(nModelMets,1)];
-        % combinedModel.testMetBool                 k x 1 boolean indicating test metabolites in G
-        combinedModel.testMetBool = [false(nTrainingModelMets,1);true(nModelMets,1)];
-        
-        %fragments unique to the combined model
-        uniqueFragmentSmiles = unique([uniqueTrainingFragmentSmiles;uniqueTestFragmentSmiles]);
-        nFrag=length(uniqueFragmentSmiles);
-        nNonDecomposable=nnz(combinedModel.groupDecomposableBool==0);
-        
-        combinedModel.inchi.nonstandard = [trainingModelInchi;modelInchi];
-        
-        nMets = nTrainingModelMets + nModelMets;
-        %preallocate the group incidence matrix
-        combinedModel.G = sparse(nMets,nFrag+nNonDecomposable);
-        
-        %use the keys to define the groups
-        combinedModel.groups = [uniqueFragmentSmiles;trainingModel.inchi.nonstandard(~trainingModelDecomposableBool);model.inchi.nonstandard(~modelDecomposableBool)];
-        
-        %map each of the training model fragments to the consolidated list of fragments
-        d=1;
-        for i = 1:nTrainingModelMets
-            if trainingModelDecomposableBool(i)
-                bool = isKey(trainingModelFragmentedMol(i).smilesCounts,uniqueFragmentSmiles);
-                combinedModel.G(i,bool)=cell2mat(values(trainingModelFragmentedMol(i).smilesCounts));
-            else
-                %non decomposable training molecule
-                combinedModel.G(i,nFrag+d) = 1;
-                d=d+1;
-            end
-        end
-        %map each of the test model fragments to the consolidated list of fragments
-        for i = 1:nModelMets
-            if modelDecomposableBool(i)
-                bool = isKey(modelFragmentedMol(i).smilesCounts,uniqueFragmentSmiles);
-                combinedModel.G(nTrainingModelMets+i,bool)=cell2mat(values(modelFragmentedMol(i).smilesCounts));
-            else
-                if d>nNonDecomposable
-                    error('inconsistent number of non-decomposable metabolites')
-                end
-                %non decomposable training molecule
-                combinedModel.G(nTrainingModelMets+i,nFrag+d) = 1;
-                d=d+1;
-            end
-        end
-        if d~=nNonDecomposable+1
-            error('inconsistent number of non-decomposable metabolites')
-        end
-        
-        nExclusivelyTestMets = nnz(~combinedModel.trainingMetBool & combinedModel.testMetBool);
-        combinedModel.S = [trainingModel.S; sparse(nExclusivelyTestMets,size(trainingModel.S,2))]; % Add an empty row to S
-        combinedModel.mets=[trainingModel.mets;model.mets];
-        combinedModel.rxns=trainingModel.rxns;
-        
-        uniqueMets = unique(combinedModel.mets);
-        if length(uniqueMets)~=length(combinedModel.mets)
-            error('combinedModel.mets is not a primary key')
-        end
-        if any(cellfun(@isempty,combinedModel.mets))
-            error('combinedModel.mets is not a primary key')
-        end
-        
-        if 0
-        save('debug_prior_to_regulariseGroupIncidenceMatrix')
-        
-        %analyse similar and duplicate metabolites
-        [groupM,inchiM] = regulariseGroupIncidenceMatrix(combinedModel,param.printLevel);
-        
-        %assume that metabolites with the same group decomposition are identical
-        test2CombinedModelM = groupM;
-        
-        %ignore duplicates within the training metabolite set
-        test2CombinedModelM(:,combinedModel.trainingMetBool)=0;
-
-        %boolean identifier of duplicates
-        duplicatesBool = any(test2CombinedModelM,1);
-        
-        %add the test metabolites on the diagonal to preserve mapping to unique metabolites in test model
-        test2CombinedModelM = test2CombinedModelM + diag(combinedModel.testMetBool);
-        
-        %remove all duplicate metabolites from the arguments to the map
-        test2CombinedModelM = test2CombinedModelM(~duplicatesBool,:);
-        
-        combinedModel.test2CombinedModelMap=zeros(nModelMets,1);
-        for i=1:size(test2CombinedModelM,1)
-            if any(test2CombinedModelM(i,:))
-                combinedModel.test2CombinedModelMap(test2CombinedModelM(i,combinedModel.testMetBool)~=0)=i;
-            end
-        end
-        if any(combinedModel.test2CombinedModelMap==0)
-            error('Mismatch in combinedModel.test2CombinedModelMap')
-        end
-        
-        %                            S: [6507×4149 double]
-        %                         rxns: {4149×1 cell}
-        %                           lb: [4149×1 double]
-        %                         cids: {672×1 cell}
-        %                    dG0_prime: [4149×1 double]
-        %                            T: [4149×1 double]
-        %                            I: [4149×1 double]
-        %                           pH: [4149×1 double]
-        %                          pMg: [4149×1 double]
-        %                      weights: [4149×1 double]
-        %                      balance: [4149×1 double]
-        %     cids_that_dont_decompose: [43×1 double]
-        %                         mets: {6507×1 cell}
-        %                    metKEGGID: {672×1 cell}
-        %                        inchi: [1×1 struct]
-        %                      molBool: [672×1 logical]
-        %                    inchiBool: [6507×1 logical]
-        %           compositeInchiBool: [672×1 logical]
-        %                  metFormulas: {672×1 cell}
-        %                   metCharges: [672×1 double]
-        %                pseudoisomers: [672×1 struct]
-        %                           ub: [4149×1 double]
-        %                          dG0: [4149×1 double]
-        %        groupDecomposableBool: [6507×1 logical]
-        %              trainingMetBool: [6507×1 logical]
-        %                  testMetBool: [6507×1 logical]
-        %                            G: [6507×1536 double]
-        %                       groups: {1536×1 cell}
-        %        test2CombinedModelMap: [5835×1 double]
-                
-        %save the original combined model
-        combinedModelOld = combinedModel;
-        clear combinedModel;
-        
-        %each row of the group incidence matrix should correspond to a unique metabolite
-        %account for any metabolites in the test dataset that are duplicated in the training dataset
-        combinedModel.S = combinedModelOld.S(~duplicatesBool,:);
-        combinedModel.mets = combinedModelOld.mets(~duplicatesBool);
-        combinedModel.inchi.nonstandard = combinedModelOld.inchi.nonstandard(~duplicatesBool);
-        combinedModel.inchiBool = combinedModelOld.inchiBool(~duplicatesBool);
-        combinedModel.groupDecomposableBool = combinedModelOld.groupDecomposableBool(~duplicatesBool);
-        combinedModel.trainingMetBool = combinedModelOld.trainingMetBool(~duplicatesBool);
-        combinedModel.testMetBool = combinedModelOld.testMetBool(~duplicatesBool);
-        combinedModel.rxns = combinedModelOld.rxns;
-        combinedModel.dG0 = combinedModelOld.dG0;
-        combinedModel.T = combinedModelOld.T;
-        combinedModel.pH = combinedModelOld.pH;
-        combinedModel.pMg = combinedModelOld.pMg;
-        combinedModel.I = combinedModelOld.I;
-        combinedModel.G = combinedModelOld.G(~duplicatesBool,:);
-        combinedModel.groups = combinedModelOld.groups;
-        combinedModel.test2CombinedModelMap = combinedModelOld.test2CombinedModelMap;
-        else
-            combinedModel.test2CombinedModelMap = (1:nModelMets)' + nTrainingModelMets;
-        end
-    case 'manual'
-        %
+% trainingModel:
+% trainingModel.S:                          k x n stoichiometric matrix of training + test data
+% trainingModel.G:                          k x g group incicence matrix
+% trainingModel.groups:                     g x 1 cell array of group definitions
+% trainingModel.trainingMetBool             k x 1 boolean indicating training metabolites in G
+% trainingModel.testMetBool                 k x 1 boolean indicating test metabolites in G
+% trainingModel.groupDecomposableBool:      k x 1 boolean indicating metabolites with group decomposition
+% trainingModel.cids_that_dont_decompose:   z x 1 ids of compounds that do not decomopose    
+% 
 
 %                          dG0: n x 1 standard Gibbs energy
 %                    dG0_prime: n x 1 standard transformed Gibbs energy
@@ -336,142 +51,126 @@
 %                            G: m x g group incicence matrix
 %                       groups: g x 1 cell array of group definitions
 %
-%            test2CombinedModelMap: mlt x 1 mapping of model.mets to training data metabolites
+%            Model2TrainingMap: mlt x 1 mapping of model.mets to training data metabolites
+%
+
+if ~exist('printLevel','var')
+    printLevel=0;
+end
+
 
-        % Initialize `G` matrix, and then use the python script "inchi2gv.py" to decompose each of the
-        % compounds that has an 'InChI' and save the decomposition as a row in the `G` matrix.
+fprintf('Creating group incidence matrix\n');
+
+% first just run the script to get the list of group names
+fullpath = which('getGroupVectorFromInchi.m');
+fullpath = regexprep(fullpath,'getGroupVectorFromInchi.m','');
+
+
+[status,result] = system('python2 --version');
+if status~=0
+    % https://github.com/bdu91/group-contribution/blob/master/compound_groups.py
+    % Bin Du et al. Temperature-Dependent Estimation of Gibbs Energies Using an Updated Group-Contribution Method     
+    [status,groupsTemp] = system(['python ' fullpath 'compound_groups.py -l']);
+    if status~=0
+        error('createGroupIncidenceMatrix: call to compound_groups.py failed')
+    end
+else
+    if 1
+        inchi2gv = 'inchi2gv';
+    else
+        inchi2gv = 'compound_groups';
+    end
+    [status,groupsTemp] = system(['python2 ' fullpath  inchi2gv '.py -l']);%seems to only work with python 2, poor coding to not check the status here!
+    if status~=0
+        fprintf('%s\n','If you get a python error like: undefined symbol: PyFPE_jbuf, then see the following:')
+        fprintf('%s\n','https://stackoverflow.com/questions/36190757/numpy-undefined-symbol-pyfpe-jbuf/47703373')
+        error('createGroupIncidenceMatrix: call to inchi2gv.py failed')
+    end
+end
 
-        %Match between the metabolites in the model and the metabolites in the training model
-        mappingScore = getMappingScores(model, trainingModel);
+if isnumeric(trainingModel.cids_that_dont_decompose)
+    eval(['trainingModel.cids_that_dont_decompose = {' regexprep(sprintf('''C%05d''; ',trainingModel.cids_that_dont_decompose),'(;\s)$','') '};']);
+end
 
-        % first just run the script to get the list of group names
-        fullpath = which('getGroupVectorFromInchi.m');
-        fullpath = regexprep(fullpath,'getGroupVectorFromInchi.m','');
-        
-        
-        [status,result] = system('python2 --version');
-        if status~=0
-            % https://github.com/bdu91/group-contribution/blob/master/compound_groups.py
-            % Bin Du et al. Temperature-Dependent Estimation of Gibbs Energies Using an Updated Group-Contribution Method
-            [status,groupsTemp] = system(['python ' fullpath 'compound_groups.py -l']);
-            if status~=0
-                error('createGroupIncidenceMatrix: call to compound_groups.py failed')
-            end
+groups = regexp(groupsTemp,'\n','split')';
+clear groupsTemp;
+trainingModel.groups = groups(~cellfun(@isempty, groups));
+trainingModel.G = sparse(length(trainingModel.metKEGGID), length(trainingModel.groups));
+trainingModel.groupDecomposableBool = false(size(trainingModel.metKEGGID));
+trainingModel.testMetBool = false(size(trainingModel.metKEGGID));
+for i = 1:length(trainingModel.metKEGGID)
+    [score, modelRow] = max(full(mappingScore(:,i)));
+    if score == 0 
+        inchi = trainingModel.inchi.nonstandard{i};
+    else
+        % if there is a match to the model, use the InChI from there to be consistent with later transforms
+        inchi = model.inchi.nonstandard{modelRow};
+        trainingModel.testMetBool(i)=1;
+    end
+
+    % There might be compounds in the model but not in the training data that also cannot be
+    % decomposed, we need to take care of them too (e.g. DMSO - C11143)
+    if isempty(inchi) || any(ismember(trainingModel.metKEGGID{i}, trainingModel.cids_that_dont_decompose))
+        trainingModel.G(:, end+1) = 0; % add a unique 1 in a new column for this undecomposable compound
+        trainingModel.G(i, end) = 1;
+        trainingModel.groupDecomposableBool(i) = false;
+    else
+        group_def = getGroupVectorFromInchi(inchi);
+        if length(group_def) == length(trainingModel.groups)
+            trainingModel.G(i, 1:length(group_def)) = group_def;
+            trainingModel.groupDecomposableBool(i) = true;
+        elseif isempty(group_def)
+            warning(['createGroupIncidenceMatrix: undecomposable inchi: ' inchi])
+            trainingModel.G(:, end+1) = 0; % add a unique 1 in a new column for this undecomposable compound
+            trainingModel.G(i, end) = 1;
+            trainingModel.groupDecomposableBool(i) = false;
+            trainingModel.cids_that_dont_decompose = [trainingModel.cids_that_dont_decompose; trainingModel.metKEGGID{i}];
         else
-            if 1
-                inchi2gv = 'inchi2gv';
-            else
-                inchi2gv = 'compound_groups';
-            end
-            [status,groupsTemp] = system(['python2 ' fullpath  inchi2gv '.py -l']);%seems to only work with python 2, poor coding to not check the status here!
-            if status~=0
-                fprintf('%s\n','If you get a python error like: undefined symbol: PyFPE_jbuf, then see the following:')
-                fprintf('%s\n','https://stackoverflow.com/questions/36190757/numpy-undefined-symbol-pyfpe-jbuf/47703373')
-                error('createGroupIncidenceMatrix: call to inchi2gv.py failed')
-            end
+            fprintf('InChI = %s\n', inchi);
+            fprintf('*************\n%s\n', getGroupVectorFromInchi(inchi, printLevel));
+            error(sprintf('ERROR: while trying to decompose compound C%05d', trainingModel.metKEGGID{i}));
         end
-        
-        if isnumeric(trainingModel.cids_that_dont_decompose)
-            eval(['trainingModel.cids_that_dont_decompose = {' regexprep(sprintf('''C%05d''; ',trainingModel.cids_that_dont_decompose),'(;\s)$','') '};']);
-        end
-        
-        groups = regexp(groupsTemp,'\n','split')';
-        clear groupsTemp;
-        trainingModel.groups = groups(~cellfun(@isempty, groups));
-        trainingModel.G = sparse(length(trainingModel.metKEGGID), length(trainingModel.groups));
-        trainingModel.groupDecomposableBool = false(size(trainingModel.metKEGGID));
-        trainingModel.testMetBool = false(size(trainingModel.metKEGGID));
-        trainingModel.trainingMetBool = false(size(trainingModel.metKEGGID));
-        for i = 1:length(trainingModel.metKEGGID)
-            trainingModel.trainingMetBool(i)=1;
-            [score, modelRow] = max(full(mappingScore(:,i)));
-            if score == 0
-                inchi = trainingModel.inchi.nonstandard{i};
-                trainingModel.testMetBool(i)=0;
-                
-            else
-                % if there is a match to the model, use the InChI from there to be consistent with later transforms
-                inchi = model.inchi.nonstandard{modelRow};
-                trainingModel.testMetBool(i)=1;
-            end
-            
-            % There might be compounds in the model but not in the training data that also cannot be
-            % decomposed, we need to take care of them too (e.g. DMSO - C11143)
-            if isempty(inchi) || any(ismember(trainingModel.metKEGGID{i}, trainingModel.cids_that_dont_decompose))
-                trainingModel.G(:, end+1) = 0; % add a unique 1 in a new column for this undecomposable compound
-                trainingModel.G(i, end) = 1;
-                trainingModel.groupDecomposableBool(i) = false;
-            else
-                group_def = getGroupVectorFromInchi(inchi);
-                if length(group_def) == length(trainingModel.groups)
-                    trainingModel.G(i, 1:length(group_def)) = group_def;
-                    trainingModel.groupDecomposableBool(i) = true;
-                elseif isempty(group_def)
-                    warning(['createGroupIncidenceMatrix: undecomposable inchi: ' inchi])
-                    trainingModel.G(:, end+1) = 0; % add a unique 1 in a new column for this undecomposable compound
-                    trainingModel.G(i, end) = 1;
-                    trainingModel.groupDecomposableBool(i) = false;
-                    trainingModel.cids_that_dont_decompose = [trainingModel.cids_that_dont_decompose; trainingModel.metKEGGID{i}];
-                else
-                    fprintf('InChI = %s\n', inchi);
-                    fprintf('*************\n%s\n', getGroupVectorFromInchi(inchi, param.printLevel));
-                    error(sprintf('ERROR: while trying to decompose compound C%05d', trainingModel.metKEGGID{i}));
-                end
-            end
-        end
-        trainingModel.G = sparse(trainingModel.G);
-        
-        
-        trainingModel.test2CombinedModelMap = zeros(size(model.mets));
-        done = {};
-        for n = 1:length(model.mets)
-            % first find all metabolites with the same name (can be in different compartments)
-            met = model.mets{n}(1:end-3);
-            if any(strcmp(met, done)) % this compound was already mapped
-                continue;
-            end
-            done = [done; {met}];
-            
-            inchi = model.inchi.nonstandard{n};
-            
-            [score, trainingRow] = max(full(mappingScore(n,:)));
-            if score == 0 % this compound is not in the training data
-                trainingRow = size(trainingModel.G, 1) + 1;
-                trainingModel.trainingMetBool(trainingRow)=1;
-                trainingModel.S(trainingRow, :) = 0; % Add an empty row to S
-                
-                % Add a row in G for this compound, either with its group vector,
-                % or with a unique 1 in a new column dedicate to this compound
-                trainingModel.G(trainingRow, :) = 0;
-                group_def = getGroupVectorFromInchi(inchi);
-                if length(group_def) == length(trainingModel.groups)
-                    trainingModel.G(trainingRow, 1:length(group_def)) = group_def;
-                    trainingModel.groupDecomposableBool(trainingRow) = true;
-                elseif isempty(group_def)
-                    trainingModel.G(:, end+1) = 0; % add a unique 1 in a new column for this undecomposable compound
-                    trainingModel.G(trainingRow, end) = 1;
-                    trainingModel.groupDecomposableBool(trainingRow) = false;
-                else
-                    error('The length of the group vector is different from the number of groups');
-                end
-            end
+    end
+end
+trainingModel.G = sparse(trainingModel.G);
+
+trainingModel.Model2TrainingMap = zeros(size(model.mets));
+done = {};
+
+for n = 1:length(model.mets)
+    % first find all metabolites with the same name (can be in different compartments)
+    met = model.mets{n}(1:end-3);
+    if any(strcmp(met, done)) % this compound was already mapped
+        continue;
+    end
+    done = [done; {met}];
+    metIdx = strmatch([met '['], model.mets);
+    inchi = model.inchi.nonstandard{n};
+
+    [score, trainingRow] = max(full(mappingScore(n,:)));
+    if score == 0 % this compound is not in the training data
+        trainingRow = size(trainingModel.G, 1) + 1;
+        trainingModel.S(trainingRow, :) = 0; % Add an empty row to S
+        trainingModel.testMetBool(i)=1;
+        % Add a row in G for this compound, either with its group vector,
+        % or with a unique 1 in a new column dedicated to this compound
+        trainingModel.G(trainingRow, :) = 0;
+        group_def = getGroupVectorFromInchi(inchi);
+        if length(group_def) == length(trainingModel.groups)
+            trainingModel.G(trainingRow, 1:length(group_def)) = group_def;
+            trainingModel.groupDecomposableBool(trainingRow) = true;
             
-            metIdx = contains(model.mets,[met '[']);
-            trainingModel.test2CombinedModelMap(metIdx) = trainingRow; % map the model met to this NIST compound
+        elseif isempty(group_def)
+            trainingModel.G(:, end+1) = 0; % add a unique 1 in a new column for this undecomposable compound
+            trainingModel.G(trainingRow, end) = 1;
+            trainingModel.groupDecomposableBool(trainingRow) = false;
+        else
+            error('The length of the group vector is different than the number of groups');
         end
-        %new variable name for combined model
-        combinedModel = trainingModel;
-    otherwise
-        error(['Unrecognised param.fragmentationMethod =' param.fragmentationMethod])
+    end
+    trainingModel.Model2TrainingMap(metIdx) = trainingRow; % map the model met to this NIST compound
 end
 
-boolGroup=~any(combinedModel.G,1);
-if any(boolGroup)
-    error([int2str(nnz(boolGroup)) ' groups without any corresponding metabolite'])
-end
-boolMet=~any(combinedModel.G,2);
-if any(boolMet)
-    error([int2str(nnz(boolMet)) ' metabolites without any corresponding group'])
-end
-
-
+[m,g]=size(trainingModel.G);
+trainingModel.trainingMetBool=false(m,1);
+trainingModel.trainingMetBool(1:length(trainingModel.metKEGGID),1)=1;
\ No newline at end of file
diff --git a/src/analysis/thermo/vonBertalanffy/addInchiToModel.m b/src/analysis/thermo/inchi/addInchiToModel.m
similarity index 100%
rename from src/analysis/thermo/vonBertalanffy/addInchiToModel.m
rename to src/analysis/thermo/inchi/addInchiToModel.m
diff --git a/src/dataIntegration/chemoInformatics/molecularWeight/getMolecularWeight.m b/src/analysis/thermo/inchi/getMolecularWeight.m
similarity index 100%
rename from src/dataIntegration/chemoInformatics/molecularWeight/getMolecularWeight.m
rename to src/analysis/thermo/inchi/getMolecularWeight.m
diff --git a/src/analysis/thermo/protons/old/inchi2mol.m b/src/analysis/thermo/inchi/inchi2mol.m
similarity index 100%
rename from src/analysis/thermo/protons/old/inchi2mol.m
rename to src/analysis/thermo/inchi/inchi2mol.m
diff --git a/src/analysis/thermo/inchi/createInChIStruct.m b/src/analysis/thermo/inchi/new/createInChIStruct.m
similarity index 100%
rename from src/analysis/thermo/inchi/createInChIStruct.m
rename to src/analysis/thermo/inchi/new/createInChIStruct.m
diff --git a/src/analysis/thermo/inchi/getFormulaAndChargeFromInChI.m b/src/analysis/thermo/inchi/new/getFormulaAndChargeFromInChI.m
similarity index 100%
rename from src/analysis/thermo/inchi/getFormulaAndChargeFromInChI.m
rename to src/analysis/thermo/inchi/new/getFormulaAndChargeFromInChI.m
diff --git a/src/analysis/thermo/inchi/old/createInChIStruct.m b/src/analysis/thermo/inchi/old/createInChIStruct.m
new file mode 100644
index 0000000000..5b6d2e1375
--- /dev/null
+++ b/src/analysis/thermo/inchi/old/createInChIStruct.m
@@ -0,0 +1,66 @@
+function inchiStruct = createInChIStruct(mets,sdfFileName)
+% Converts metabolite structures in SDF to InChI strings with OpenBabel,
+% and maps InChIs to mets. 
+% 
+% inchi = createInChIStruct(mets,sdfFileName)
+% 
+% INPUTS
+% mets          m x 1 cell array of metabolite identifiers (e.g., BiGG
+%               abbreviations).
+% sdfFileName   SDF with structures of metabolites in mets. Metabolite
+%               identifiers in the SDF are assumed to be the same as in
+%               mets.
+% 
+% OUTPUTS
+% inchi                         Structure with following fields:
+% .standard                     Standard InChIs with no isotope, stereo
+%                               or charge layers. 
+% .standardWithStereo           Standard InChIs with stereo layers.
+% .standardWithStereoAndCharge  Standard InChIs with stereo and charge
+%                               layers
+% .nonstandard                  Nonstandard InChI with all layers.
+% 
+% Hulda SH, Nov. 2012
+
+% Convert SDF to InChIs
+[standard,metList1] = sdf2inchi(sdfFileName,'-xtT/noiso/nochg/nostereo');
+
+[standardWithStereo,metList2] = sdf2inchi(sdfFileName,'-xtT/noiso/nochg');
+if ~all(strcmp(metList1,metList2))
+    error('Error creating InChI structure.');
+end
+
+[standardWithStereoAndCharge,metList3] = sdf2inchi(sdfFileName,'-xtT/noiso');
+if ~all(strcmp(metList1,metList3))
+    error('Error creating InChI structure.');
+end
+
+[nonstandard,metList4] = sdf2inchi(sdfFileName,'-xtFT/noiso');
+if ~all(strcmp(metList1,metList4))
+    error('Error creating InChI structure.');
+end
+
+%  Map InChIs to mets
+inchiStruct.standard = cell(size(mets));
+inchiStruct.standardWithStereo = cell(size(mets));
+inchiStruct.standardWithStereoAndCharge = cell(size(mets));
+inchiStruct.nonstandard = cell(size(mets));
+
+mets = reshape(mets,length(mets),1);
+if ischar(mets)
+    mets = strtrim(cellstr(mets));
+end
+if iscell(mets)
+    mets = regexprep(mets,'(\[\w\])$',''); % Remove compartment assignment
+end
+if isnumeric(mets)
+    mets = strtrim(cellstr(num2str(mets)));
+end
+
+for i = 1:length(metList1)
+    inchiStruct.standard(ismember(mets,metList1{i})) = standard(i);
+    inchiStruct.standardWithStereo(ismember(mets,metList1{i})) = standardWithStereo(i);
+    inchiStruct.standardWithStereoAndCharge(ismember(mets,metList1{i})) = standardWithStereoAndCharge(i);
+    inchiStruct.nonstandard(ismember(mets,metList1{i})) = nonstandard(i);
+end
+
diff --git a/src/analysis/thermo/inchi/old/getFormulaAndChargeFromInChI.m b/src/analysis/thermo/inchi/old/getFormulaAndChargeFromInChI.m
new file mode 100644
index 0000000000..030065bdaf
--- /dev/null
+++ b/src/analysis/thermo/inchi/old/getFormulaAndChargeFromInChI.m
@@ -0,0 +1,57 @@
+function [formula, nH, charge] = getFormulaAndChargeFromInChI(inchi)
+
+% [formula,charge] = getFormulaAndChargeFromInChI(inchi)
+% 
+% INPUT
+% inchi.......Nonstandard IUPAC InChI for a particular pseudoisomer of a
+%             metabolite
+% 
+% OUTPUTS
+% formula....The chemical formula for the input pseudoisomer
+% charge.....The charge on the input pseudoisomer
+
+
+layers = regexp(inchi,'/','split');
+f1 = layers{2}; % Fully protonated formula
+
+p = {};
+if ~isempty(strmatch('p',layers))
+    p = layers{strmatch('p',layers)}; % nH to add or subtract
+end
+
+q = {};
+if ~isempty(strmatch('q',layers))
+    q = layers{strmatch('q',layers)}; % charge
+end
+
+if ~isempty(q)
+   charge = str2double(q(2:end));
+else
+    charge = 0;
+end
+
+f1_nH = numAtomsOfElementInFormula(f1,'H'); % nH in fully protonated formula
+if ~isempty(p)
+    nH = f1_nH + str2double(p(2:end)); % nH in pseudoisomer formula
+else
+    nH = f1_nH;
+end
+
+formula = regexprep(f1,'H[a-z]*[0-9]*',''); % Remove all H from fully protonated formula
+if nH == 1
+    formula = [formula 'H'];
+elseif nH > 1
+    formula = [formula 'H' num2str(nH)]; % Add appropriate nH back in to create pseudoisomer formula
+elseif nH < 0
+    error('Negative number of H in formula.') % Should never get here
+end
+
+% In case there is Hg in formula
+f1_nHg = numAtomsOfElementInFormula(f1,'Hg');
+if f1_nHg == 1
+    formula = [formula 'Hg'];
+elseif f1_nHg > 1
+    formula = [formula 'Hg' num2str(f1_nHg)];
+end
+
+end
diff --git a/src/analysis/thermo/inchi/sdf2inchi.m b/src/analysis/thermo/inchi/sdf2inchi.m
deleted file mode 100644
index c263bb9217..0000000000
--- a/src/analysis/thermo/inchi/sdf2inchi.m
+++ /dev/null
@@ -1,62 +0,0 @@
-function [inchi, metList] = sdf2inchi(sdfFileName, options)
-% Converts metabolite structures in an SDF to a cell array of InChI
-% strings with OpenBabel.
-%
-% USAGE:
-%
-%    [inchi, metList] = sdf2inchi(sdfFileName, options)
-%
-% INPUT:
-%    sdfFileName:    Path to SDF file.
-%
-% OPTIONAL INPUTS:
-%    options:        Write options for InChI strings. See InChI documentation
-%                    for details. If no options are specified the function will
-%                    output standard InChI.
-%
-% OUTPUTS:
-%    inchi:          Cell array of InChI strings for metabolites in the SDF file.
-%    metList:        Cell array of metabolite identifiers (first line of each
-%                    molfile in SDF). Will be empty unless write option `t` is
-%                    used (i.e., options >= '-xt').
-%
-% .. Author: - Hulda SH, Nov. 2012
-
-if ~strcmp(sdfFileName(end-3:end),'.sdf') % Check inputs
-    sdfFileName = [sdfFileName '.sdf'];
-end
-
-if ~exist('options','var')
-    options = [];
-end
-if ~isempty(options)
-   options = [' ' strtrim(options)];
-end
-
-%babelcmd='obabel';
-babelcmd='babel';
-
-% Convert to InChI with OpenBabel
-[success,result] = system([babelcmd ' ' sdfFileName ' -oinchi' options]);
-
-% Parse output from OpenBabel
-if success == 0
-    result = regexp(result,'InChI=[^\n]*\n','match');
-    result = result';
-    result = strtrim(result);
-
-    [inchi,metList] = strtok(result);
-    inchi = strtrim(inchi);
-    metList = strtrim(metList);
-else
-    [success,result] = system([babelcmd ' ' sdfFileName ' -oinchi' options])
-    fprintf('%s\n','If you get a ''not found'' message from the call to Babel, make sure that Matlab''s LD_LIBRARY_PATH is edited to include correct system libraries. See initVonBertylanffy')
-    error('Conversion to InChI not successful. Make sure OpenBabel is installed correctly.\n')
-end
-
-if size(inchi,2) > size(inchi,1)
-    inchi = inchi';
-end
-if size(metList,2) > size(metList,1)
-    metList = metList';
-end
diff --git a/src/analysis/thermo/vonBertalanffy/addPseudoisomersToModel.m b/src/analysis/thermo/protons/addPseudoisomersToModel.m
similarity index 100%
rename from src/analysis/thermo/vonBertalanffy/addPseudoisomersToModel.m
rename to src/analysis/thermo/protons/addPseudoisomersToModel.m
diff --git a/src/analysis/thermo/protons/new/inchi2mol.m b/src/analysis/thermo/protons/new/inchi2mol.m
deleted file mode 100644
index ec50745cef..0000000000
--- a/src/analysis/thermo/protons/new/inchi2mol.m
+++ /dev/null
@@ -1,85 +0,0 @@
-function successbool = inchi2mol(inchis, filenames, outputdir, overwrite)
-% Convert InChI strings to mol files using OpenBabel. Compatible with
-% Windows and Unix.
-%
-% USAGE:
-%
-%    successbool = inchi2mol(inchis, filenames, outputdir, overwrite)
-%
-% INPUTS:
-%    inchis:         `n x 1` Cell array of InChI strings
-%    filenames:      `n x 1` Cell array of mol file names without file extension. Default is {'1' ;'2' ;...}
-%    outputdir:      Directory for mol files. Default is 'CurrentDirectory\molfiles'.
-%    overwrite:      0, [1]. Specify whether to overwrite existing mol files in outputdir.
-%
-% OUTPUTS:
-%    successbool:    `n x 1` logical vector with 1 at indices corresponding to
-%                    inchis that were successfully converted to mol files and 0 elsewhere.
-%
-% .. Author: - Hulda SH  Feb. 2011
-
-if ~iscell(inchis) && size(inchis,1) == 1 % Format input and set defaults, If a single InChI is input as string
-    inchis = {inchis};
-end
-
-if ~exist('filenames','var') || isempty(filenames) % Create cell array of default filenames
-    filenames = 1:length(inchis);
-    filenames = filenames';
-    filenames = num2str(filenames);
-    filenames = mat2cell(filenames,ones(1,size(filenames,1)),size(filenames,2));
-    filenames = deblank(filenames);
-end
-
-if ~iscell(filenames) && size(filenames,1) == 1 % If a single file name is input as string
-    filenames = {filenames};
-end
-
-if size(inchis) ~= size(filenames)
-    error('inchis and filenames should be the same size.');
-end
-
-% Set other defaults
-if ~exist('outputdir','var') || isempty(outputdir)
-    if ~exist([pwd '\molfiles'],'dir')
-        mkdir(pwd,'molfiles')
-    end
-    outputdir = [pwd '\molfiles'];
-end
-
-if ~exist('overwrite','var') || isempty(overwrite)
-    overwrite = 0;
-end
-
-% Begin conversion
-if ~exist('outputdir','dir') % Create output directory
-    mkdir(outputdir)
-end
-
-system(['cd ' outputdir]); % Make OS cd to outputdir
-cd(outputdir); % Make Matlab cd to outputdir
-
-% List existing mol files in outputdir
-d = dir(outputdir);
-existingMolFiles = cat(2,d.name);
-existingMolFiles = ['.mol' existingMolFiles];
-
-nInchis = length(inchis);
-successbool = false(nInchis,1);
-
-for n = 1:nInchis
-    if ~isempty(inchis{n})
-        if isempty(strfind(existingMolFiles,['.mol' filenames{n} '.mol'])) || overwrite == 1 % If mol file of same name does not already exist or if it should be overwritten
-            system(['echo ' inchis{n} ' > inchi.inchi']); % Temporary place holder for inchi. Overwritten in each loop.
-            system(['babel inchi.inchi ' filenames{n} '.mol']); % Convert InChI to mol file
-
-            f = dir([outputdir '\' filenames{n} '.mol']);
-            if f.bytes > eps % If mol file is empty the conversion failed
-                successbool(n) = true;
-            else
-                delete([outputdir '\' filenames{n} '.mol']) % Delete empty mol files
-            end
-        end
-    end
-end
-
-delete([outputdir '\inchi.inchi'])
diff --git a/src/analysis/thermo/vonBertalanffy/assignQuantDir.m b/src/analysis/thermo/thermoDirectionality/assignQuantDir.m
similarity index 100%
rename from src/analysis/thermo/vonBertalanffy/assignQuantDir.m
rename to src/analysis/thermo/thermoDirectionality/assignQuantDir.m
diff --git a/src/analysis/thermo/vonBertalanffy/thermoConstrainFluxBounds.m b/src/analysis/thermo/thermoDirectionality/thermoConstrainFluxBounds.m
similarity index 100%
rename from src/analysis/thermo/vonBertalanffy/thermoConstrainFluxBounds.m
rename to src/analysis/thermo/thermoDirectionality/thermoConstrainFluxBounds.m
diff --git a/src/dataIntegration/chemoInformatics/addMetInfoInCBmodel.m b/src/dataIntegration/chemoInformatics/addMetInfoInCBmodel.m
new file mode 100644
index 0000000000..eee5fc11fb
--- /dev/null
+++ b/src/dataIntegration/chemoInformatics/addMetInfoInCBmodel.m
@@ -0,0 +1,116 @@
+function [newModel, hasEffect] = addMetInfoInCBmodel(model, inputData, replace)
+% Integrates metabolite data from an external file and incorporates it into 
+% the COBRA model.
+%
+% USAGE:
+%
+%    model = addMetInfoInCBmodel(model, inputData)
+%
+% INPUTS:
+%    model:         COBRA model with following fields:
+%
+%                       * .S - The m x n stoichiometric matrix for the
+%                              metabolic network.
+%                       * .mets - An m x 1 array of metabolite identifiers.
+%    inputData:     COBRA model with following fields:
+%
+% OPTIONAL INPUTS:
+%    replace:       If the new ID should replace an existing ID, this 
+%                   logical value indicates so (default: false).
+%
+% OUTPUTS:          COBRA model with updated identifiersCOBRA model with 
+%                   the identifiers updated.
+
+if nargin < 3 || isempty(replace)
+    replace = false;
+end
+
+hasEffect = false;
+
+if isfile(inputData)
+    dbData = readtable(inputData);
+elseif isstruct(inputData)
+    dbData = readtable(inputData);
+end
+
+% Get data from model and external source
+sources = {'KEGG', 'HMDB', 'ChEBI', 'PubChem', 'SMILES', 'InChI'};
+fields = fieldnames(model);
+newDataVariableNames = dbData.Properties.VariableNames;
+metsInModel = regexprep(model.mets, '(\[\w\])', '');
+
+for i = 1:size(sources, 2)
+    
+    % Get the corresponding field for a database in the model
+    fieldInModelBool = ~cellfun(@isempty, regexpi(fields, sources{i}));
+    if sum(fieldInModelBool) > 1
+        metDataInModelBool = ~cellfun(@isempty, regexpi(fields, 'met'));
+        fieldInModelBool = fieldInModelBool & metDataInModelBool;
+    end
+    % Create the field if it does not exist
+    if sum(fieldInModelBool) == 0
+        model.(['met' sources{i}]) = cell(size(metsInModel, 1), 1);
+        fields = fieldnames(model);
+        fieldInModelBool = ~cellfun(@isempty, regexpi(fields, sources{i}));
+    end
+    
+    % Get the corresponding field for a database in the data
+    fieldInDataBool = ~cellfun(@isempty, regexpi(newDataVariableNames, sources{i}));
+    if sum(fieldInDataBool) > 1
+        metDataInDataBool = ~cellfun(@isempty, regexpi(newDataVariableNames, 'met'));
+        inchiKeyInDataBool = cellfun(@isempty, regexpi(newDataVariableNames, 'inchikey'));
+        fieldInDataBool = fieldInDataBool & (metDataInDataBool | inchiKeyInDataBool);
+    end
+    
+    if sum(fieldInModelBool) == 1 && sum(fieldInDataBool) == 1
+        
+        % Identify the correct idx per metabolite
+        for j = 1:size(dbData.(newDataVariableNames{1}), 1)
+            
+            % Fix for numeric values omitting 0 and NaN
+            if isnumeric(dbData.(newDataVariableNames{fieldInDataBool})(j))
+                if ~isnan(dbData.(newDataVariableNames{fieldInDataBool})(j)) && dbData.(newDataVariableNames{fieldInDataBool})(j) ~= 0
+                    data2add = num2str(dbData.(newDataVariableNames{fieldInDataBool})(j));
+                else
+                    data2add = [];
+                end
+            else
+                data2add = dbData.(newDataVariableNames{fieldInDataBool}){j};
+            end
+            idx = strmatch(dbData.(newDataVariableNames{1}){j}, metsInModel, 'exact');
+            
+            % Fill the data
+            if ~isempty(idx) && replace && ~isempty(data2add)
+                
+                % Add the ID
+                for k = 1:size(idx, 1)
+                    model.(fields{fieldInModelBool})(idx(k)) = data2add;
+                end
+                if ~hasEffect
+                    hasEffect = true;
+                end
+                
+            elseif ~isempty(idx) && ~replace && ~isempty(data2add)
+                
+                % Only add data on empty cells
+                idxBool = cellfun(@isempty, model.(fields{fieldInModelBool})(idx));
+                if any(idxBool)
+                    idx = idx(idxBool);
+                    % Add the ID
+                    for k = 1:size(idx, 1)
+                        model.(fields{fieldInModelBool}){idx(k)} = data2add;
+                    end
+                    if ~hasEffect
+                        hasEffect = true;
+                    end
+                end
+                
+            end
+            
+        end
+    elseif sum(fieldInModelBool) > 1 || sum(fieldInDataBool) > 1
+        error(['Rename the data, nameTag "' sources{i} '" was found more than 1 time'])
+    end
+end
+
+newModel = model;
\ No newline at end of file
diff --git a/src/dataIntegration/chemoInformatics/checkMetIdsInModel.m b/src/dataIntegration/chemoInformatics/checkMetIdsInModel.m
new file mode 100644
index 0000000000..58d82e3ca7
--- /dev/null
+++ b/src/dataIntegration/chemoInformatics/checkMetIdsInModel.m
@@ -0,0 +1,75 @@
+function metsIdsStatistics = checkMetIdsInModel(model)
+% Prints the statistics of the metabolites id in the a model.
+%
+% USAGE:
+%
+%    metsIdsStatistics = checkMetIdsInModel(model)
+%
+% INPUTS:
+%    model:              COBRA model with following fields:
+%
+%                       * .mets - An m x 1 array of metabolite identifiers.
+%                                 Should match metabolite identifiers in
+%                                 RXN.
+% OUTPUTS:
+%    metsIdsStatistics:	 Information such as the number of metabolites per 
+%                        database, coverage, or the number of times another 
+%                        ID is shared for the same metabolite.
+
+fields = fieldnames(model);
+sources = {'kegg', 'hmdb', 'chebi', 'pubchem', 'smiles', 'inchi'};
+mets = regexprep(model.mets, '(\[\w\])', '');
+umets = unique(mets);
+
+for i = 1:length(sources)
+    
+    fieldInModelBool = ~cellfun(@isempty, regexpi(fields, sources{i}));
+    if any(fieldInModelBool)
+        
+        if sum(fieldInModelBool) == 1
+            sourceIdsPerMet = model.(fields{fieldInModelBool});
+        else
+            dbFields = fields(fieldInModelBool);
+            sourceIdsPerMet = model.(dbFields{~cellfun(@isempty, regexpi(dbFields, 'met'))});
+        end
+        
+        
+        sourceData.metWithIdBool = ~cellfun(@isempty, sourceIdsPerMet);
+        sourceData.metWithIdTotal = sum(sourceData.metWithIdBool);
+        sourceData.DBcoverage = (sourceData.metWithIdTotal * 100) / size(mets, 1);
+        
+        % unique metabolites
+        dbMets = sourceIdsPerMet(sourceData.metWithIdBool);
+        uDbMets = unique(dbMets);
+        uniqueBool = false(size(mets, 1), 1);
+        for j = 1:size(uDbMets, 1)
+            idx = find(ismember(sourceIdsPerMet, uDbMets{j}));
+            uniqueBool(idx(1)) = true;
+        end
+        
+        sourceData.uniqueMetWithIdBool = uniqueBool;
+        sourceData.uniqueMetWithIdTotal = sum(sourceData.uniqueMetWithIdBool);
+        sourceData.uniqueIdDBcoverage = (sourceData.uniqueMetWithIdTotal * 100) / size(umets, 1);
+        
+        switch sources{i}
+            
+            case 'kegg'
+                metsIdsStatistics.KEEG = sourceData;
+            case 'hmdb'
+                metsIdsStatistics.HMDB = sourceData;
+            case 'chebi'
+                metsIdsStatistics.ChEBi = sourceData;
+            case 'pubchem'
+                metsIdsStatistics.PubChem = sourceData;
+            case 'smiles'
+                metsIdsStatistics.SMILES = sourceData;
+            case 'inchi'
+                metsIdsStatistics.InChI = sourceData;
+        end
+        
+        clear DBdata dbIdsPerMet
+    end
+end
+
+end
+
diff --git a/src/dataIntegration/chemoInformatics/editChemicalFormula.m b/src/dataIntegration/chemoInformatics/editChemicalFormula.m
new file mode 100644
index 0000000000..4700258498
--- /dev/null
+++ b/src/dataIntegration/chemoInformatics/editChemicalFormula.m
@@ -0,0 +1,76 @@
+function newFormula = editChemicalFormula(metFormula, addOrRemove)
+% For each instance, removes non-chemical characters from the chemical
+% formula and replaces them with a R group. Removes atoms from the formula 
+% as well. Produces a chemical formula from a string of atoms.
+%
+% USAGE:
+%
+%    [metFormula] = cobraFormulaToChemFormula(metReconFormula)
+%
+% INPUTS:
+%    metReconFormula:   An n x 1 array of metabolite Recon formulas
+% OPTIONAL INPUTS:
+%    addOrRemove:          A struct array containing:
+%                           *.elements - element to edit
+%                           *.times - vector indicated the times the
+%                                     element will be deleted (negative) or 
+%                                     added (positive)
+%
+% OUTPUTS:
+%    chemicalFormula:	A chemical formula for a metabolite
+
+if nargin < 2 || isempty(addOrRemove)
+    addOrRemove = [];
+end
+
+% Update R groups
+metFormula = regexprep(char(metFormula), 'X|Y|*|FULLR', 'R');
+
+% Rearrange the formulas (e.g., from RCRCOC to C3OR2)
+
+% Count the atoms
+[elemetList, ~ , elemetEnd] = regexp(metFormula, ['[', 'A':'Z', '][', 'a':'z', ']?'], 'match');
+[num, numStart] = regexp(metFormula, '\d+', 'match');
+numList = ones(size(elemetList));
+idx = ismember(elemetEnd + 1, numStart);
+numList(idx) = cellfun(@str2num, num);
+
+% Combine atoms if neccesary
+uniqueElemetList = unique(elemetList);
+for j = 1:size(uniqueElemetList, 2)
+    atomBool = ismember(elemetList, uniqueElemetList{j});
+    if sum(atomBool) > 1
+        dataToKeep = find(atomBool);
+        numList(dataToKeep(1)) = sum(numList(atomBool));
+        numList(dataToKeep(2:end)) = [];
+        elemetList(dataToKeep(2:end)) = [];
+    end
+end
+
+% Delete/add atoms if neccesary
+if ~isempty(addOrRemove)
+    for i = 1:length(addOrRemove.elements)
+        elemetBool = ismember(elemetList, addOrRemove.elements(i));
+        if any(elemetBool)
+            numList(elemetBool) = numList(elemetBool) + addOrRemove.times(i);
+        else
+            elemetList{size(elemetList, 2) + 1} = addOrRemove.elements{i};
+            numList(size(elemetList, 2)) = addOrRemove.times(i);
+        end
+    end
+end
+
+% Sort atoms
+[elemetList, ia] = sort(elemetList);
+
+% Make formula
+newFormula = [];
+for j = 1:size(elemetList, 2)
+    if numList(ia(j)) == 1
+        newFormula = [newFormula elemetList{j}];
+    else
+        newFormula = [newFormula elemetList{j} num2str(numList(ia(j)))];
+    end
+end
+
+end
diff --git a/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m b/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m
new file mode 100644
index 0000000000..68d739e25e
--- /dev/null
+++ b/src/dataIntegration/chemoInformatics/generateChemicalDatabase.m
@@ -0,0 +1,1095 @@
+function [info, newModel] = generateChemicalDatabase(model, options)
+% This function uses the metabolite identifiers in the model to compare
+% them and save the identifiers with the best score in MDL MOL format
+% and/or inchi and simles and jpeg if it's installed cxcalc and openBabel.
+% The obtained MDL MOL files will serve as the basis for creating the MDL
+% RXN files that represent a metabolic reaction and can only be written if
+% there is a MDL MOL file for each metabolite in a metabolic reaction.
+% If JAVA is installed, it also atom maps the metabolic reactions
+% with an MDL RXN file. 
+%
+% USAGE:
+%
+%    [info, newModel] = generateChemicalDatabase(model, options)
+%
+% INPUTS:
+%
+%    model:    COBRA model with following fields:
+%
+%                  * .S - The m x n stoichiometric matrix for the
+%                         metabolic network.
+%                  * .rxns - An n x 1 array of reaction identifiers.
+%                  * .mets - An m x 1 array of metabolite identifiers.
+%                  * .metFormulas - An m x 1 array of metabolite chemical formulas.
+%                  * .metinchi - An m x 1 array of metabolite identifiers.
+%                  * .metsmiles - An m x 1 array of metabolite identifiers.
+%                  * .metKEGG - An m x 1 array of metabolite identifiers.
+%                  * .metHMDB - An m x 1 array of metabolite identifiers.
+%                  * .metPubChem - An m x 1 array of metabolite identifiers.
+%                  * .metCHEBI - An m x 1 array of metabolite identifiers.
+%
+%    options:  A structure containing all the arguments for the function:
+%
+%               * .outputDir: The path to the directory containing the RXN 
+%                 files with atom mappings (default: current directory)
+%               * .printlevel: Verbose level 
+%               * .standardisationApproach: String containing the type of 
+%                 standardisation for the molecules (default: 'explicitH' 
+%                 if openBabel is installed, otherwise default: 'basic'):
+%                   - explicitH: Normal chemical graphs; 
+%                   - implicitH: Hydrogen suppressed chemical graph; 
+%                   - neutral: Chemical graphs with protonated molecules; 
+%                   - basic: Update the header.  
+%               * .keepMolComparison: Logic value for comparing MDL MOL 
+%                 files from various sources (default: FALSE) 
+%               * .onlyUnmapped: Logic value to select create only unmapped 
+%                 MDL RXN files (default: FALSE). 
+%               * .adjustToModelpH: Logic value used to determine whether a 
+%                 molecule's pH must be adjusted in accordance with the 
+%                 COBRA model. (default: TRUE, requires MarvinSuite). 
+%               * .addDirsToCompare: Cell(s) with the path to directory to 
+%                 an existing database (default: empty).
+%               * .dirNames: Cell(s) with the name of the directory(ies) 
+%                 (default: empty).
+%               * .debug: Logical value used to determine whether or not 
+%                 the results of different points in the function will be 
+%                 saved for debugging (default: empty).
+%
+% OUTPUTS:
+%
+%    newModel:      A new model with the comparison and if onlyUnmapped =
+%                   false, the informaton about the bonds broken and formed
+%                   as well as the bond enthalpies for each metabolic
+%                   reaction.
+%    info:          A diary of the database generation process
+
+%% 1. Initialise data and set default variables
+
+if ~isfield(options, 'outputDir')
+    outputDir = [pwd filesep];
+else
+    % Make sure input path ends with directory separator
+    outputDir = [regexprep(options.outputDir,'(/|\\)$',''), filesep];
+end
+metDir = [outputDir 'mets'];
+rxnDir = [outputDir 'rxns'];
+
+modelFields = fieldnames(model);
+
+if ~isfield(options, 'debug')
+    options.debug = false;
+end
+if ~isfield(options, 'printlevel')
+    options.printlevel = 1;
+end
+if ~isfield(options, 'standardisationApproach')
+    options.standardisationApproach = 'explicitH';
+end
+if ~isfield(options, 'keepMolComparison')
+    options.keepMolComparison = false;
+end
+if ~isfield(options, 'onlyUnmapped')
+    options.onlyUnmapped = false;
+end
+if ~isfield(options, 'adjustToModelpH')
+    options.adjustToModelpH = true;
+end
+if isfield(options, 'dirsToCompare')
+    dirsToCompare = true;
+    for i = 1:length(options.dirsToCompare)
+        options.dirsToCompare{i} = [regexprep(options.dirsToCompare{i},'(/|\\)$',''), filesep];
+        if ~isfolder(options.dirsToCompare)
+            display([options.dirsToCompare{i} ' is not a directory'])
+            options.dirsToCompare{i} = [];
+        end
+    end
+else
+    options.dirsToCompare = [];
+    dirsToCompare = false;
+end
+if ~isfield(options, 'dirNames')
+    for i = 1:length(options.dirsToCompare)
+        options.dirNames{i, 1} = ['localDir' num2str(i)];
+    end
+else
+    if isrow(options.dirNames)
+        options.dirNames = options.dirNames';
+    end
+end
+
+% Check if ChemAxon and openBabel are installed
+[cxcalcInstalled, ~] = system('cxcalc');
+cxcalcInstalled = ~cxcalcInstalled;
+if cxcalcInstalled == 0
+    cxcalcInstalled = false;
+    display('cxcalc is not installed, two features cannot be used: ')
+    display('1 - jpeg files for molecular structures (obabel required)')
+    display('2 - pH adjustment according to model.met Formulas')
+end
+[oBabelInstalled, ~] = system('obabel');
+if oBabelInstalled ~= 1
+    oBabelInstalled = false;
+    options.standardisationApproach = 'basic';
+    display('obabel is not installed, two features cannot be used: ')
+    display('1 - Generation of SMILES, InChI and InChIkey')
+    display('2 - MOL file standardisation')
+end
+[javaInstalled, ~] = system('java');
+if javaInstalled ~= 1 && ~options.onlyUnmapped
+    display('java is not installed, atom mappings cannot be computed')
+    options.onlyUnmapped = true;
+end
+
+% Start diary
+if ~isfolder(outputDir)
+    mkdir(outputDir);
+end
+diaryFilename = [outputDir datestr(now,30) '_DatabaseDiary.txt'];
+diary(diaryFilename)
+
+if options.printlevel > 0
+    disp('--------------------------------------------------------------')
+    disp('CHEMICAL DATABASE')
+    disp('--------------------------------------------------------------')
+    disp(' ')
+    fprintf('%s\n', 'Generating a chemical database with the following options:')
+    disp(' ')
+    disp(options)
+    disp('--------------------------------------------------------------')
+end
+
+directories = {'inchi'; 'smiles'; 'KEGG'; 'HMDB'; 'PubChem'; 'CHEBI'};
+if dirsToCompare
+    directories = [directories; options.dirNames];
+end
+
+mets = regexprep(model.mets, '(\[\w\])', '');
+umets = unique(mets);
+
+%% 2. Obtain metabolite structures from different sources
+
+% SOURCES
+% 1.- InChI (requires openBabel to obtain MOL file)
+% 2.- Smiles (requires openBabel to obtain MOL file)
+% 3.- KEGG (https://www.genome.jp/)
+% 4.- HMDB (https://hmdb.ca/)
+% 5.- PubChem (https://pubchem.ncbi.nlm.nih.gov/)
+% 6.- CHEBI (https://www.ebi.ac.uk/)
+
+if options.printlevel > 0
+    fprintf('%s\n\n', 'Obtaining MOL files from chemical databases ...')
+end
+
+comparisonDir = [metDir filesep 'molComparison' filesep];
+source = [0 0 0 0 0 0 0];
+for i = 1:6
+    dirBool(i) = false; 
+    if any(~cellfun(@isempty, regexpi(modelFields, directories{i})))
+        dirBool(i) = true;
+        sourceData = source;
+        sourceData(i + 1) = source(i + 1) + i + 1;
+        molCollectionReport = obtainMetStructures(model, comparisonDir, false, [], sourceData);
+        movefile([comparisonDir filesep 'newMol'], ...
+            [comparisonDir filesep directories{i}])
+        info.sourcesCoverage.(directories{i}) = molCollectionReport;
+        info.sourcesCoverage.totalCoverage(i) = molCollectionReport.noOfMets;
+        info.sourcesCoverage.source{i} = directories{i};
+        if options.printlevel > 0
+            disp([directories{i} ':'])
+            display(molCollectionReport)
+        end
+    end
+end
+if ~isempty(dirsToCompare)
+    for i = 1:length(options.dirsToCompare)
+        % Get list of MOL files
+        d = dir(options.dirsToCompare{i});
+        d = d(~[d.isdir]);
+        metList = {d.name}';
+        metList = metList(~cellfun('isempty', regexp(metList,'(\.mol)$')));
+        metList = regexprep(metList, '.mol', '');
+        metList(~ismember(metList, umets)) = [];
+        info.sourcesCoverage.totalCoverage(i + 6) = length(metList);
+        info.sourcesCoverage.source{i + 6} = options.dirNames{i};
+    end
+end
+% Remove sources without a single metabolite present the model
+if dirsToCompare
+    emptySourceBool = info.sourcesCoverage.totalCoverage == 0;
+    info.sourcesCoverage.totalCoverage(emptySourceBool) = [];
+    directories(emptySourceBool) = [];
+    dirsToDeleteBool = ismember(options.dirNames, info.sourcesCoverage.source(emptySourceBool));
+    options.dirsToCompare(dirsToDeleteBool) = [];
+    options.dirNames(dirsToDeleteBool) = [];
+    info.sourcesCoverage.source(emptySourceBool) = [];
+else
+    directories(~dirBool) = [];
+end
+
+if options.debug
+    save([outputDir '2.debug_afterDownloadMetabolicStructures.mat'])
+end
+
+%% 3. Compare MOL files downloaded and save the best match
+
+if options.printlevel > 0
+    fprintf('%s\n', 'Comparing information from sources ...')
+end
+
+for i = 1:size(directories, 1)
+    
+    % Set dir
+    if i > 6 && dirsToCompare
+        sourceDir = options.dirsToCompare{i - 6};
+    else
+        sourceDir = [comparisonDir directories{i} filesep];
+    end
+    
+    
+    % Get list of MOL files
+    d = dir(sourceDir);
+    d = d(~[d.isdir]);
+    metList = {d.name}';
+    metList = metList(~cellfun('isempty', regexp(metList,'(\.mol)$')));
+    metList = regexprep(metList, '.mol', '');
+    metList(~ismember(metList, mets)) = [];
+    
+    warning('off')
+    for j = 1:size(metList, 1)
+        name = [metList{j} '.mol'];
+        
+        if oBabelInstalled
+            
+            % Get inchis of the original metabolites
+            command = ['obabel -imol ' sourceDir name ' -oinchi '];
+            [~, result] = system(command);
+            result = split(result);
+            
+            % Group inchis in the correct group
+            if any(~cellfun(@isempty, regexp(result, 'InChI=1S')))
+                
+                % Create InChI table
+                if ~exist('groupedInChIs','var')
+                    groupedInChIs = table();
+                end
+                % Identify the correct index
+                if ~ismember('metNames', groupedInChIs.Properties.VariableNames)
+                    groupedInChIs.metNames{j} = regexprep(name, '.mol', '');
+                    idx = 1;
+                elseif ~ismember(regexprep(name, '.mol', ''), groupedInChIs.metNames)
+                    idx = size(groupedInChIs.metNames, 1) + 1;
+                    groupedInChIs.metNames{idx} = regexprep(name, '.mol', '');
+                else
+                    idx = find(ismember(groupedInChIs.metNames, regexprep(name, '.mol', '')));
+                end
+                % Save inchi in the table
+                groupedInChIs.(directories{i}){idx} = result{~cellfun(@isempty, regexp(result, 'InChI=1S'))};
+                
+            else
+                
+                % Create SMILES table for molecules with R groups
+                if ~exist('groupedSMILES','var')
+                    groupedSMILES = table();
+                end
+                % Identify the correct index
+                if ~ismember('metNames', groupedSMILES.Properties.VariableNames)
+                    groupedSMILES.metNames{1} = regexprep(name, '.mol', '');
+                    idx = 1;
+                elseif ~ismember(regexprep(name, '.mol', ''), groupedSMILES.metNames)
+                    idx = size(groupedSMILES.metNames, 1) + 1;
+                    groupedSMILES.metNames{idx} = regexprep(name, '.mol', '');
+                else
+                    idx = find(ismember(groupedSMILES.metNames, regexprep(name, '.mol', '')));
+                end
+                % Get SMILES
+                command = ['obabel -imol ' sourceDir name ' -osmiles '];
+                [~, result] = system(command);
+                if contains(result, '0 molecules converted')
+                    continue
+                end
+                result = splitlines(result);
+                result = split(result{end - 2});
+                % Save SMILES in the table
+                groupedSMILES.(directories{i}){idx} = result{1};
+                
+            end
+            
+        else
+            
+            % Create SMILES table for molecules with R groups
+            if ~exist('groupedFormula','var')
+                groupedFormula = table();
+            end
+            % Identify the correct index
+            if ~ismember('metNames', groupedFormula.Properties.VariableNames)
+                groupedFormula.metNames{1} = regexprep(name, '.mol', '');
+                idx = 1;
+            elseif ~ismember(regexprep(name, '.mol', ''), groupedFormula.metNames)
+                idx = size(groupedFormula.metNames, 1) + 1;
+                groupedFormula.metNames{idx} = regexprep(name, '.mol', '');
+            else
+                idx = find(ismember(groupedFormula.metNames, regexprep(name, '.mol', '')));
+            end
+            % Get formula from MOL
+            molFile = regexp(fileread([sourceDir name]), '\n', 'split')';
+            atomsString = [];
+            for k = 1:str2num(molFile{4}(1:3))
+                atomsString = [atomsString strtrim(molFile{4 + k}(32:33))];
+            end
+            groupedFormula.(directories{i}){idx} = editChemicalFormula(atomsString);
+            
+        end
+    end
+    warning('on')
+end
+
+if options.debug
+    save([outputDir '3a.debug_beforeComparison.mat'])
+end
+
+if oBabelInstalled
+    
+    % Obtain the most consistent molecules
+    reportComparisonInChIs = consistentData(model, groupedInChIs, 'InChI');
+    fields = fieldnames(reportComparisonInChIs);
+    metsInStruct = regexprep(fields(contains(fields, 'met_')), 'met_', '');
+    emptyBool = cellfun(@isempty, reportComparisonInChIs.sourcesToSave);
+    source = reportComparisonInChIs.sourcesToSave(~emptyBool);
+    
+    % Obtain the most consistent for molecules with R groups
+    if exist('groupedSMILES','var')
+        reportComparisonSMILES = consistentData(model, groupedSMILES, 'SMILES');
+        fields = fieldnames(reportComparisonSMILES);
+        metNamesSmiles = regexprep(fields(contains(fields, 'met_')), 'met_', '');
+        metsInStruct(end + 1: end + size(metNamesSmiles, 1)) = metNamesSmiles;
+        emptyBool = cellfun(@isempty, reportComparisonSMILES.sourcesToSave);
+        source(end + 1: end + size(metNamesSmiles, 1)) = reportComparisonSMILES.sourcesToSave(~emptyBool);
+    end
+    
+    nRows = size(metsInStruct, 1);
+    varTypes = {'string', 'string', 'string', 'string', 'string', 'double', 'double', 'string'};
+    varNames = {'mets', 'sourceWithHighestScore', 'metNames', 'metFormulas', 'sourceFormula', 'layersInChI', 'socre', 'idUsed'};
+    info.comparisonTable = table('Size', [nRows length(varTypes)], 'VariableTypes', varTypes, 'VariableNames', varNames);
+    
+    fields = fieldnames(reportComparisonInChIs);
+    metFields = fields(contains(fields, 'met_'));
+    for i = 1:size(metFields, 1)
+        info.comparisonTable.mets(i) = metsInStruct(i);
+        info.comparisonTable.sourceWithHighestScore(i) = source(i);
+        if isfield(model, 'metNames')
+            info.comparisonTable.metNames(i) = reportComparisonInChIs.(metFields{i}).metNames;
+        end
+        info.comparisonTable.metFormulas(i) = reportComparisonInChIs.(metFields{i}).metFormula;
+        selectedIdIdx = find(reportComparisonInChIs.(metFields{i}).idsScore == max(reportComparisonInChIs.(metFields{i}).idsScore));
+        info.comparisonTable.sourceFormula(i) = reportComparisonInChIs.(metFields{i}).sourceFormula(selectedIdIdx(1));
+        info.comparisonTable.layersInChI(i) = reportComparisonInChIs.(metFields{i}).layersOfDataInChI(selectedIdIdx(1));
+        info.comparisonTable.idUsed(i) = reportComparisonInChIs.(metFields{i}).ids(selectedIdIdx(1));
+        info.comparisonTable.socre(i) = reportComparisonInChIs.(metFields{i}).idsScore(selectedIdIdx(1));
+    end
+    if exist('groupedSMILES','var')
+        fields = fieldnames(reportComparisonSMILES);
+        metFields = fields(contains(fields, 'met_'));
+        startFrom = i;
+        for i = 1:size(metFields, 1)
+            info.comparisonTable.mets(i + startFrom) = metsInStruct(i + startFrom);
+            info.comparisonTable.sourceWithHighestScore(i + startFrom) = source(i + startFrom);
+            if isfield(model, 'metNames')
+                info.comparisonTable.metNames(i + startFrom, 1) = reportComparisonSMILES.(metFields{i}).metNames;
+            end
+            info.comparisonTable.metFormulas(i + startFrom) = char(reportComparisonSMILES.(metFields{i}).metFormula);
+            selectedIdIdx = find(reportComparisonSMILES.(metFields{i}).idsScore == max(reportComparisonSMILES.(metFields{i}).idsScore));
+            info.comparisonTable.sourceFormula(i + startFrom) = reportComparisonSMILES.(metFields{i}).sourceFormula(selectedIdIdx(1));
+            info.comparisonTable.layersInChI(i + startFrom) = NaN;
+            info.comparisonTable.idUsed(i + startFrom) = reportComparisonSMILES.(metFields{i}).ids(selectedIdIdx(1));
+            info.comparisonTable.socre(i + startFrom) = reportComparisonSMILES.(metFields{i}).idsScore(selectedIdIdx(1));
+        end
+    end
+    
+    % Print data
+    if options.printlevel > 0
+        display(info.comparisonTable)
+        
+        % heatMap comparison
+        figure
+        comparison = reportComparisonInChIs.comparison;
+        if exist('groupedSMILES','var')
+            rows = size(comparison, 1);
+            comparison = [comparison; zeros(size(reportComparisonSMILES.comparison, 1),...
+                size(comparison, 2))];
+            bool = ismember(reportComparisonInChIs.sources, reportComparisonSMILES.sources);
+            comparison(rows + 1:end, bool) = reportComparisonSMILES.comparison;
+        end
+        for i = 1:size(comparison, 2)
+            for j = 1:size(comparison, 2)
+                boolToCompare = ~isnan(comparison(:, i)) & ~isnan(comparison(:, j));
+                group1 = comparison(boolToCompare, i);
+                group2 = comparison(boolToCompare, j);
+                comparisonMatrix(i, j) = sqrt(sum((group1 - group2).^2));
+            end
+        end
+        h = heatmap(comparisonMatrix);
+        h.YDisplayLabels = directories;
+        h.XDisplayLabels = directories;
+        h.FontSize = 16;
+        title('Sources disimilarity comparison')
+        
+        % Sources comparison
+        figure
+        [db, ~, idx] = unique(split(strjoin(source, ' '), ' '));
+        [~, ib1] = ismember(db, directories);
+        yyaxis left
+        bar(histcounts(idx, size(db, 1)))
+        title({'Sources comparison', ...
+            ['Metabolites collected: ' num2str(size(info.comparisonTable, 1))]}, 'FontSize', 20)
+        ylabel('Times with highest score', 'FontSize', 18)
+        set(gca, 'XTick', 1:size(db, 1), 'xticklabel', db, 'FontSize', 18)
+        xtickangle(45)
+        yyaxis right
+        bar(info.sourcesCoverage.totalCoverage(ib1), 0.3)
+        ylabel('IDs coverage', 'FontSize', 20)
+        
+        if options.printlevel > 1
+            display(groupedInChIs)
+        end
+    end
+    
+else
+    
+    % Obtain the most consistent molecules
+    reportComparisonFormula = consistentData(model, groupedFormula, 'MOL');
+    fields = fieldnames(reportComparisonFormula);
+    metsInStruct = regexprep(fields(contains(fields, 'met_')), 'met_', '');
+    emptyBool = cellfun(@isempty, reportComparisonFormula.sourcesToSave);
+    source = reportComparisonFormula.sourcesToSave(~emptyBool);
+    
+    nRows = size(metsInStruct, 1);
+    varTypes = {'string', 'string', 'string', 'string', 'string', 'double', 'double', 'string'};
+    varNames = {'mets', 'sourceWithHighestScore', 'metNames', 'metFormulas', 'sourceFormula', 'layersInChI', 'socre', 'idUsed'};
+    info.comparisonTable = table('Size', [nRows length(varTypes)], 'VariableTypes', varTypes, 'VariableNames', varNames);
+    
+    fields = fieldnames(reportComparisonFormula);
+    metFields = fields(contains(fields, 'met_'));
+    for i = 1:size(metFields, 1)
+        info.comparisonTable.mets(i) = metsInStruct(i);
+        info.comparisonTable.sourceWithHighestScore(i) = source(i);
+        if isfield(model, 'metNames')
+            info.comparisonTable.metNames(i) = reportComparisonFormula.(metFields{i}).metNames;
+        end
+        info.comparisonTable.metFormulas(i) = char(reportComparisonFormula.(metFields{i}).metFormula);
+        selectedIdIdx = find(reportComparisonFormula.(metFields{i}).idsScore == max(reportComparisonFormula.(metFields{i}).idsScore));
+        info.comparisonTable.sourceFormula(i) = reportComparisonFormula.(metFields{i}).sourceFormula(selectedIdIdx(1));
+        info.comparisonTable.idUsed(i) = reportComparisonFormula.(metFields{i}).ids(selectedIdIdx(1));
+        info.comparisonTable.socre(i) = reportComparisonFormula.(metFields{i}).idsScore(selectedIdIdx(1));
+    end
+    
+    if options.printlevel > 0
+        display(info.comparisonTable)
+        
+        % heatMap comparison
+        figure
+        comparison = reportComparisonFormula.comparison;
+        for i = 1:size(comparison, 2)
+            for j = 1:size(comparison, 2)
+                % Ignore NaN data
+                boolToCompare = ~isnan(comparison(:, i)) & ~isnan(comparison(:, j));
+                group1 = comparison(boolToCompare, i);
+                group2 = comparison(boolToCompare, j);
+                comparisonMatrix(i, j) = sqrt(sum((group1 - group2).^2));
+            end
+        end
+        h = heatmap(comparisonMatrix);
+        h.YDisplayLabels = directories;
+        h.XDisplayLabels = directories;
+        title('Dissimilarity comparison')
+        
+        % Sources comparison
+        figure
+        [db, ~, idx] = unique(split(strjoin(source, ' '), ' '));
+        bar(histcounts(idx, size(db, 1)))
+        title({'Source of molecules with the highest score', ...
+            ['Total ' num2str(size(info.comparisonTable, 1))]}, 'FontSize', 20)
+        ylabel('Times ', 'FontSize', 18)
+        set(gca, 'XTick', 1:size(db, 1), 'xticklabel', db, 'FontSize', 18)
+        xtickangle(45)
+        
+        if options.printlevel > 1
+            display(groupedFormula)
+        end
+    end
+    
+end
+
+% Save the MOL files with highest score
+tmpDir = [metDir filesep 'tmp'];
+if ~isfolder(tmpDir)
+    mkdir(tmpDir)
+end
+source = source(ismember(metsInStruct, unique(regexprep(model.mets, '(\[\w\])', ''))));
+metsInStruct = metsInStruct(ismember(metsInStruct, unique(regexprep(model.mets, '(\[\w\])', ''))));
+for i = 1:size(metsInStruct, 1)
+    dirToCopy = strsplit(source{i}, ' ');
+    if dirsToCompare && ismember(dirToCopy{1}, options.dirNames)
+        copyfile([options.dirsToCompare{ismember(options.dirNames, dirToCopy{1})} metsInStruct{i} '.mol'], tmpDir)
+    else
+        copyfile([comparisonDir dirToCopy{1} filesep metsInStruct{i} '.mol'], tmpDir)
+    end
+end
+if ~options.keepMolComparison
+    rmdir(comparisonDir, 's')
+end
+if ~options.adjustToModelpH || ~cxcalcInstalled
+    model.comparison = info.comparisonTable;
+end
+if options.debug
+    save([outputDir '3b.debug_afterComparison.mat'])
+end
+
+%% 4. Adjust pH based on the model's chemical formula
+
+if options.adjustToModelpH && cxcalcInstalled
+    
+    info.adjustedpHTable = info.comparisonTable;
+    
+    if options.printlevel > 0
+        fprintf('%s\n', 'Adjusting pH based on the model''s chemical formula ...')
+        display(' ')
+    end
+    
+    [needAdjustmentBool, differentFormula, loopError, pHRangePassed] = deal(false(size(info.comparisonTable, 1), 1));
+    for i = 1:size(info.comparisonTable, 1)
+        try
+            
+            name = [info.comparisonTable.mets{i} '.mol'];
+            
+            if isequal(regexprep(info.comparisonTable.metFormulas(i), 'H\d*', ''),...
+                    regexprep(info.comparisonTable.sourceFormula(i), 'H\d*', '')) % pH different only
+                
+                %  Get number of hydrogens in the model's metabolite
+                [elemetList, ~ , elemetEnd] = regexp(info.comparisonTable.metFormulas(i), ['[', 'A':'Z', '][', 'a':'z', ']?'], 'match');
+                hBool = contains(elemetList, 'H');
+                [num, numStart] = regexp(info.comparisonTable.metFormulas(i), '\d+', 'match');
+                numList = ones(size(elemetList));
+                numList(ismember(elemetEnd + 1, numStart)) = cellfun(@str2num, num);
+                noOfH_model = numList(hBool);
+                
+                %  Get number of hydrogens in the source's metabolite
+                [elemetList, ~ , elemetEnd] = regexp(info.comparisonTable.sourceFormula(i), ['[', 'A':'Z', '][', 'a':'z', ']?'], 'match');
+                hBool = contains(elemetList, 'H');
+                [num, numStart] = regexp(info.comparisonTable.sourceFormula(i), '\d+', 'match');
+                numList = ones(size(elemetList));
+                idx = ismember(elemetEnd + 1, numStart);
+                numList(idx) = cellfun(@str2num, num);
+                noOfH_source = numList(hBool);
+                
+                % Start with a neutral pH
+                pH = 7;
+                while noOfH_model ~= noOfH_source
+                    
+                    if ~needAdjustmentBool(i)
+                        needAdjustmentBool(i) = true;
+                        if noOfH_model - noOfH_source > 0
+                            pHDifference = -0.33;
+                        else
+                            pHDifference = 0.33;
+                        end
+                    end
+                    
+                    % Change pH
+                    command = ['cxcalc majormicrospecies -H ' num2str(pH) ' -f mol ' tmpDir '' filesep name];
+                    [~, result] = system(command);
+                    molFile = regexp(result, '\n', 'split')';
+                    fid2 = fopen([tmpDir filesep 'tmp.mol'], 'w');
+                    fprintf(fid2, '%s\n', molFile{:});
+                    fclose(fid2);
+                    % Obtain the chemical formula
+                    command = ['cxcalc elementalanalysistable -t "formula" ' tmpDir filesep 'tmp.mol'];
+                    [~, formula] = system(command);
+                    formula = split(formula);
+                    formula = formula{end - 1};
+                    %  Get number of hydrogens in the adjusted metabolite
+                    [elemetList, ~ , elemetEnd] = regexp(formula, ['[', 'A':'Z', '][', 'a':'z', ']?'], 'match');
+                    hBool = contains(elemetList, 'H');
+                    [num, numStart] = regexp(formula, '\d+', 'match');
+                    numList = ones(size(elemetList));
+                    idx = ismember(elemetEnd + 1, numStart);
+                    numList(idx) = cellfun(@str2num, num);
+                    noOfH_source = numList(hBool);
+                    
+                    if pH <= 0 || pH >= 14
+                        pHRangePassed(i) = true;
+                        break
+                    end
+                    pH = pH + pHDifference;
+                    
+                    if noOfH_model == noOfH_source
+                        movefile([tmpDir filesep 'tmp.mol'], [tmpDir filesep name])
+                        info.adjustedpHTable.sourceFormula(i) = formula;
+                    end
+                end
+            else
+                differentFormula(i) = true;
+            end
+        catch
+            loopError(i) = true;
+        end
+    end
+    
+    if isfile([tmpDir filesep 'tmp.mol'])
+        delete([tmpDir filesep 'tmp.mol'])
+    end
+    
+    info.adjustedpHTable.needAdjustmentBool = needAdjustmentBool;
+    info.adjustedpHTable.notPossible2AdjustBool = differentFormula | loopError | pHRangePassed;
+    info.adjustedpHTable.differentFormula = differentFormula;
+    info.adjustedpHTable.loopError = loopError;
+    info.adjustedpHTable.pHRangePassed = pHRangePassed;
+    
+    if options.printlevel > 0
+        display('adjustedpH:')
+        display(info.adjustedpHTable)
+    end
+    
+    model.comparison = info.adjustedpHTable;
+    
+end
+
+%% 5. Standardise the MOL files according options
+
+standardisationApproach = options.standardisationApproach;
+
+% Get list of MOL files
+d = dir(tmpDir);
+d = d(~[d.isdir]);
+metList = {d.name}';
+metList = metList(~cellfun('isempty', regexp(metList,'(\.mol)$')));
+metList = regexprep(metList, '.mol', '');
+metList(~ismember(metList, regexprep(model.mets, '(\[\w\])', ''))) = [];
+
+% Standardise MOL files the most consitent MOL files
+standardisationReport = standardiseMolDatabase(tmpDir, metList, metDir, standardisationApproach);
+info.standardisationReport = standardisationReport;
+
+if oBabelInstalled
+    % Create table
+    nRows = size(standardisationReport.SMILES, 1);
+    varTypes = {'string', 'string', 'string', 'string'};
+    varNames = {'mets', 'InChIKeys', 'InChIs', 'SMILES'};
+    info.standardisationReport = table('Size', [nRows length(varTypes)], 'VariableTypes', varTypes, 'VariableNames', varNames);
+    info.standardisationReport.mets(1:end) = standardisationReport.standardised;
+    info.standardisationReport.InChIKeys(1:size(standardisationReport.InChIKeys, 1)) = standardisationReport.InChIKeys;
+    info.standardisationReport.InChIs(1:size(standardisationReport.InChIs, 1)) = standardisationReport.InChIs;
+    info.standardisationReport.SMILES(1:size(standardisationReport.SMILES, 1)) = standardisationReport.SMILES;
+    % Write table
+    writetable(info.standardisationReport, [metDir filesep 'standardisationReport'])
+end
+
+if options.printlevel > 0
+    display(info.standardisationReport)
+end
+rmdir(tmpDir, 's')
+model.standardisation = info.standardisationReport;
+
+if options.debug
+    save([outputDir '5.debug_afterStandardisation.mat'])
+end
+
+%% 6. Atom map data
+
+% Set options
+
+% MOL file directory
+molFileDir = [metDir filesep 'molFiles'];
+
+% Create the reaction data directory
+if ~isfolder(rxnDir)
+    mkdir(rxnDir)
+end
+
+% Reactions to atom map
+rxnsToAM = model.rxns;
+
+% Keep standardisation approach used with the molecular structures
+switch options.standardisationApproach
+    case 'explicitH'
+        hMapping = true;
+    case 'implicitH'
+        hMapping = false;
+    case 'neutral'
+    case 'basic'
+        hMapping = true;
+end
+
+% Atom map metabolic reactions
+reactionsReport = obtainAtomMappingsRDT(model, molFileDir, rxnDir, rxnsToAM, hMapping, options.onlyUnmapped);
+info.reactionsReport = reactionsReport;
+
+
+rxnsFilesDir = [rxnDir filesep 'unMapped'];
+if ~options.onlyUnmapped
+    
+    % Atom map transport reactions
+    mappedRxns = transportRxnAM(rxnsFilesDir, [rxnDir filesep 'atomMapped']);
+    for i = 1:size(mappedRxns, 2)
+        delete([rxnDir filesep 'images' filesep mappedRxns{i} '.png']);
+    end
+    
+    % Generate rinchis and reaction SMILES
+    if oBabelInstalled
+        
+        nRows = size(rxnsToAM, 1);
+        varTypes = {'string', 'string', 'string'};
+        varNames = {'rxns', 'rinchi', 'rsmi'};
+        info.reactionsReport.rxnxIDsTable = table('Size', [nRows length(varTypes)], 'VariableTypes', varTypes, 'VariableNames', varNames);
+        
+        model.rinchi = repmat({''}, size(model.rxns));
+        model.rsmi = repmat({''}, size(model.rxns));
+        for i = 1:size(rxnsToAM, 1)
+            info.reactionsReport.rxnxIDsTable.rxns(i) = rxnsToAM(i);
+            if isfile([rxnDir filesep 'atomMapped' filesep rxnsToAM{i} '.rxn'])
+                % Get rinchis
+                command = ['obabel -irxn ' rxnDir filesep 'atomMapped' filesep rxnsToAM{i} '.rxn -orinchi'];
+                [~, result] = system(command);
+                result = split(result);
+                info.reactionsReport.rxnxIDsTable.rinchi(i) = result{~cellfun(@isempty, regexp(result, 'RInChI='))};
+                model.rinchi{findRxnIDs(model, rxnsToAM{i})} = result{~cellfun(@isempty, regexp(result, 'RInChI='))};
+                % Get reaction SMILES
+                command = ['obabel -irxn ' rxnDir filesep 'atomMapped' filesep rxnsToAM{i} '.rxn -osmi'];
+                [~, result] = system(command);
+                result = splitlines(result);
+                result = split(result{end - 2});
+                info.reactionsReport.rxnxIDsTable.rsmi(i) = result{1};
+                model.rsmi{findRxnIDs(model, rxnsToAM{i}), 1} = result{1};
+            end
+        end
+    end
+end
+
+% Find unbalanced RXN files
+% Get list of RXN files to check
+d = dir(rxnsFilesDir);
+d = d(~[d.isdir]);
+rxnList = {d.name}';
+rxnList = rxnList(~cellfun('isempty', regexp(rxnList,'(\.rxn)$')));
+rxnList = regexprep(rxnList, '.rxn', '');
+rxnList(~ismember(rxnList, rxnsToAM)) = [];
+
+[unbalancedBool, v3000] = deal(false(size(rxnList)));
+for i = 1:size(rxnList, 1)
+    
+    name = [rxnList{i} '.rxn'];
+    % Read the RXN file
+    rxnFile = regexp(fileread([rxnsFilesDir filesep name]), '\n', 'split')';
+    
+    % Identify molecules
+    substrates = str2double(rxnFile{5}(1:3));
+    products = str2double(rxnFile{5}(4:6));
+    begMol = strmatch('$MOL', rxnFile);
+    
+    if ~isnan(products)
+        % Count atoms in substrates and products
+        atomsS = 0;
+        for j = 1:substrates
+            atomsS = atomsS + str2double(rxnFile{begMol(j) + 4}(1:3));
+        end
+        atomsP = 0;
+        for j = substrates + 1: substrates +products
+            atomsP = atomsP + str2double(rxnFile{begMol(j) + 4}(1:3));
+        end
+        
+        % Check if the file is unbalanced
+        if atomsS ~= atomsP
+            unbalancedBool(i) = true;
+        end
+    else
+        v3000(i) = true;
+    end
+end
+
+info.reactionsReport.balancedReactions = rxnList(~unbalancedBool);
+info.reactionsReport.unbalancedReactions = rxnList(unbalancedBool);
+model = findSExRxnInd(model);
+info.reactionsReport.rxnMissing = setdiff(model.rxns(model.SIntRxnBool), info.reactionsReport.rxnFilesWritten);
+
+metsInBalanced = unique(regexprep(findMetsFromRxns(model, rxnList(~unbalancedBool)), '(\[\w\])', ''));
+metsInUnbalanced = unique(regexprep(findMetsFromRxns(model, rxnList(unbalancedBool)), '(\[\w\])', ''));
+info.reactionsReport.metsAllwaysInBalancedRxns = umets(ismember(umets, setdiff(metsInBalanced, metsInUnbalanced)));
+info.reactionsReport.metsSometimesInUnbalancedRxns = umets(ismember(umets, intersect(metsInBalanced, metsInUnbalanced)));
+info.reactionsReport.metsAllwaysInUnbalancedRxns = umets(ismember(umets, setdiff(metsInUnbalanced, metsInBalanced)));
+info.reactionsReport.missingMets = setdiff(umets, [metsInBalanced; metsInUnbalanced]);
+
+info.reactionsReport.table = table([...
+    size(info.reactionsReport.rxnFilesWritten, 1);...
+    size(info.reactionsReport.mappedRxns, 1);...
+    size(info.reactionsReport.balancedReactions, 1);...
+    size(info.reactionsReport.unbalancedReactions, 1);...
+    size(info.reactionsReport.rxnMissing, 1);...
+    size(info.reactionsReport.metsAllwaysInBalancedRxns, 1) + ...
+    size(info.reactionsReport.metsSometimesInUnbalancedRxns, 1) + ...
+    size(info.reactionsReport.metsAllwaysInUnbalancedRxns, 1);...
+    size(info.reactionsReport.metsAllwaysInBalancedRxns, 1) + ...
+    size(info.reactionsReport.metsSometimesInUnbalancedRxns, 1);...
+    size(info.reactionsReport.metsAllwaysInUnbalancedRxns, 1);...
+    size(info.reactionsReport.missingMets, 1)],...
+    ...
+    'VariableNames', ...
+    {'Var'},...
+    'RowNames',...
+    {'RXN files written'; ...
+    'Atom mapped reactions';...
+    'Balanced reactions';...
+    'Unbalanced reactions';...
+    'Missing reactions';...
+    'Metabolites obtained'; ...
+    'Metabolites in balanced rxns'; ...
+    'Metabolites allways in unbalanced rxns'; ...
+    'Missing metabolites'});
+
+if options.printlevel > 0
+    
+    if ~options.onlyUnmapped
+        display(info.reactionsReport.rxnxIDsTable)
+    end
+    
+    display(info.reactionsReport.table)
+    
+    % Reactions
+    figure
+    labelsToAdd = {'Balanced', 'Unbalanced', 'Missing'};
+    X = [size(info.reactionsReport.balancedReactions, 1);...
+        size(info.reactionsReport.unbalancedReactions, 1);...
+        size(info.reactionsReport.rxnMissing, 1)];
+    pieChart = pie(X(find(X)));
+    title({'RXN coverage', ['From ' num2str(sum(X)) ' internal rxns in the model']}, 'FontSize', 20)
+    legend(labelsToAdd(find(X)), 'FontSize', 16)
+    set(findobj(pieChart,'type','text'),'fontsize',18)
+    
+    % Metabolites
+    figure
+    labelsToAdd = {'In balanced rxn', 'Ocassionally in unbalanced rxn', 'In unbalanced rxn', 'Missing'};
+    X = [size(info.reactionsReport.metsAllwaysInBalancedRxns, 1);...
+        size(info.reactionsReport.metsSometimesInUnbalancedRxns, 1);...
+        size(info.reactionsReport.metsAllwaysInUnbalancedRxns, 1);...
+        size(info.reactionsReport.missingMets, 1)];
+    pieChart = pie(X(find(X)));
+    title({'Met percentage coverage', ['From ' num2str(size(umets, 1)) ' unique mets in the model']}, 'FontSize', 20)
+    legend(labelsToAdd(find(X)), 'FontSize', 16)
+    set(findobj(pieChart,'type','text'),'fontsize',18)
+    
+end
+if options.printlevel > 1
+    disp('RXN files written')
+    display(info.reactionsReport.rxnFilesWritten)
+    disp('Atom mapped reactions')
+    display(info.reactionsReport.mappedRxns)
+    disp('Balanced reactions')
+    display(info.reactionsReport.balancedReactions)
+    disp('Unbalanced reactions')
+    display(info.reactionsReport.unbalancedReactions)
+    disp('Metabolites allways in balanced rxns')
+    display(info.reactionsReport.metsAllwaysInBalancedRxns)
+    disp('Metabolites ocasional in unbalanced rxns')
+    display(info.reactionsReport.metsSometimesInUnbalancedRxns)
+    disp('Metabolites allways in unbalanced rxns')
+    display(info.reactionsReport.metsAllwaysInUnbalancedRxns)
+    disp('Missing metabolites')
+    display(info.reactionsReport.missingMets)
+end
+
+if options.debug
+    save([outputDir '6.debug_endOfreactionDatabase.mat'])
+end
+
+%% 7. Bond enthalpies and bonds broken and formed
+
+if ~options.onlyUnmapped    
+    
+    % Get bond enthalpies and bonds broken and formed
+    if options.printlevel  > 0
+        display('Obtaining RInChIes and reaction SMILES ...')
+        [bondsBF, bondsE, meanBBF, meanBE] = findBEandBBF(model, [rxnDir filesep 'atomMapped'], 1);
+        info.bondsData.bondsDataTable = table(model.rxns, model.rxnNames, bondsBF, bondsE, ...
+            'VariableNames', {'rxns', 'rxnNames', 'bondsBF', 'bondsE'});
+        info.bondsData.meanBBF = meanBBF;
+        info.bondsData.meanBE = meanBE;
+        display(info.bondsData.bondsDataTable)
+    else
+        [bondsBF, bondsE, meanBBF, meanBE] = findBEandBBF(model, [rxnDir filesep 'atomMapped']);
+        info.bondsData.table = table(model.rxns, model.rxnNames, bondsBF, bondsE, ...
+            'VariableNames', {'rxns','rxnNames','bondsBF','bondsE'});
+        info.bondsData.table = sortrows(info.bondsData.table, {'bondsBF'}, {'descend'});
+        info.bondsData.meanBBF = meanBBF;
+        info.bondsData.meanBE = meanBE;
+    end
+        
+    % Add data in the model
+    model.bondsBF = bondsBF;
+    model.bondsE = bondsE;
+    model.meanBBF = meanBBF;
+    model.meanBE = meanBE;
+    
+end
+
+newModel = model;
+if options.debug
+    save([outputDir '7.debug_endOfGenerateChemicalDatabase.mat'])
+end
+
+diary off
+if options.printlevel > 0 > 0
+    fprintf('%s\n', ['Diary written to: ' options.outputDir])
+    fprintf('%s\n', 'generateChemicalDatabase run is complete.')
+end
+
+end
+
+function reportComparison = consistentData(model, groupedIDs, typeID)
+% The most cross-validated ID is saved. It's compared molecular formula of
+% the model, the charge and the similarity with other IDs
+
+% Compare and sort results based on prime numbers. The smallest prime
+% number represent the most similar inchis between databases
+IDsArray = table2cell(groupedIDs);
+
+% Start report
+reportComparison.groupedID = groupedIDs;
+reportComparison.sources = groupedIDs.Properties.VariableNames(2:end)';
+reportComparison.sourcesToSave = cell(size(IDsArray, 1), 1);
+
+% Delete the Names
+metNames = IDsArray(:, 1);
+IDsArray(:, 1) = [];
+for i = 1:size(IDsArray, 1)
+    
+    % 1st comparison - Similarity
+    emptyBool = cellfun(@isempty, IDsArray(i, 1:end));
+    IDsArray(i, emptyBool) = {'noData'};
+    [~, ia, ic] = unique(IDsArray(i, 1:end));
+    ic(emptyBool) = NaN;
+    ia(contains(IDsArray(i, ia), 'noData')) = [];
+    if ~isempty(ia)
+        ia = sort(ia);
+        % Acsending values
+        c = 0;
+        icAcsending = ic;
+        for j = 1:size(ia, 1)
+            if ~isnan(ic(ia(j)))
+                c = c + 1;
+                icAcsending(ic == ic(ia(j))) = c;
+            end
+        end
+        comparison(i, :) = icAcsending';
+        
+        % 2nd comparison - cross-validation & chemical formulas comparison &
+        % InChI data (if is inchi)
+        
+        % Cross-validation (assign score)
+        idsScore = zeros(size(ia, 1), 1);
+        for j = 1:size(ia, 1)
+            idsScore(j) = idsScore(j)  + (sum(ic == ic(ia(j))) / size(ic, 1));
+        end
+        
+        % Chemical formula comparison
+        % Get model formula
+        metIdx = find(ismember(regexprep(model.mets, '(\[\w\])', ''), groupedIDs.metNames{i}));
+        rGroup = ["X", "Y", "*", "FULLR"];
+        if contains(model.metFormulas(metIdx(1)), rGroup)
+            modelFormula = editChemicalFormula(model.metFormulas{metIdx(1)});
+        else
+            modelFormula = model.metFormulas{metIdx(1)};
+        end
+        if isfield(model, 'metNames')
+            reportComparison.(['met_' groupedIDs.metNames{i}]).metNames = model.metNames(metIdx(1));
+        end
+        reportComparison.(['met_' groupedIDs.metNames{i}]).metFormula = {modelFormula};
+        % Get ID formula
+        reportComparison.(['met_' groupedIDs.metNames{i}]).uniqueIdIdx = ia;
+        for j = 1:size(ia, 1)
+            consistentID = IDsArray{i, ia(j)};
+            
+            switch typeID
+                case 'InChI'
+                    [elemetList, ~ , ~] = regexp(consistentID, '/([^/]*)/', 'match');
+                    if isempty(elemetList)
+                        IDformula = '';
+                    else
+                        IDformula = regexprep(elemetList{1}, '/', '');
+                    end
+                    
+                case 'SMILES'
+                    
+                    % Get formula from MOL
+                    currentDir = pwd;
+                    fid2 = fopen([currentDir filesep 'tmp'], 'w');
+                    fprintf(fid2, '%s\n', consistentID);
+                    fclose(fid2);
+                    command = 'obabel -ismi tmp -O tmp.mol mol -h';
+                    [~, ~] = system(command);
+                    molFile = regexp(fileread([currentDir filesep 'tmp.mol']), '\n', 'split')';
+                    if ~isempty(char(molFile))
+                        atomsString = [];
+                        for k = 1:str2num(molFile{4}(1:3))
+                            atomsString = [atomsString strtrim(molFile{4 + k}(32:33))];
+                        end
+                        IDformula = editChemicalFormula(atomsString);
+                    end
+                    delete([currentDir filesep 'tmp.mol'])
+                    
+                case 'MOL'
+                    IDformula = consistentID;
+            end
+            reportComparison.(['met_' groupedIDs.metNames{i}]).sourceFormula{j, 1} = IDformula;
+            
+            % Compare (assign score)
+            if isequal(modelFormula, IDformula)
+                idsScore(j) = idsScore(j) + 10; % ten times the scale in cross-validation
+            elseif isequal(regexprep(modelFormula, 'H\d*', ''), regexprep(IDformula, 'H\d*', '')) % pH different
+                idsScore(j) = idsScore(j)  + 8;
+            end
+            
+            % InChI layers comparison
+            switch typeID
+                case 'InChI'
+                    inchiLayersDetail = getInchiData(IDsArray{i, ia(j)});
+                    layersOfDataInChI(j, 1) = inchiLayersDetail.layers;
+                    idsScore(j) = idsScore(j) + 4 + layersOfDataInChI(j, 1); % scale in formula comparison
+                    reportComparison.(['met_' groupedIDs.metNames{i}]).layersOfDataInChI(j, 1) = ...
+                        layersOfDataInChI(j, 1);
+                    
+                case 'SMILES'
+                    
+                    fid2 = fopen([currentDir filesep 'tmp'], 'w');
+                    fprintf(fid2, '%s\n', IDsArray{i, ia(j)});
+                    fclose(fid2);
+                    command = 'obabel -ismi tmp -O tmp.mol mol -h';
+                    [~, ~] = system(command);
+                    molFile = regexp(fileread([currentDir filesep 'tmp.mol']), '\n', 'split')';
+                    molFile = regexprep(molFile, 'X|Y|*|R|A', 'H');
+                    fid2 = fopen([currentDir filesep 'tmp.mol'], 'w');
+                    fprintf(fid2, '%s\n', molFile{:});
+                    fclose(fid2);
+                    % Get inchis of the original metabolites
+                    command = ['obabel -imol ' currentDir filesep 'tmp.mol -oinchi'];
+                    [~, result] = system(command);
+                    result = split(result);
+                    inchiLayersDetail = getInchiData(result{contains(result, 'InChI=1S')});
+                    layersOfDataInChI(j, 1) = inchiLayersDetail.layers;
+                    idsScore(j) = idsScore(j) + 4 + layersOfDataInChI(j, 1); % scale in formula comparison
+                    reportComparison.(['met_' groupedIDs.metNames{i}]).layersOfDataInChI(j, 1) = ...
+                        layersOfDataInChI(j, 1);
+                    delete([currentDir filesep 'tmp'])
+                    delete([currentDir filesep 'tmp.mol'])
+                    
+            end
+        end
+        
+        % Continue report
+        reportComparison.(['met_' groupedIDs.metNames{i}]).ids(:, 1) = IDsArray(i, ia);
+        reportComparison.(['met_' groupedIDs.metNames{i}]).idsScore = idsScore;
+        toSaveidx = find(ismember(ic, ic(ia(idsScore == max(idsScore)))));
+        reportComparison.sourcesToSave{i, :} = strjoin(reportComparison.sources(toSaveidx), ' ');
+    else
+        reportComparison.sourcesToSave{i, :} = '';
+    end
+end
+reportComparison.comparison = comparison;
+end
diff --git a/src/dataIntegration/chemoInformatics/inchi/getInchiData.m b/src/dataIntegration/chemoInformatics/inchi/getInchiData.m
new file mode 100644
index 0000000000..14d04cdc36
--- /dev/null
+++ b/src/dataIntegration/chemoInformatics/inchi/getInchiData.m
@@ -0,0 +1,70 @@
+function inchiLayersDetail = getInchiData(inchi)
+% Classify the inchi according to its various layers of information
+% 
+% USAGE:
+%
+% detailLevelInchi = inchiDetail(inchi)
+%
+% INPUTS:
+%    inchi:             String with the InChI to classify
+%
+% OUTPUTS:
+%    detailLevelInchi:  Struct file with the following fields:
+%
+%                       * .layers - Number of layers in the InChI.
+%                       * .standart - Logical, indicates whether the inchi 
+%                                     is standard or not.
+%                       * .metFormulas - The molecula formula.
+%                       * .positiveCharges - Number of positive charges.
+%                       * .negativeCharges - Number of negative charges.
+%                       * .netCharge - Summ of the charges.
+%                       * .stereochemicalLayer - Logical, indicates whether 
+%                                                the inchi  represent 
+%                                                stereochemical information
+%                                                or not.
+%                       * .isotopicLayer - Logical, indicates whether the 
+%                                          inchi represent isotopic 
+%                                          information or not.
+
+inchiSplited = split(inchi, '/');
+
+% Check inchi layers
+inchiLayersDetail.layers = numel(inchiSplited);
+
+% Check if it is a standard inchi 
+assert(contains(inchiSplited{1}, 'InChI='), [inchi ' is not an InChI'])
+if contains(inchiSplited{1}, '1S')
+    inchiLayersDetail.standart = true;
+else
+    inchiLayersDetail.standart = false;
+end
+    
+% Chemical formula 
+inchiLayersDetail.metFormulas = inchiSplited{2};
+
+% Charge layer
+pLayer = contains(inchiSplited, 'p');
+if any(pLayer)
+    inchiLayersDetail.positiveCharges = str2double(regexprep(inchiSplited{pLayer}, 'p-|;', ''));
+else
+    inchiLayersDetail.positiveCharges = 0;
+end
+qLayer = contains(inchiSplited, 'q');
+if any(qLayer)
+    inchiLayersDetail.negativeCharges = str2double(regexprep(inchiSplited{qLayer}, 'q\+|;', ''));
+else
+    inchiLayersDetail.negativeCharges = 0;
+end
+inchiLayersDetail.netCharge = inchiLayersDetail.positiveCharges - inchiLayersDetail.negativeCharges;
+
+% Stereochemical layer
+if any(~cellfun(@isempty, regexp(inchiSplited, 'b|t|m|s')))
+    inchiLayersDetail.stereochemicalLayer = true;
+end
+
+% Isotopic layer
+if any(~cellfun(@isempty, regexp(inchiSplited, 'i|h')))
+    inchiLayersDetail.isotopicLayer = true;
+end
+
+end
\ No newline at end of file
diff --git a/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m b/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m
index 5e8ea34b1a..65f96b43bb 100644
--- a/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m
+++ b/src/dataIntegration/chemoInformatics/obtainAtomMappingsRDT.m
@@ -1,10 +1,13 @@
-function standardisedRxns = obtainAtomMappingsRDT(model, molFileDir, outputDir, rxnsToAM, hMapping, maxTime, standariseRxn)
-% Compute atom mappings for reactions with implicit hydrogens in a
-% metabolic network using RDT algorithm
+function atomMappingReport = obtainAtomMappingsRDT(model, molFileDir, rxnDir, rxnsToAM, hMapping, onlyUnmapped)
+% Using the reaction decoder tool, compute atom mappings for reactions in a 
+% COBRA model. Atom mapping data is presented in a variety of formats, 
+% including MDL RXN, SMILES, and images. If this option is selected, the 
+% function can remove all protons from the model and represent the 
+% reactions as a hydrogen suppressed chemical graph.
 %
 % USAGE:
 %
-%    standardisedRxns = obtainAtomMappingsRDT(model, molFileDir, outputDir, rxnsToAM, hMapping, maxTime, standariseRxn)
+%    standardisedRxns = obtainAtomMappingsRDT(model, molFileDir, rxnDir, rxnsToAM, hMapping, maxTime, standariseRxn)
 %
 % INPUTS:
 %    model:         COBRA model with following fields:
@@ -24,16 +27,14 @@
 %                   reaction identifiers in input mets.
 %
 % OPTIONAL INPUTS:
-%    rxnsToAM:      List of reactions to atom map (default: all in the 
+%    rxnsToAM:      List of reactions to atom map (default: all in the
 %                   model).
 %    hMapping:      Logic value to select if hydrogen atoms will be atom
 %                   mapped (default: TRUE).
 %    rxnDir:        Path to directory that will contain the RXN files with
 %                   atom mappings (default: current directory).
-%    maxTime:       Maximum time assigned to compute atom mapping (default
-%                   1800s).
-%    standariseRxn: Logic value for standardising the atom mapped RXN file.
-%                   ChemAxon license is required (default: TRUE).
+%    onlyUnmapped:  Logic value to select create only unmapped MDL RXN
+%                   files (default: FALSE).
 %
 % OUTPUTS:
 %    balancedRxns:	List of standadised atom mapped reactions.
@@ -56,11 +57,11 @@
 %
 % .. Author: - German A. Preciat Gonzalez 25/05/2017
 
-if nargin < 3 || isempty(outputDir)
-    outputDir = [pwd filesep];
+if nargin < 3 || isempty(rxnDir)
+    rxnDir = [pwd filesep];
 else
     % Make sure input path ends with directory separator
-    outputDir = [regexprep(outputDir,'(/|\\)$',''), filesep];
+    rxnDir = [regexprep(rxnDir,'(/|\\)$',''), filesep];
 end
 if nargin < 4 || isempty(rxnsToAM)
     rxnsToAM = model.rxns;
@@ -68,36 +69,37 @@
 if nargin < 5 || isempty(hMapping)
     hMapping = true;
 end
-if nargin < 6 || isempty(maxTime)
-    maxTime = 1800;
-end
-if nargin < 7 || isempty(standariseRxn)
-    standariseRxn = true;
-end
-if exist('rtdDir','var')
-    rtdDir = [pwd filesep];
-else
-    % Make sure input path ends with directory separator
-    rtdDir = [regexprep(rdtDir,'(/|\\)$',''), filesep];
+if nargin < 6 || isempty(onlyUnmapped)
+    onlyUnmapped = false;
 end
+
+maxTime = 1800;
+
+% Check if JAVA is installed
+[javaInstalled, ~] = system('java');
+
 % Generating new directories
-if ~exist([outputDir filesep 'rxnFiles'],'dir')
-    mkdir([outputDir filesep 'rxnFiles'])
+if ~exist([rxnDir filesep 'unMapped'],'dir')
+    mkdir([rxnDir filesep 'unMapped'])
 end
-if ~exist([outputDir filesep 'atomMapped'],'dir')
-    mkdir([outputDir filesep 'atomMapped'])
-end
-if ~exist([outputDir filesep 'images'],'dir')
-    mkdir([outputDir filesep 'images'])
-end
-if ~exist([outputDir filesep 'txtData'],'dir')
-    mkdir([outputDir filesep 'txtData'])
+if javaInstalled && ~onlyUnmapped
+    if ~exist([rxnDir filesep 'atomMapped'],'dir')
+        mkdir([rxnDir filesep 'atomMapped'])
+    end
+    if ~exist([rxnDir filesep 'images'],'dir')
+        mkdir([rxnDir filesep 'images'])
+    end
+    if ~exist([rxnDir filesep 'txtData'],'dir')
+        mkdir([rxnDir filesep 'txtData'])
+    end
 end
 
 % Download the RDT algorithm, if it is not present in the output directory
-if exist([rdtDir filesep 'rdtAlgorithm.jar'])~=2
-    urlwrite('https://github.com/asad/ReactionDecoder/releases/download/v2.1.0/rdt-2.1.0-SNAPSHOT-jar-with-dependencies.jar',[rdtDir filesep 'rdtAlgorithm.jar']);
-    %urlwrite('https://github.com/asad/ReactionDecoder/releases/download/1.5.1/rdt-1.5.1-SNAPSHOT-jar-with-dependencies.jar',[outputDir filesep 'rdtAlgorithm.jar']);
+if exist([rxnDir filesep 'rdtAlgorithm.jar']) ~= 2 && javaInstalled && ~onlyUnmapped
+    urlwrite('https://github.com/asad/ReactionDecoder/releases/download/v2.4.1/rdt-2.4.1-jar-with-dependencies.jar',[rxnDir filesep 'rdtAlgorithm.jar']);
+    % Previous releases:
+    %     urlwrite('https://github.com/asad/ReactionDecoder/releases/download/v2.1.0/rdt-2.1.0-SNAPSHOT-jar-with-dependencies.jar',[outputDir filesep 'rdtAlgorithm.jar']);
+    %     urlwrite('https://github.com/asad/ReactionDecoder/releases/download/1.5.1/rdt-1.5.1-SNAPSHOT-jar-with-dependencies.jar',[outputDir filesep 'rdtAlgorithm.jar']);
 end
 
 % Delete the protons (hydrogens) for the metabolic network
@@ -114,20 +116,20 @@
     model.S = S;
 end
 
-
 % Format inputs
 mets = model.mets;
 fmets = regexprep(mets, '(\[\w\])', '');
 rxns = rxnsToAM;
+rxnsInModel = model.rxns;
 clear model
 
 % Get list of MOL files
 d = dir(molFileDir);
 d = d(~[d.isdir]);
 aMets = {d.name}';
-aMets = aMets(~cellfun('isempty',regexp(aMets,'(\.mol)$')));
+aMets = aMets(~cellfun('isempty', regexp(aMets,'(\.mol)$')));
 % Identifiers for atom mapped reactions
-aMets = regexprep(aMets, '(\.mol)$','');
+aMets = regexprep(aMets, '(\.mol)$', '');
 assert(~isempty(aMets), 'MOL files directory is empty or nonexistent.');
 
 % Extract MOL files
@@ -140,72 +142,203 @@
 % Create the RXN files. Three conditions are required: 1) To have all the
 % MOL files in the reaction, 2) No exchange reactions, 3) Only integers in
 % the stoichiometry
-for i=1:length(rxns)
-    a = ismember(regexprep(mets(find(S(:,i))), '(\[\w\])', ''), aMets);
-    s = S(find(S(:, i)), i);
-    if all(a(:) > 0) && length(a) ~= 1 && all(abs(round(s) - s) < (1e-2))
-        writeRxnfile(S(:, i), mets, fmets, molFileDir, rxns{i}, [outputDir...
-            filesep 'rxnFiles' filesep])
+rxnFilesWrittenBool = false(length(rxns), 1);
+for i = 1:length(rxns)
+    rxnBool = ismember(rxnsInModel, rxns{i});
+    metsInRxns = ismember(fmets(find(S(:, rxnBool))), aMets);
+    stoichiometry = S(find(S(:, rxnBool)), rxnBool);
+    if ~any(~metsInRxns) && length(metsInRxns) > 1 && all(abs(round(stoichiometry) - stoichiometry) < (1e-2))
+        writeRxnfile(S(:, rxnBool), mets, fmets, molFileDir, rxns{i}, [rxnDir...
+            filesep 'unMapped' filesep])
+        rxnFilesWrittenBool(i) = true;
     end
 end
+atomMappingReport.rxnFilesWritten = rxns(rxnFilesWrittenBool);
+
+% Get list of new RXN files
+d = dir([rxnDir filesep 'unMapped' filesep]);
+d = d(~[d.isdir]);
+aRxns = regexprep({d.name}', '.rxn', '');
+assert(~isempty(aRxns), 'No rxn file was written.');
+rxnsToAM = rxnsToAM(ismember(rxnsToAM, aRxns));
+
+mappedBool = false(length(rxnsToAM), 1);
 
 % Atom map RXN files
-fnames = dir([outputDir filesep 'rxnFiles' filesep '*.rxn']);
-fprintf('Computing atom mappings for %d reactions.\n\n', length(fnames));
-
-% Start from the lighter RXN to the heavier
-[~,bytes] = sort([fnames.bytes]);
-counterBalanced = 0;
-counterNotMapped = 0;
-counterUnbalanced = 0;
-for i=1:length(fnames)
-    name = [outputDir 'rxnFiles' filesep fnames(bytes(i)).name];
-    command = ['timeout ' num2str(maxTime) 's java -jar ' rdtDir filesep 'rdtAlgorithm.jar -Q RXN -q "' name '" -g -j AAM -f TEXT'];
-    if ismac
-        command = ['g' command];
-    end
-    [status, result] = system(command);
-    if status ~= 0
-        fprintf(result);
-        error('Command %s could not be run.\n', command);
+if javaInstalled == 1 && ~onlyUnmapped
+    fprintf('Computing atom mappings for %d reactions.\n\n', length(rxnsToAM));
+    for i = 1:length(rxnsToAM)
+        
+        name = [rxnDir 'unMapped' filesep rxnsToAM{i} '.rxn'];
+        command = ['timeout ' num2str(maxTime) 's java -jar ' rxnDir 'rdtAlgorithm.jar -Q RXN -q "' name '" -g -j AAM -f TEXT'];
+        
+        if ismac
+            command = ['g' command];
+        end
+        [status, result] = system(command);
+        if contains(result, 'file not found!')
+            warning(['The file ' name ' was not found'])
+        end
+        if ~status
+            fprintf(result);
+            error('Command %s could not be run.\n', command);
+        end
+        mNames = dir('ECBLAST_*');
+        if length(mNames) == 3
+            name = regexprep({mNames.name}, 'ECBLAST_|_AAM', '');
+            cellfun(@movefile, {mNames.name}, name)
+            cellfun(@movefile, name, {[rxnDir 'images'], [rxnDir...
+                'atomMapped'], [rxnDir 'txtData']})
+            mappedBool(i) = true;
+        elseif ~isempty(mNames)
+            delete(mNames.name)
+        end
+        
     end
     
-    mNames = dir('ECBLAST_*');
-    if length(mNames) == 3
-        name = regexprep({mNames.name}, 'ECBLAST_|_AAM', '');
-        cellfun(@movefile, {mNames.name}, name)
-        cellfun(@movefile, name, {[outputDir 'images'], [outputDir...
-            'atomMapped'], [outputDir 'txtData']})
-    elseif ~isempty(mNames)
-        delete(mNames.name)
-        counterNotMapped = counterNotMapped + 1;
-    else
-        counterNotMapped = counterNotMapped + 1;
-    end
-end
-
-% Standarize reactions
-if standariseRxn == true
-    fnames = dir([outputDir filesep 'atomMapped' filesep '*.rxn']);
-    for i = 1:length(fnames)
-        standardised = canonicalRxn(fnames(i).name, [outputDir...
-            'atomMapped'], [outputDir 'rxnFiles']);
-        if standardised
-            counterBalanced = counterBalanced + 1;
-            standardisedRxns{counterBalanced} = regexprep(fnames(i).name, '.rxn', '');
-        else
-            counterUnbalanced = counterUnbalanced + 1;
+    mappedRxns = rxnsToAM(mappedBool);
+    atomMappingReport.mappedRxns = mappedRxns;
+    
+    delete([rxnDir 'rdtAlgorithm.jar'])
+    
+    for i = 1:length(mappedRxns)
+        
+        name = [mappedRxns{i} '.rxn'];
+        
+        % Add header
+        mappedFile = regexp(fileread([rxnDir 'atomMapped' filesep name]), '\n', 'split')';
+        standardFile = regexp(fileread([rxnDir 'unMapped' filesep name]), '\n', 'split')';
+        mappedFile{2} = standardFile{2};
+        mappedFile{3} = ['COBRA Toolbox v3.0 - Atom mapped - ' datestr(datetime)];
+        mappedFile{4} = standardFile{4};
+        
+        formula = strsplit(mappedFile{4}, {'->', '<=>'});
+        
+        substratesFormula = strtrim(strsplit(formula{1}, '+'));
+        % Check if a metabolite is repeated in the substrates formula
+        repMetsSubInx = find(~cellfun(@isempty, regexp(substratesFormula, ' ')));
+        if ~isempty(repMetsSubInx)
+            for j = 1:length(repMetsSubInx)
+                metRep = strsplit(substratesFormula{repMetsSubInx(j)});
+                timesRep = str2double(metRep{1});
+                metRep = metRep{2};
+                substratesFormula{repMetsSubInx(j)} = strjoin(repmat({metRep}, [1 timesRep]));
+            end
+            substratesFormula = strsplit(strjoin(substratesFormula));
+        end
+        
+        productsFormula = strtrim(strsplit(formula{2}, '+'));
+        % Check if a metabolite is repeated in the products formula
+        repMetsProInx = find(~cellfun(@isempty, regexp(productsFormula, ' ')));
+        if ~isempty(repMetsProInx)
+            for     j = 1:length(repMetsProInx)
+                metRep = strsplit(productsFormula{repMetsProInx(j)});
+                timesRep = str2double(metRep{1});
+                metRep = metRep{2};
+                productsFormula{repMetsProInx(j)} = strjoin(repmat({metRep}, [1 timesRep]));
+            end
+            productsFormula = strsplit(strjoin(productsFormula));
         end
+        
+        % RXN file data
+        begmol = strmatch('$MOL', mappedFile);
+        noOfMolSubstrates = str2double(mappedFile{5}(1:3));
+        substratesMol = mappedFile(begmol(1:noOfMolSubstrates) + 1)';
+        noOfMolProducts = str2double(mappedFile{5}(4:6));
+        productsMol = mappedFile(begmol(noOfMolSubstrates + 1:noOfMolSubstrates + noOfMolProducts) + 1)';
+        
+        % Formula data
+        noOfsubstrates = numel(substratesFormula);
+        noOfproducts = numel(productsFormula);
+        
+        % Check if the stoichemestry is correct
+        if ~isequal(noOfsubstrates, substratesMol) || ~isequal(noOfproducts, productsMol)
+            mappedFile = sortMets(mappedFile, substratesMol, substratesFormula, productsMol, productsFormula, rxnDir);
+        end
+        % Rewrite the file
+        fid2 = fopen([rxnDir 'atomMapped' filesep name], 'w');
+        fprintf(fid2, '%s\n', mappedFile{:});
+        fclose(fid2);
     end
 else
-    standardisedRxns = [];
-    counterUnbalanced = length(dir([outputDir 'atomMapped' filesep '*.rxn']));
+    atomMappingReport.mappedRxns = [];
 end
 
-fprintf('\n%d reactions were atom mapped\n', length(dir([outputDir 'atomMapped' filesep '*.rxn'])));
-fprintf('%d reactions are not standardised\n', counterUnbalanced);
-fprintf('%d reactions were not mapped\n\n\n', counterNotMapped);
+% fprintf('\n%d reactions were atom mapped\n', length(dir([outputDir 'atomMapped' filesep '*.rxn'])));
+% fprintf('%d reactions are not standardised\n', counterUnbalanced);
+% fprintf('%d reactions were not mapped\n\n\n', counterNotMapped);
+%
+% fprintf('RDT algorithm was developed by:\n');
+% fprintf('SA Rahman et al.: Reaction Decoder Tool (RDT): Extracting Features from Chemical\n');
+% fprintf('Reactions, Bioinformatics (2016), doi: 10.1093/bioinformatics/btw096\n');
+end
+
+
+function newFile = sortMets(mappedFile, substratesMol, substratesFormula, productsMol, productsFormula, outputDir)
+% Function to sort the metabolites as in the model's stoichiometry
 
-fprintf('RDT algorithm was developed by:\n');
-fprintf('SA Rahman et al.: Reaction Decoder Tool (RDT): Extracting Features from Chemical\n');
-fprintf('Reactions, Bioinformatics (2016), doi: 10.1093/bioinformatics/btw096\n');
+begmol = strmatch('$MOL', mappedFile);
+
+% Check if bondless atoms were divided
+if numel(substratesFormula) ~= numel(substratesMol) || numel(productsFormula) ~= numel(productsMol)
+    
+    if ~exist([outputDir filesep 'atomMapped' filesep 'toFix'],'dir')
+        mkdir([outputDir filesep 'atomMapped' filesep 'toFix'])
+    end
+    copyfile([outputDir filesep 'atomMapped' filesep mappedFile{2} '.rxn'], [outputDir filesep 'atomMapped' filesep 'toFix'])
+    newFile(1:5, 1) = mappedFile(1:5);
+else
+    
+    newFile(1:5, 1) = mappedFile(1:5);
+    
+    %%% Sort substrates
+    [~,idm] = sort(substratesMol);
+    [~,ids] = sort(substratesFormula);
+    [~,ids] = sort(ids);
+    indexes = idm(ids);
+    
+    % Save each metabolite
+    for k = 1:numel(substratesFormula)
+        lineInMol = 1;
+        eval(sprintf('molS%d{%d} = mappedFile{begmol(%d)};', k, lineInMol, k))
+        while ~isequal(mappedFile{begmol(k) + lineInMol}, 'M  END')
+            eval(sprintf('molS%d{%d + 1} = mappedFile{begmol(%d) + %d};', k, lineInMol, k, lineInMol))
+            lineInMol = lineInMol + 1;
+        end
+        eval(sprintf('molS%d{%d + 1} = ''M  END'';', k, lineInMol))
+    end
+    % From the start of the mol files
+    c = 5;
+    for k = 1:numel(substratesFormula)
+        eval(sprintf('noOfLines = numel(molS%d);', indexes(k)))
+        for j = 1:noOfLines
+            c = c + 1;
+            eval(sprintf('newFile{%d} = molS%d{%d};', c, indexes(k), j))
+        end
+    end
+    
+    %%% Sort products
+    [~,idmp] = sort(productsMol);
+    [~,idp] = sort(productsFormula);
+    [~,idp] = sort(idp);
+    indexes = idmp(idp);
+    
+    for i = numel(substratesFormula) + 1:numel(substratesFormula) + numel(productsFormula)
+        lineInMol=1;
+        eval(sprintf('molP%d{%d} = mappedFile{begmol(%d)};', i - numel(substratesFormula), lineInMol, i))
+        while ~isequal(mappedFile{begmol(i) + lineInMol}, 'M  END')
+            eval(sprintf('molP%d{%d + 1} = mappedFile{begmol(%d) + %d};', i - numel(substratesFormula), lineInMol, i, lineInMol))
+            lineInMol = lineInMol + 1;
+        end
+        eval(sprintf('molP%d{%d + 1} = ''M  END'';', i - numel(substratesFormula), lineInMol))
+    end
+    for i = numel(substratesFormula) + 1:numel(substratesFormula) + numel(productsFormula)
+        molName = regexprep(productsFormula{i - numel(substratesFormula)}, '\[|\]', '_');
+        eval(sprintf('noOfLines = numel(molP%d);', indexes(i - numel(substratesFormula))))
+        for j = 1:noOfLines
+            c = c + 1;
+            eval(sprintf('newFile{%d} = molP%d{%d};', c, indexes(i - numel(substratesFormula)), j))
+        end
+    end
+end
+end
\ No newline at end of file
diff --git a/src/dataIntegration/chemoInformatics/obtainMetStructures.m b/src/dataIntegration/chemoInformatics/obtainMetStructures.m
new file mode 100644
index 0000000000..285317ba26
--- /dev/null
+++ b/src/dataIntegration/chemoInformatics/obtainMetStructures.m
@@ -0,0 +1,379 @@
+function molCollectionReport = obtainMetStructures(model, outputDir, updateDB, standardisationApproach, orderOfPreference)
+% Obtain MDL MOL files from various databases, including KEGG, HMDB, ChEBI, 
+% and PubChem. Alternatively, openBabel can be used to convert InChI 
+% strings or SMILES in MDL MOL files.
+%
+% USAGE:
+%
+% missingMolFiles = obtainMetStructures(model, outputDir, updateDB, standardisationApproach, orderOfPreference)
+%
+% INPUTS:
+%    model:         COBRA model with following fields:
+%
+%                       * .S - The m x n stoichiometric matrix for the
+%                              metabolic network.
+%                       * .mets - An m x 1 array of metabolite identifiers.
+%                       * .metInChIString - An m x 1 array of metabolite identifiers.
+%                       * .metSmiles - An m x 1 array of metabolite identifiers.
+%                       * .metVMHID - An m x 1 array of metabolite identifiers.
+%                       * .metCHEBIID - An m x 1 array of metabolite identifiers.
+%                       * .metKEGGID - An m x 1 array of metabolite identifiers.
+%                       * .metPubChemID - An m x 1 array of metabolite identifiers.
+%                       * .metHMDBID - An m x 1 array of metabolite identifiers.
+%
+% OPTIONAL INPUTS:
+%    outputDir:            Path to directory that will contain the MOL files
+%                          (default: current directory).
+%    updateDB:             Logical value idicating if the database will be
+%                          updated or not. If it's true, "outputDir" should
+%                          contain an existing database (default: false).
+%    standardisationApproach:  String contianing the type of standarization for
+%                          the moldecules (default: empty)
+%                             * explicitH - Normal chemical graphs.
+%                             * implicitH - Hydrogen suppressed chemical
+%                                           graphs.
+%                             * Neutral   - Chemical graphs with protonated
+%                                           molecules.
+%                             * basic     - Adding the header.
+%    orderOfPreference:    Vector indicating the source of preference
+%                          (default: 1:7)
+%                          1.- VMH (http://vmh.life/)
+%                          2.- InChI (requires openBabel)
+%                          3.- Smiles (requires openBabel)
+%                          4.- KEGG (https://www.genome.jp/)
+%                          5.- HMDB (https://hmdb.ca/)
+%                          6.- PubChem (https://pubchem.ncbi.nlm.nih.gov/)
+%                          7.- CHEBI (https://www.ebi.ac.uk/)
+%
+% OUTPUTS:
+%    missingMolFiles:      List of missing MOL files
+%    nonStandardised:      List of non-standardised MDL MOL file.
+
+if nargin < 2 || isempty(outputDir)
+    outputDir = [pwd filesep];
+else
+    % Make sure input path ends with directory separator
+    outputDir = [regexprep(outputDir,'(/|\\)$',''), filesep];
+end
+if nargin < 3
+    updateDB = false;
+end
+if nargin < 4
+    standardisationApproach = [];
+end
+if nargin < 5
+    orderOfPreference = 1:7;
+end
+
+[oBabelInstalled, ~] = system('obabel');
+webTimeout = weboptions('Timeout', 30);
+
+% Set directories
+if exist([outputDir 'newMol'], 'dir') == 0
+    mkdir([outputDir 'newMol'])
+end
+newMolFilesDir  = [outputDir 'newMol' filesep];
+if updateDB
+    if exist([outputDir 'met' filesep standardisationApproach filesep], 'dir') ~= 0
+        modelMets = regexprep(model.mets,'(\[\w\])','');
+        fnames = dir([newMolFilesDir '*.mol']);
+        model = removeMetabolites(model, model.mets(~ismember(modelMets, setdiff(modelMets, split([fnames(:).name], '.mol')))));
+    else
+        display('Directory with MOL files was not found to be updated in:')
+        display([outputDir 'met' filesep standardisationApproach filesep])
+        display('A new database will be created')
+    end
+end
+
+%% Obtain met data
+
+% Obtain ID's
+fields = fieldnames(model);
+% inchi
+inchiFieldBool = ~cellfun(@isempty, regexpi(fields, 'inchi'));
+if any(inchiFieldBool)
+    inchis = model.(fields{inchiFieldBool});
+end
+% SMILES
+smilesFieldBool = ~cellfun(@isempty, regexpi(fields, 'smiles'));
+if any(smilesFieldBool)
+    smiles = model.(fields{smilesFieldBool});
+end
+% HMDB
+hmdbFieldBool = ~cellfun(@isempty, regexpi(fields, 'hmdb'));
+if any(hmdbFieldBool)
+    hmdbIDs = model.(fields{hmdbFieldBool});
+end
+% KEGG
+keggFieldBool = ~cellfun(@isempty, regexpi(fields, 'kegg'));
+if any(keggFieldBool)
+    if sum(keggFieldBool) > 1
+        metFieldBool = ~cellfun(@isempty, regexpi(fields, 'met'));
+        keggFieldBool = keggFieldBool & metFieldBool;
+    end
+    keggIDs = model.(fields{keggFieldBool});
+end
+% PubChem
+PubChemFieldBool = ~cellfun(@isempty, regexpi(fields, 'PubChem'));
+if any(PubChemFieldBool)
+    PubChemIDs = model.(fields{PubChemFieldBool});
+end
+% chebi
+chebiFieldBool = ~cellfun(@isempty, regexpi(fields, 'chebi'));
+if any(chebiFieldBool)
+    chebiIDs = model.(fields{chebiFieldBool});
+end
+
+%% Obtain met structures
+
+% Unique metabolites idexes
+[umets, ia] = unique(regexprep(model.mets, '(\[\w\])', ''));
+% umets = model.mets;
+% ia = 1:numel(model.mets);
+
+missingMetBool = true(length(umets), 1);
+% Obtain MDL MOL files
+idsToCheck = {};
+for i = 1:length(umets)
+    for j = 1:7
+        switch orderOfPreference(j)
+            
+            case 1
+                % VMH
+                %                     if prod(~isnan(VMH{metIdxs(i)})) && ~isempty(VMH{metIdxs(i)}) && exist('VMH', 'var') && missing
+                %
+                %                     end
+                
+            case 2 % inchi
+                if prod(~isnan(inchis{ia(i)})) && ~isempty(inchis{ia(i)}) && oBabelInstalled && missingMetBool(i)
+                    try
+                        fid2 = fopen([outputDir 'tmp'], 'w');
+                        fprintf(fid2, '%s\n', inchis{ia(i)});
+                        fclose(fid2);
+                        command = ['obabel -iinchi ' outputDir 'tmp -O ' newMolFilesDir umets{i} '.mol mol'];
+                        [status, cmdout] = system(command);
+                        if contains(cmdout, '1 molecule converted')
+                            missingMetBool(i) = false;
+                        end
+                        delete([outputDir 'tmp'])
+                    catch ME
+                        disp(ME.message)
+                        idsToCheck(end + 1, 1) = inchis(ia(i));
+                    end
+                end
+                
+            case 3 % Smiles
+                if prod(~isnan(smiles{ia(i)})) && ~isempty(smiles{ia(i)}) && oBabelInstalled && missingMetBool(i)
+                    try
+                    fid2 = fopen([outputDir 'tmp'], 'w');
+                    fprintf(fid2, '%s\n', smiles{ia(i)});
+                    fclose(fid2);
+                    command = ['obabel -ismi ' outputDir 'tmp -O ' newMolFilesDir umets{i} '.mol mol'];
+                    [status,cmdout] = system(command);
+                    if status == 0
+                        missingMetBool(i) = false;
+                    end
+                    delete([outputDir 'tmp'])
+                    catch ME
+                        disp(ME.message)
+                        idsToCheck(end + 1, 1) = smiles(ia(i));
+                    end
+                end
+                
+            case 4 % KEGG
+                if  prod(~isnan(keggIDs{ia(i)})) && ~isempty(keggIDs{ia(i)}) && missingMetBool(i)
+                    try
+                        switch keggIDs{ia(i)}(1)
+                            case 'C'
+                                molFile = webread(['https://www.genome.jp/dbget-bin/www_bget?-f+m+compound+' keggIDs{ia(i)}], webTimeout);
+                            case 'D'
+                                molFile = webread(['https://www.kegg.jp/dbget-bin/www_bget?-f+m+drug+' keggIDs{ia(i)}], webTimeout);
+                        end
+                        if ~isempty(regexp(molFile, 'M  END'))
+                            fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w');
+                            fprintf(fid2, '%s\n', molFile);
+                            fclose(fid2);
+                            missingMetBool(i) = false;
+                        end
+                    catch ME
+                        disp(ME.message)
+                        idsToCheck(end + 1, 1) = keggIDs(ia(i));
+                    end
+                end
+                
+            case 5 % HMDB
+                if  prod(~isnan(hmdbIDs{ia(i)})) && ~isempty(hmdbIDs{ia(i)}) && missingMetBool(i)
+                    try
+                        numbersID = hmdbIDs{ia(i)}(5:end);
+                        if size(numbersID, 2) < 7
+                            numbersID = [repelem('0', 7 - size(numbersID, 2)) numbersID];
+                        end
+                        molFile = webread(['https://hmdb.ca/structures/metabolites/HMDB' numbersID '.mol'], webTimeout);
+                        if ~isempty(regexp(molFile, 'M  END'))
+                            fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w');
+                            fprintf(fid2, '%s\n', molFile);
+                            fclose(fid2);
+                            missingMetBool(i) = false;
+                        end
+                    catch ME
+                        disp(ME.message)
+                        idsToCheck(end + 1, 1) = hmdbIDs(ia(i));
+                    end
+                end
+                
+            case 6 % PubChem
+                if  prod(~isnan(PubChemIDs{ia(i)})) && ~isempty(PubChemIDs{ia(i)}) && missingMetBool(i)
+                    try
+                    molFile = webread(['https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/CID/'...
+                        num2str(PubChemIDs{ia(i)}) ...
+                        '/record/SDF/?record_type=2d&response_type=display'], webTimeout);
+                    %         Delete all after 'M  END' from the SDF filte to
+                    %         make it MOL file
+                    if ~isempty(regexp(molFile, 'M  END'))
+                        molFile(regexp(molFile, 'M  END') + 6:end) = [];
+                        fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w');
+                        fprintf(fid2, '%s\n', molFile);
+                        fclose(fid2);
+                        missingMetBool(i) = false;
+                    end
+                    catch ME
+                        disp(ME.message)
+                        idsToCheck(end + 1, 1) = PubChemIDs(ia(i));
+                    end
+                end
+                
+            case 7 % ChEBI
+                if  prod(~isnan(chebiIDs{ia(i)})) && ~isempty(chebiIDs{ia(i)}) && missingMetBool(i)
+                    try
+                    molFile = webread(['https://www.ebi.ac.uk/chebi/saveStructure.do?defaultImage=true&chebiId=' num2str(chebiIDs{ia(i)}) '&imageId=0'], webTimeout);
+                    if ~isempty(regexp(molFile, 'M  END'))
+                        fid2 = fopen([newMolFilesDir umets{i} '.mol'], 'w');
+                        fprintf(fid2, '%s\n', molFile);
+                        fclose(fid2);
+                        missingMetBool(i) = false;
+                    end
+                    catch ME
+                        disp(ME.message)
+                        idsToCheck(end + 1, 1) = chebiIDs(ia(i));
+                    end
+                end
+        end
+    end
+end
+
+%% Standardise Mol Files
+
+if ~isempty(standardisationApproach)
+    
+    % Set up directories
+    switch standardisationApproach
+        case 'explicitH'
+            standardisedDir = [outputDir 'explicitH' filesep];
+        case 'implicitH'
+            standardisedDir = [outputDir 'implicitH' filesep];
+        case 'protonated'
+            standardisedDir = [outputDir 'protonated' filesep];
+        otherwise
+            standardisationApproach = 'basic';
+            standardisedDir = molDir;
+    end
+    
+    % Standardise files
+    umets(missingMetBool) = [];
+    standardisationReport = standardiseMolDatabase(tmpDir, umets, standardisedDir, standardisationApproach);
+    
+    % Get SMILES and InChIs
+    if isfield(standardisationReport, 'SMILES')
+        SMILES = standardisationReport.SMILES;
+    else
+        SMILES = '';
+    end
+    if isfield(standardisationReport, 'InChIs')
+        InChIs = standardisationReport.InChIs;
+    else
+        InChIs = '';
+    end
+    % Delete empty cells
+    InChIs(cellfun(@isempty, InChIs)) = [];
+    SMILES(cellfun(@isempty, SMILES)) = [];
+    
+    if updateDB && ~isempty(InChIs) && ~isempty(SMILES)
+        
+        % For InChIs
+        if isfile([standardisedDir 'InChIs'])
+            % Merge old and new InChIs
+            InChIsFile = regexp( fileread([standardisedDir 'InChIs']), '\n', 'split')';
+            InChIsFile(cellfun(@isempty, InChIsFile)) = [];
+            InChIsFileSp = split(InChIsFile, ' - ');
+            smilesSp = split(InChIs, ' - ');
+            mergedSmiles(:, 2) = unique([InChIsFileSp(:, 2); smilesSp(:, 2)]);
+            mergedSmiles(ismember(mergedSmiles(:, 2), smilesSp(:, 2)), 1) = smilesSp(:, 2);
+            mergedSmiles(ismember(mergedSmiles(:, 2), InChIsFileSp(:, 2)), 1) = InChIsFileSp(:, 2);
+            mergedSmiles = strcat(mergedSmiles(:, 1), {' - '}, mergedSmiles(:, 2));
+            % Write InChIs
+            fid2 = fopen([standardisedDir 'InChIs'], 'w');
+            fprintf(fid2, '%s\n', mergedSmiles{:});
+            fclose(fid2);
+        else
+            % Write InChIs
+            fid2 = fopen([standardisedDir 'InChIs'], 'w');
+            fprintf(fid2, '%s\n', InChIs{:});
+            fclose(fid2);
+        end
+        
+        % For SMILES
+        if isfile([standardisedDir 'SMILES'])
+            % Merge old and new InChIs
+            smilesFile = regexp( fileread([standardisedDir 'SMILES']), '\n', 'split')';
+            smilesFile(cellfun(@isempty, smilesFile)) = [];
+            smilesFileSp = split(smilesFile, ' - ');
+            smilesSp = split(SMILES, ' - ');
+            mergedSmiles(:, 2) = unique([smilesFileSp(:, 2); smilesSp(:, 2)]);
+            mergedSmiles(ismember(mergedSmiles(:, 2), smilesSp(:, 2)), 1) = smilesSp(:, 2);
+            mergedSmiles(ismember(mergedSmiles(:, 2), smilesFileSp(:, 2)), 1) = smilesFileSp(:, 2);
+            mergedSmiles = strcat(mergedSmiles(:, 1), {' - '}, mergedSmiles(:, 2));
+            % Write InChIs
+            fid2 = fopen([standardisedDir 'SMILES'], 'w');
+            fprintf(fid2, '%s\n', mergedSmiles{:});
+            fclose(fid2);
+        else
+            % Write InChIs
+            fid2 = fopen([standardisedDir 'SMILES'], 'w');
+            fprintf(fid2, '%s\n', SMILES{:});
+            fclose(fid2);
+        end
+        
+    else
+        % Write InChIs
+        fid2 = fopen([standardisedDir 'InChIs'], 'w');
+        fprintf(fid2, '%s\n', InChIs{:});
+        fclose(fid2);
+        % Write SMILES
+        fid2 = fopen([standardisedDir 'SMILES'], 'w');
+        fprintf(fid2, '%s\n', SMILES{:});
+        fclose(fid2);
+    end
+end
+
+%% Report
+
+% Make report
+molCollectionReport.noOfMets = size(umets, 1);
+molCollectionReport.noOfMetsWithMol = sum(~missingMetBool);
+molCollectionReport.noOfMetsWithoutMol = sum(missingMetBool);
+molCollectionReport.coverage = (molCollectionReport.noOfMetsWithMol * 100) / molCollectionReport.noOfMets;
+
+% Check standardised data
+if ~isempty(standardisationApproach)
+    nRows = size(standardisationReport.SMILES, 1);
+    varTypes = {'string', 'string', 'string', 'string'};
+    varNames = {'mets', 'InChIKeys', 'InChIs', 'SMILES'};
+    molCollectionReport.standardisationReport = table('Size', [nRows length(varTypes)], 'VariableTypes', varTypes, 'VariableNames', varNames);
+    molCollectionReport.standardisationApproach = standardisationApproach;
+    molCollectionReport.standardisationReport(1:end) = standardisationReport.standardised;
+    molCollectionReport.standardisationReport.InChIKeys(1:size(standardisationReport.InChIKeys, 1)) = standardisationReport.InChIKeys;
+    molCollectionReport.standardisationReport.InChIs(1:size(standardisationReport.InChIs, 1)) = standardisationReport.InChIs;
+    molCollectionReport.standardisationReport.SMILES(1:size(standardisationReport.SMILES, 1)) = standardisationReport.SMILES;
+end
+
+end
\ No newline at end of file
diff --git a/src/dataIntegration/chemoInformatics/standardiseMolDatabase.m b/src/dataIntegration/chemoInformatics/standardiseMolDatabase.m
new file mode 100644
index 0000000000..e49af35f31
--- /dev/null
+++ b/src/dataIntegration/chemoInformatics/standardiseMolDatabase.m
@@ -0,0 +1,212 @@
+function standardisationReport = standardiseMolDatabase(molDir, metList, standardisedDir, standardisationApproach)
+% Standardize an MDL MOL file directory by representing the reaction using 
+% normal chemical graphs, hydrogen suppressed chemical graphs, and chemical 
+% graphs with protonated molecules. The function also updates the header 
+% with the standardization information. It makes use of ChemAxon's 
+% standardizer and openBabel.
+%
+% USAGE:
+%
+% [standardised, nonStandardised, InChIs, SMILES] = standardiseMolDatabase(molDir, standardisedDir, standardiseMolFiles)
+%
+% INPUTS:
+%    molDir:               Path to directory that contain the MDL MOL files
+%                          to be standardised.
+%
+% OPTIONAL INPUTS:
+%    metList:              1 x n lest of metabolites to standardise
+%    standardisedDir       Path to directory that will contain the
+%                          standardised MDL MOL files (default: current
+%                          directory).
+%    standardisationApproach:  String contianing the type of standarization for
+%                              the moldecules (default: 'explicitH' if
+%                              openBabel is installed, otherwise 'basic')
+%                              * explicitH - Normal chemical graphs.
+%                              * implicitH - Hydrogen suppressed chemical
+%                                            graphs.
+%                              * neutral   - Chemical graphs with protonated
+%                                            molecules.
+%                              * basic     - Updating the header.
+%
+% OUTPUTS:
+%    A standardised molecular structures database
+%    standardisationReport:    Struct array with the standarization report
+%                                * standardised
+%                                * nonStandardised
+%                                * InChIs
+%                                * SMILES
+%                                * InChIKeys
+%
+% .. Author: - German Preciat 25/06/2020
+
+
+molDir = [regexprep(molDir,'(/|\\)$',''), filesep];
+if nargin < 2 || isempty(metList)
+    metList = [];
+end
+if nargin < 3 || isempty(standardisedDir)
+    standardisedDir = [pwd filesep];
+else
+    % Make sure input path ends with directory separator
+    standardisedDir = [regexprep(standardisedDir,'(/|\\)$',''), filesep];
+end
+% Check if openBabel is installed
+if nargin < 4 || isempty(standardisationApproach)
+    standardisationApproach = 'explicitH';
+end
+
+% Check if chemAxon and OpenBabel are installed
+[oBabelInstalled, ~] = system('obabel');
+[marvinInstalled, ~] = system('cxcalc');
+if marvinInstalled ~= 0
+    marvinInstalled = false;
+end
+if oBabelInstalled ~= 1
+    oBabelInstalled = false;
+    standardisationApproach = 'basic';
+end
+
+% Assign directories and create them
+standardisedMolFiles = [standardisedDir 'molFiles' filesep];
+if marvinInstalled
+    standardisedImages = [standardisedDir 'images' filesep];
+end
+if ~exist(standardisedMolFiles, 'dir')
+    mkdir(standardisedMolFiles)
+    if marvinInstalled
+        mkdir(standardisedImages)
+    end
+end
+
+% The new MOL files are readed
+% Get list of MOL files
+d = dir(molDir);
+d = d(~[d.isdir]);
+aMets = {d.name}';
+aMets = aMets(~cellfun('isempty', regexp(aMets,'(\.mol)$')));        
+if ~isempty(metList)
+    aMets = aMets(ismember(regexprep(aMets, '.mol', ''), metList));
+end
+    
+fprintf('\n Standardizing %d MOL files ... \n', size(aMets, 1));
+
+y = 0;
+n = 0;
+nonStandardised = [];
+InChIs = [];
+InChIKeys = [];
+for i = 1:size(aMets, 1)
+%     try
+        name = aMets{i};
+        
+        if oBabelInstalled
+            
+            % Obtain SMILES
+            command = ['obabel -imol ' molDir name ' -osmiles'];
+            [~, cmdout] = system(command);
+            cmdout = splitlines(cmdout);
+            cmdout = split(cmdout{end - 2});
+            smiles = cmdout{1};
+            SMILES{i, 1} = smiles;
+            
+            % Obtain inChIKey and InChI
+            % inChIKey
+            command = ['obabel -imol ' molDir name ' -oinchikey'];
+            [~, cmdout] = system(command);
+            cmdout = split(cmdout);
+            inchikeyIdx = find(cellfun(@numel, cmdout) == 27);
+            if ~isempty(inchikeyIdx)
+                InChIKey = ['InChIKey=' cmdout{inchikeyIdx}];
+                InChIKeys{i, 1} = InChIKey;
+            else
+                InChIKey = '';
+                InChIKeys{i, 1} = {''};
+            end
+            % InChI
+            command = ['obabel -imol ' molDir name ' -oinchi'];
+            [~, cmdout] = system(command);
+            cmdout = split(cmdout);
+            if any(contains(cmdout,'InChI=1S'))
+                InChIs{i, 1} = cmdout{contains(cmdout,'InChI=1S')};
+                fid2 = fopen('tmp', 'w');
+                fprintf(fid2, '%s\n', cmdout{contains(cmdout,'InChI=1S')});
+                fclose(fid2);
+                % Create an InChI based-MOL file
+                command = ['obabel -iinchi tmp -O ' standardisedMolFiles name ' --gen2D'];
+                [~, ~] = system(command);
+                delete('tmp')
+            else
+                InChIs{i, 1} = {''};
+                fid2 = fopen('tmp', 'w');
+                fprintf(fid2, '%s\n', smiles);
+                fclose(fid2);
+                command = ['obabel -ismiles tmp -O ' standardisedMolFiles name ' --gen2D'];
+                [~, ~] = system(command);
+                delete('tmp')
+            end
+            
+            % Adapt database
+            switch standardisationApproach
+                case 'explicitH'
+                    command = ['obabel -imol ' standardisedMolFiles name ' -O ' standardisedMolFiles name ' -h'];
+                    [~, ~] = system(command);
+                case 'implicitH'
+                    % Delete explicit hydrogens
+                    command = ['obabel -imol ' standardisedMolFiles name ' -O ' standardisedMolFiles name ' -d'];
+                    [~, ~] = system(command);
+                case 'neutral'
+                    % Neutralize molecule
+                    command = ['obabel -imol ' standardisedMolFiles name ' -O ' standardisedMolFiles name ' –neutralize'];
+                    [~, ~] = system(command);
+            end
+        else
+            
+            InChIKey = '';
+            copyfile([molDir name], standardisedMolFiles)
+            
+        end
+        
+        % Rewrite headings
+        molFile = regexp(fileread([standardisedMolFiles name]), '\n', 'split')';
+        molFile{1} = name(1:end-4);
+        molFile{2} = ['COBRA Toolbox v3.0 - ' standardisationApproach ' molecule - ' datestr(datetime)];
+        molFile{3} = InChIKey;
+        fid2 = fopen([standardisedMolFiles name], 'w');
+        fprintf(fid2, '%s\n', molFile{:});
+        fclose(fid2);
+        % Generate images
+        if marvinInstalled
+            fdata = dir([standardisedMolFiles name]);
+            command = ['molconvert jpeg:w' num2str(fdata.bytes / 10) ',h' num2str(fdata.bytes / 10) ' ' standardisedMolFiles name ' -o ' standardisedImages name(1:end-4) '.jpeg'];
+            [~, ~] = system(command);
+        end
+        
+        % Save data and delete the non-standardised molecule
+        y = y + 1;
+        standardised{y, 1} = name(1:end - 4);
+        %     delete([molDir name])
+        
+%     catch
+%         
+%         % Save data in case it is not standardised
+%         n = n + 1;
+%         nonStandardised{n, 1} = name(1:end - 4);
+%         
+%     end
+end
+
+% Prepare report
+standardisationReport.standardisationApproach = standardisationApproach;
+standardisationReport.standardised = standardised;
+standardisationReport.nonStandardised = nonStandardised;
+if exist('InChIs', 'var')
+    standardisationReport.InChIs = InChIs;
+end
+if exist('SMILES', 'var')
+    standardisationReport.SMILES = SMILES;
+end
+if exist('InChIKeys', 'var')
+    standardisationReport.InChIKeys = InChIKeys;
+end
+
+end
\ No newline at end of file
diff --git a/src/dataIntegration/chemoInformatics/transportRxnAM.m b/src/dataIntegration/chemoInformatics/transportRxnAM.m
new file mode 100644
index 0000000000..1cc36c0bbe
--- /dev/null
+++ b/src/dataIntegration/chemoInformatics/transportRxnAM.m
@@ -0,0 +1,121 @@
+function mappedRxns = transportRxnAM(rxnDir, outputDir)
+% This function atom maps the transport reactions for a given director in 
+% MDL RXN file format.
+%
+% USAGE:
+%
+% mappedRxns = transportRxnAM(rxnDir, outputDir)
+%
+% INPUTS:
+%    rxnDir:               Path to directory that contains the RXN files
+%                          (default: current directory).
+%
+% OPTIONAL INPUTS:
+%    outputDir:            Path to directory that will contain the atom
+%                          mapped transport reactions (default: current
+%                          directory).
+%
+% OUTPUTS:
+%    mappedRxns:           List of missing MOL files atom mapped transport
+%                          reactions.
+
+rxnDir = [regexprep(rxnDir,'(/|\\)$',''), filesep]; % Make sure input path ends with directory separator
+if nargin < 2 || isempty(outputDir)
+    outputDir = [pwd filesep];
+else
+    % Make sure input path ends with directory separator
+    outputDir = [regexprep(outputDir,'(/|\\)$',''), filesep];
+end
+
+% Create directory if it is missing
+if exist(outputDir) ~= 7
+    mkdir('transportRxnsAM')
+end
+
+% Check if the directory is not empty
+fnames = dir([rxnDir '*.rxn']);
+assert(~isempty(fnames), '''rxnDir'' does not contain RXN files');
+
+c = 0;
+for i = 1:length(fnames)
+    
+    % Read the MOL file
+    rxnFile = regexp( fileread([rxnDir fnames(i).name]), '\n', 'split')';
+    rxnFormula = rxnFile{4};
+    assert(~isempty(rxnFormula), 'There is not a chemical formula.');
+    % Check if it is a transport reaction
+    rxnFormula = split(rxnFormula, {' -> ', ' <=> '});
+    substrates = split(rxnFormula{1}, ' + ');
+    substrates = expandMets(substrates);
+    products = split(rxnFormula{2}, ' + ');
+    products = expandMets(products);
+    if isequal(substrates, products)
+        
+        % Identify the corresponding metabolites in the substrates and
+        % products
+        begMol = strmatch('$MOL', rxnFile);
+        for j = 1:length(begMol)
+            if j <= numel(substrates)
+                metSubs{j} = regexprep((rxnFile{begMol(j) + 1}), '(\[\w\])', '');
+            else
+                metProds{j - numel(substrates)} = regexprep((rxnFile{begMol(j) + 1}), '(\[\w\])', '');
+            end
+        end
+        
+        % Atom map
+        atom = 0;
+        for j = 1:numel(metSubs)
+            nuOfAtoms = str2double(rxnFile{begMol(j) + 4}(1:3));
+            productIdx = strmatch(metSubs{j}, metProds);
+            for k = 1:nuOfAtoms
+                atom = atom + 1;
+                switch length(num2str(atom))
+                    case 1
+                        data2print = ['  ' num2str(atom) '  0  0'];
+                    case 2
+                        data2print = [' ' num2str(atom) '  0  0'];
+                    case 3
+                        data2print = [num2str(atom) '  0  0'];
+                end
+                rxnFile{begMol(j) + 4 + k} = [rxnFile{begMol(j) + 4 + k}(1:60) data2print];
+                rxnFile{begMol(productIdx(1) + numel(metSubs)) + 4 + k} = [rxnFile{begMol(productIdx(1) + numel(metSubs)) + 4 + k}(1:60) data2print];
+            end
+            metProds(productIdx(1)) = {'done'};
+        end
+        
+        % Write the file
+        fid2 = fopen([outputDir fnames(i).name], 'w');
+        fprintf(fid2, '%s\n', rxnFile{:});
+        fclose(fid2);
+        
+        c = c + 1;
+        mappedRxns{c} = regexprep(fnames(i).name, '.rxn', '');
+        clear metSubs metProds
+        
+    end
+end
+
+if ~exist('mappedRxns', 'var')
+    mappedRxns = [];
+end
+end
+
+function newMetList = expandMets(metList)
+
+% Check if a metabolite has an number to be expanded
+idxsCheck = ~cellfun(@isempty, regexp(metList, ' '));
+if any(idxsCheck)
+    idx = find(idxsCheck);
+    % Add repeated metabolites
+    for i = 1:length(idx)
+        met2expand = split(metList(idx(i)));
+        metList = [metList; repelem(met2expand(2), str2double(met2expand(1)))'];
+    end
+    metList(idx) = [];
+end
+
+% Create the new list with metabolites sorted and without a compartment
+newMetList = metList;
+newMetList = sort(regexprep(newMetList, '(\[\w\])', ''));
+
+end
diff --git a/src/reconstruction/comparison/compareModels.m b/src/reconstruction/comparison/compareModels.m
new file mode 100644
index 0000000000..d49363e615
--- /dev/null
+++ b/src/reconstruction/comparison/compareModels.m
@@ -0,0 +1,46 @@
+function [isSameModel,differences] = compareModels(modelA,modelB,printLevel)
+%compares modelA with modelB, looking for differences between the
+%structures
+%
+% INPUT
+% modelA:       structure
+% modelB:       structure
+% printLevel:    
+%
+% OUTPUT
+% isSameModel:   true if identical models, false otherwise
+% differences:   structure listing differences between models
+%               *.reason: gives a text stack of why the difference occurred
+%                         as well as a field
+%               *.where: contains the indices and subfields of the structure
+%                        where the comparison failed.
+%
+%
+% Note: This function depends on structeq.m and celleq.m
+
+if~exist('printLevel','var')
+    printLevel=1;
+end
+
+differences=[];
+result = 0;
+i=0;
+while result==0
+    [result, why] = structeq(modelA,modelB);
+    if result==0
+        i=i+1;
+        if printLevel>0
+            fprintf('%s\n',why.Reason)
+            fprintf('%s\n',why.Where)
+        end
+        differences(i).reason = why.Reason;
+        differences(i).where = why.Where;
+        %fieldName = strrep(why.Where,'(1)','');
+        fieldName = strtok(why.Where,'(1)');
+        eval(['modelB' fieldName ' = modelA' fieldName ';']);
+    end
+end
+isSameModel = i==0;
+
+end
+
diff --git a/test/verifiedTests/dataIntegration/testChemoInformatics/metaboliteIds.xlsx b/test/verifiedTests/dataIntegration/testChemoInformatics/metaboliteIds.xlsx
new file mode 100644
index 0000000000..1b46cd9755
Binary files /dev/null and b/test/verifiedTests/dataIntegration/testChemoInformatics/metaboliteIds.xlsx differ
diff --git a/test/verifiedTests/dataIntegration/testChemoInformatics/refData_generateChemicalDatabase.mat b/test/verifiedTests/dataIntegration/testChemoInformatics/refData_generateChemicalDatabase.mat
new file mode 100644
index 0000000000..a3e0ee2354
Binary files /dev/null and b/test/verifiedTests/dataIntegration/testChemoInformatics/refData_generateChemicalDatabase.mat differ
diff --git a/test/verifiedTests/dataIntegration/testChemoInformatics/testGenerateChemicalDatabase.m b/test/verifiedTests/dataIntegration/testChemoInformatics/testGenerateChemicalDatabase.m
new file mode 100644
index 0000000000..5a62115bca
--- /dev/null
+++ b/test/verifiedTests/dataIntegration/testChemoInformatics/testGenerateChemicalDatabase.m
@@ -0,0 +1,64 @@
+% The COBRAToolbox: testGenerateChemicalDatabase.m
+%
+% Purpose:
+%     - tests the generateChemicalDatabase function using the Citric Acid 
+%       Cycle E. coli Core Model
+%
+
+fprintf('   Testing generateChemicalDatabase ... \n' );
+
+% Save the current path
+currentDir = pwd;
+mkdir([currentDir filesep 'tmpDB'])
+
+% Check external software
+[cxcalcInstalled, ~] = system('cxcalc');
+cxcalcInstalled = ~cxcalcInstalled;
+[oBabelInstalled, ~] = system('obabel');
+[javaInstalled, ~] = system('java');
+
+% Load the E. coli Core Model TCA rxns
+load ecoli_core_model.mat
+model.mets = regexprep(model.mets, '\-', '\_');
+rxnsToExtract = {'AKGDH', 'CS', 'FUM', 'ICDHyr', 'MDH', 'SUCOAS'};
+model = extractSubNetwork(model, rxnsToExtract);
+
+% initialize the test
+fileDir = fileparts(which('testGenerateChemicalDatabase.m'));
+
+% Load reference data
+load('refData_generateChemicalDatabase.mat')
+
+%% Add external information in the model
+
+inputData = [fileDir filesep 'metaboliteIds.xlsx'];
+replace = false;
+[model, hasEffect] = addMetInfoInCBmodel(model, inputData, replace);
+
+assert(isequal(model, model1), 'The identifiers are different')
+assert(hasEffect, 'The function addMetInfoInCBmodel din not have effect')
+
+%% Set optional variables according the software installed
+
+options.outputDir = [currentDir filesep 'tmpDB'];
+options.printlevel = 0;
+
+if cxcalcInstalled && oBabelInstalled && javaInstalled
+    options.adjustToModelpH = true;
+    options.onlyUnmapped = false;
+else
+    options.adjustToModelpH = false;
+    options.onlyUnmapped = true;
+end
+
+[info, model] = generateChemicalDatabase(model, options);
+
+if cxcalcInstalled && oBabelInstalled && javaInstalled
+    assert(isequal(model, model2), 'The model ouput is different')
+    assert(isequal(info, info1), 'The database report is different')
+else
+    assert(isequal(model, model3), 'The model ouput is different')
+    assert(isequal(info, info2), 'The database report is different')
+end
+
+rmdir([currentDir filesep 'tmpDB'], 's')
\ No newline at end of file