Merge pull request #93 from SysBioChalmers/dev

Dev
SysBioChalmers · Jun 18, 2022 · 5a892bf · 5a892bf
2 parents 06763a4 + 0740767
commit 5a892bf
Show file tree

Hide file tree

Showing 1,411 changed files with 11,141 additions and 1,416,658 deletions.
diff --git a/.gitignore b/.gitignore
@@ -48,4 +48,5 @@ helpsearch*/
 # Non-complying tables #
 ########################
 *.xls
-*.tab
+*.tab
+.Rproj.user
diff --git a/README.md b/README.md
@@ -1,12 +1,12 @@
-# Yeast-Species-GEM: The pan-GEM and strain specific GEMs for 332 different yeast species
+# Yeast-Species-GEM: The pan-GEM and strain-specific GEMs for yeast species with whole-genome sequences
 
 * Brief introduction:
 
-As the unicellular fungi, the budding yeasts contain over 1000 different yeast species, which are widely distributed on our earth. Some yeast species, like _Saccharomyces cerevisiae_ and _Candida albicans_, are very important to our human life and health. To further explore how different yeast species with interesting traits evolved in the long history, we built a pan-GEM for nearly all currently sequenced yeast species based on the latest research (Cell, 2018; DOI: 10.1016/j.cell.2018.10.023). With the advanced GEM of _S.cerevisiae_ (Yeast8, https://github.com/SysBioChalmers/yeast-GEM) developed in our lab, the pan-GEM of for all these sequenced yeast species can be built by adding the new reactions with the gene association into Yeast8 through the detailed pan-genome annotation and mounts of physiological data mining. Next, with the gene function annotation of each yeast species, 332 species GEMs of high quality can be built automatically. With the gap-filling and model test verified by the experimental data from different sources, the quality and prediction performances of species GEM will be improved continuously. We hope that this work could set a solid base in modelling simulation and systematic utilization of different yeast species for the wide research community.
+As unicellular fungi, the budding yeasts comprise over 1000 different species, which are widely distributed on Earth. Some yeast species, like _Saccharomyces cerevisiae_ and _Candida albicans_, are very important to human life and health. To further explore how different yeast species with interesting traits evolved over their long history, we built a pan-GEM for nearly all currently sequenced yeast species based on the latest research (Cell, 2018; DOI: 10.1016/j.cell.2018.10.023). With the advanced GEM of _S. cerevisiae_ (Yeast8, https://github.com/SysBioChalmers/yeast-GEM) developed in our lab, the pan-GEM for all these sequenced yeast species can be built by adding new reactions with their gene associations into Yeast8 through detailed pan-genome annotation and large amounts of physiological data mining. Next, with the gene function annotation of each yeast species, high-quality species-specific GEMs can be built automatically. With gap-filling and model tests verified against experimental data from different sources, the quality and predictive performance of the species GEMs will be improved continuously. We hope that this work will set a solid foundation for modelling, simulation, and systematic utilization of different yeast species by the wider research community.
 
 * Model KeyWords:
 
-**GEM Category:** species; **Utilisation:** experimental data reconstruction, multi-omics integrative analysis, _in silico_ strain design, model template, evolution analysis; **Field:** metabolic-network reconstruction; **Type of Model:** reconstruction, curated; **Model Source:** YeastMetabolicNetwork; **Omic Source:** genomics; **Taxonomy:** subphylum _Saccharomycotina_; **Metabolic System:** general metabolism; **Strain:** 332 budding yeast species and 11 fungal species outgroup species;
+**GEM Category:** species; **Utilisation:** experimental data reconstruction, multi-omics integrative analysis, _in silico_ strain design, model template, evolution analysis; **Field:** metabolic-network reconstruction; **Type of Model:** reconstruction, curated; **Model Source:** YeastMetabolicNetwork; **Omic Source:** genomics; **Taxonomy:** subphylum _Saccharomycotina_; **Metabolic System:** general metabolism; **Strain:** budding yeast species;
 
 * Contributing
 

diff --git a/Reconstruction_script/.DS_Store b/Reconstruction_script/.DS_Store
diff --git a/draft_GEM_all_yeast_species/.RData b/draft_GEM_all_yeast_species/.RData
diff --git a/draft_GEM_all_yeast_species/.Rhistory b/draft_GEM_all_yeast_species/.Rhistory
@@ -0,0 +1,83 @@
+# Compare the original s288c reactions from different sources: RAVEN, KEGG.
+# Revised by Hongzhong 2020-7-27
+# Load libraries (stringr supplies str_replace_all; tidyverse supplies the readr/tibble helpers used in the loop).
+library(readxl)
+library(stringr)
+library(tidyverse)
+#---------------------------------------------------
+## Small task: check whether specific reactions exist in all yeast species.
+# Source of the per-strain models: RAVEN + KEGG. The vectors below are accumulators filled by the loop that follows.
+gene_all <- vector()  # per-strain count of unique genes
+rxn_all <- vector()  # per-strain count of unique reaction IDs
+exist_R01867 <- vector()  # per-strain number of rows whose reaction ID is 'R01867'
+ec <- c('ec:1.3.98.1','ec:1.3.5.2')#,'ec:1.3.1.14')
+ec_all <- vector()  # per-strain number of rows whose EC number is listed in `ec`
+strain <- list.files('strain_specific_model_from_RAVEN_kegg')  # each entry is later used as a sub-directory name
+ec_rxn <- read.table('data/EC_rxn_mapping_kegg.txt', stringsAsFactors = FALSE) # EC number (V1) <-> reaction ID (V2) mapping from KEGG
+ec_rxn$V2 <- str_replace_all(ec_rxn$V2, "rn:", "")  # drop the "rn:" prefix so V2 can be joined against bare reaction IDs
+View(ec_rxn)  # interactive inspection only; not needed for the computation
+for (i in strain) {  # one iteration per entry found in 'strain_specific_model_from_RAVEN_kegg'
+print(i)  # progress indicator
+# Debug aid: set i by hand to run the loop body for a single entry, e.g. i <- "biocyc_panYeast2_45_100"
+inputfile <- paste('strain_specific_model_from_RAVEN_kegg/',i, '/excelGenes.txt', sep = "")  # path to this strain's gene table
+inputfile2 <- paste('strain_specific_model_from_RAVEN_kegg/',i, '/excelRxns.txt', sep = "")  # path to this strain's reaction table
+gene_biocyc <- read.table(inputfile, header =FALSE, sep = "\t", stringsAsFactors = FALSE)  # tab-separated, no header row
+gene <- length(unique(gene_biocyc$V2))  # number of distinct values in the second column
+rxn_biocyc <- read_table2(inputfile2)  # NOTE(review): read_table2() is deprecated in readr >= 2.0 in favour of read_table() -- confirm installed version
+rxn <- length(unique(rxn_biocyc$`#`))  # number of distinct values in the column literally named "#"
+rxn_detail <- data_frame(rxn=rxn_biocyc$`#`)  # NOTE(review): data_frame() is deprecated in tibble in favour of tibble()
+rxn_detail <- merge(rxn_detail, ec_rxn, by.x = 'rxn', by.y = 'V2', all.x = TRUE)  # left join: keep every reaction, attach EC numbers (V1) where the mapping has them
+existence <- length(which(rxn_detail$rxn %in% 'R01867'))  # rows of the merged table with reaction ID R01867
+ec_existence <- length(which(rxn_detail$V1 %in% ec))  # rows of the merged table whose EC number is one of `ec`
+# Save the results: append this strain's counts to the accumulators.
+exist_R01867 <- c(exist_R01867, existence)
+ec_all <- c(ec_all, ec_existence)
+gene_all <- c(gene_all, gene)
+rxn_all <- c(rxn_all, rxn)
+}
+inputfile <- paste('strain_specific_model_from_RAVEN_kegg/',i, '/excelGenes.txt', sep = "")  # loop-body statements repeated outside the loop; `i` keeps whatever value it last held
+inputfile2 <- paste('strain_specific_model_from_RAVEN_kegg/',i, '/excelRxns.txt', sep = "")  # reaction-table path for that same `i`
+gene_biocyc <- read.table(inputfile, header =FALSE, sep = "\t", stringsAsFactors = FALSE)  # tab-separated, no header row
+gene <- length(unique(gene_biocyc$V2))  # number of distinct values in the second column
+rxn_biocyc <- read_table2(inputfile2)  # NOTE(review): read_table2() is deprecated in readr >= 2.0 in favour of read_table()
+rxn <- length(unique(rxn_biocyc$`#`))  # number of distinct values in the column literally named "#"
+View(rxn_detail)  # interactive inspection; rxn_detail is not rebuilt here, so this shows the value left by the loop
+existence <- length(which(rxn_detail$rxn %in% 'R01867'))  # rows of rxn_detail with reaction ID R01867
+length(which(rxn_detail$V1 %in% ec))  # bare expression: displays the EC-match row count at the console
+gene_all  # bare expression: displays the accumulated per-strain gene counts
+# Compare the original s288c reactions from different sources: RAVEN, KEGG.
+# Revised by Hongzhong 2020-7-27
+# Load libraries (stringr supplies str_replace_all; tidyverse supplies the readr/tibble helpers used in the loop).
+library(readxl)
+library(stringr)
+library(tidyverse)
+#---------------------------------------------------
+## Small task: check whether specific reactions exist in all yeast species.
+# Source of the per-strain models: RAVEN + KEGG. The vectors below are accumulators filled by the loop that follows.
+gene_all <- vector()  # per-strain count of unique genes
+rxn_all <- vector()  # per-strain count of unique reaction IDs
+exist_R01867 <- vector()  # per-strain number of rows whose reaction ID is 'R01867'
+ec <- c('ec:1.3.98.1','ec:1.3.5.2')#,'ec:1.3.1.14')
+ec_all <- vector()  # per-strain number of rows whose EC number is listed in `ec`
+strain <- list.files('strain_specific_model_from_RAVEN_kegg')  # each entry is later used as a sub-directory name
+ec_rxn <- read.table('data/EC_rxn_mapping_kegg.txt', stringsAsFactors = FALSE) # EC number (V1) <-> reaction ID (V2) mapping from KEGG
+ec_rxn$V2 <- str_replace_all(ec_rxn$V2, "rn:", "")  # drop the "rn:" prefix so V2 can be joined against bare reaction IDs
+for (i in strain) {  # one iteration per entry found in 'strain_specific_model_from_RAVEN_kegg'
+print(i)  # progress indicator
+# Debug aid: set i by hand to run the loop body for a single entry, e.g. i <- "biocyc_panYeast2_45_100"
+inputfile <- paste('strain_specific_model_from_RAVEN_kegg/',i, '/excelGenes.txt', sep = "")  # path to this strain's gene table
+inputfile2 <- paste('strain_specific_model_from_RAVEN_kegg/',i, '/excelRxns.txt', sep = "")  # path to this strain's reaction table
+gene_one_species <- read.table(inputfile, header =FALSE, sep = "\t", stringsAsFactors = FALSE)  # tab-separated, no header row
+gene <- length(unique(gene_one_species$V2))  # number of distinct values in the second column
+rxn_one_species <- read_table2(inputfile2)  # NOTE(review): read_table2() is deprecated in readr >= 2.0 in favour of read_table() -- confirm installed version
+rxn <- length(unique(rxn_one_species$`#`))  # number of distinct values in the column literally named "#"
+rxn_detail <- data_frame(rxn=rxn_one_species$`#`)  # NOTE(review): data_frame() is deprecated in tibble in favour of tibble()
+rxn_detail <- merge(rxn_detail, ec_rxn, by.x = 'rxn', by.y = 'V2', all.x = TRUE)  # left join: keep every reaction, attach EC numbers (V1) where the mapping has them
+existence <- length(which(rxn_detail$rxn %in% 'R01867'))  # rows of the merged table with reaction ID R01867
+ec_existence <- length(which(rxn_detail$V1 %in% ec))  # rows of the merged table whose EC number is one of `ec`
+# Save the results: append this strain's counts to the accumulators.
+exist_R01867 <- c(exist_R01867, existence)
+ec_all <- c(ec_all, ec_existence)
+gene_all <- c(gene_all, gene)
+rxn_all <- c(rxn_all, rxn)
+}
diff --git a/draft_GEM_all_yeast_species/.idea/draft_GEM_all_yeast_species.iml b/draft_GEM_all_yeast_species/.idea/draft_GEM_all_yeast_species.iml
diff --git a/draft_GEM_all_yeast_species/.idea/encodings.xml b/draft_GEM_all_yeast_species/.idea/encodings.xml
diff --git a/draft_GEM_all_yeast_species/.idea/inspectionProfiles/profiles_settings.xml b/draft_GEM_all_yeast_species/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/draft_GEM_all_yeast_species/.idea/libraries/R_User_Library.xml b/draft_GEM_all_yeast_species/.idea/libraries/R_User_Library.xml