Skip to content

Commit

Permalink
update to Reactome gene and Rhea rxn class addition
Browse files Browse the repository at this point in the history
  • Loading branch information
johnbraisted committed Nov 9, 2023
1 parent 84bd0a7 commit af85eb3
Show file tree
Hide file tree
Showing 10 changed files with 239 additions and 229 deletions.
1 change: 1 addition & 0 deletions config/db_load_resource_config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ ready reaction.txt reaction bulk ramp_rxn_id "ramp_rxn_id,rxn_source_id,status,i
ready reaction_to_metabolite.txt reaction2met bulk None "ramp_rxn_id,rxn_source_id,ramp_cmpd_id,substrate_product,met_source_id,met_name,is_cofactor"
ready reaction_to_protein.txt reaction2protein bulk None "ramp_rxn_id,rxn_source_id,ramp_gene_id,uniprot,protein_name"
ready reaction_protein_to_metabolite.txt reaction_protein2met bulk None "ramp_rxn_id,rxn_source_id,ramp_gene_id,gene_source_id,substrate_product,ramp_cmpd_id,cmpd_source_id,cmpd_name,is_cofactor"
ready rheaReactionToEcClass.txt reaction_ec_class bulk None "ramp_rxn_id,rxn_source_id,rxn_class_ec,ec_level,rxn_class,rxn_class_hierarchy"
1 change: 1 addition & 0 deletions config/external_resource_config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ rhea_to_ec http https://ftp.expasy.org/databases/rhea/tsv/rhea2ec.tsv rhea2ec.ts
rhea_rxn_direction http https://ftp.expasy.org/databases/rhea/tsv/rhea-directions.tsv rhea-directions.tsv rhea-directions.tsv ../misc/data/rhea/ none rhea_rxn_direction_table
chebi_to_chebi_relations http http://ftp.ebi.ac.uk/pub/databases/chebi/Flat_file_tab_delimited/relation.tsv relation.tsv relation.tsv ../misc/data/chebi/ none chebi_relations
chebi_ontology_owl http http://ftp.ebi.ac.uk/pub/databases/chebi/ontology/chebi.owl.gz chebi.owl.gz chebi.owl ../misc/data/chebi/ gzip chebi_ontology
expasy_ec2class ftp https://ftp.expasy.org/databases/enzyme/enzclass.txt enzclass.txt enzclass.txt ../misc/data/rhea none expasy_ec2class
14 changes: 7 additions & 7 deletions config/ramp_resource_version_update.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
ramp_db_version db_mod_date status data_source_id data_source_name data_source_url data_source_version
v2.4.0 10/24/2023 current hmdb HMDB https://hmdb.ca/ v5.0 (2021-11-17)
v2.4.0 10/24/2023 current reactome Reactome https://reactome.org/ v86 (Sep 2023)
v2.4.0 10/24/2023 current wiki WikiPathways https://www.wikipathways.org/index.php/WikiPathways v20231010 (2023-10-10)
v2.4.0 10/24/2023 current kegg KEGG https://www.genome.jp/kegg/ from HMDB (v5.0) (2021-11-17)
v2.4.0 10/24/2023 current chebi ChEBI https://www.ebi.ac.uk/chebi/ Release 226 (2023-10-01)
v2.4.0 10/24/2023 current lipidmaps Lipid Maps https://www.lipidmaps.org/ Release 2023-10-24
v2.4.0 10/24/2023 current rhea Rhea https://www.rhea-db.org/ Release 128 (2023-06-28)
v2.4.1 10/24/2023 current hmdb HMDB https://hmdb.ca/ v5.0 (2021-11-17)
v2.4.1 10/24/2023 current reactome Reactome https://reactome.org/ v86 (Sep 2023)
v2.4.1 10/24/2023 current wiki WikiPathways https://www.wikipathways.org/index.php/WikiPathways v20231010 (2023-10-10)
v2.4.1 10/24/2023 current kegg KEGG https://www.genome.jp/kegg/ from HMDB (v5.0) (2021-11-17)
v2.4.1 10/24/2023 current chebi ChEBI https://www.ebi.ac.uk/chebi/ Release 226 (2023-10-01)
v2.4.1 10/24/2023 current lipidmaps Lipid Maps https://www.lipidmaps.org/ Release 2023-10-24
v2.4.1 10/24/2023 current rhea Rhea https://www.rhea-db.org/ Release 129 (2023-09-13)
187 changes: 0 additions & 187 deletions main/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,192 +52,6 @@ def runEverything(self, resourceConfigFile, getDatabaseFiles = True):

print("Getting Rhea info...")
rhea.processRhea()

#Here are the identifiers that are present for each gene:
#kegg: keggid (mainID), 'Ensembl', 'HGNC', 'HPRD', 'NCBI-GeneID', 'NCBI-ProteinID', 'OMIM', 'UniProt', 'Vega', 'miRBase'
#wikipathways: (no mainID), 'Entrez', 'Enzyme Nomenclature', 'Uniprot (Uniprot-TrEMBL)'
#hmdb: HMDB-protein-accession (mainID), 'Uniprot'
#reactome:Uniprot (mainID)

"""
print('Generate compound id')
hmdbcompoundnum = sql.createRampCompoundID(hmdb.metaboliteIDDictionary, "hmdb", 0)
print("hmdbcompoundnum: ", hmdbcompoundnum)
keggcompoundnum = sql.createRampCompoundID(kegg.metaboliteIDDictionary, "kegg", hmdbcompoundnum)
wikicompoundnum = sql.createRampCompoundID(wikipathways.metaboliteIDDictionary, "wiki", keggcompoundnum)
print("wikicompoundnum: ", wikicompoundnum)
reactomecompoundnum = sql.createRampCompoundID(reactome.metaboliteIDDictionary, "reactome", wikicompoundnum)
print('Generate gene id ...')
hmdbgenenum = sql.createRampGeneID(hmdb.geneInfoDictionary, "hmdb", 0)
kegggenenum = sql.createRampGeneID(kegg.geneInfoDictionary, "kegg", hmdbgenenum)
wikigenenum = sql.createRampGeneID(wikipathways.geneInfoDictionary, "wiki", kegggenenum)
reactomegenenum = sql.createRampGeneID(reactome.geneInfoDictionary, "reactome", wikigenenum)
print(" hmdbgenenum ", hmdbgenenum, " kegggenenum ", kegggenenum, " wikigenenum ", wikigenenum, " reactomegenenum ", reactomegenenum)
"""

""" print('Write to sql file...')
hmdbnumbers = sql.write(
hmdb.metaboliteCommonName,
hmdb.pathwayDictionary,
hmdb.pathwayCategory,
hmdb.metabolitesWithPathwaysDictionary,
hmdb.metabolitesWithSynonymsDictionary,
hmdb.metaboliteIDDictionary,
hmdb.pathwaysWithGenesDictionary,
hmdb.metabolitesLinkedToGenes,
hmdb.geneInfoDictionary,
hmdb.biofluidLocation,
hmdb.biofluid,
hmdb.cellularLocation,
hmdb.cellular,
hmdb.pathwayOntology,
hmdb.exoEndoDictionary,
hmdb.exoEndo,
hmdb.tissueLocation,
hmdb.tissue,
hmdb.metaInchi,
"hmdb",
0,0)
wikipathwaysnumbers = sql.write(
wikipathways.metaboliteCommonName,
wikipathways.pathwayDictionary,
wikipathways.pathwayCategory,
wikipathways.metabolitesWithPathwaysDictionary,
wikipathways.metabolitesWithSynonymsDictionary,
wikipathways.metaboliteIDDictionary,
wikipathways.pathwaysWithGenesDictionary,
wikipathways.metabolitesLinkedToGenes,
wikipathways.geneInfoDictionary,
wikipathways.biofluidLocation,
wikipathways.biofluid,
wikipathways.cellularLocation,
wikipathways.cellular,
wikipathways.pathwayOntology,
wikipathways.exoEndoDictionary,
wikipathways.exoEndo,
wikipathways.tissueLocation,
wikipathways.tissue,
dict(),
"wiki",
hmdbnumbers[0],hmdbnumbers[1])
reactomenumbers = sql.write(
reactome.metaboliteCommonName,
reactome.pathwayDictionary,
reactome.pathwayCategory,
reactome.metabolitesWithPathwaysDictionary,
reactome.metabolitesWithSynonymsDictionary,
reactome.metaboliteIDDictionary,
reactome.pathwaysWithGenesDictionary,
reactome.metabolitesLinkedToGenes,
reactome.geneInfoDictionary,
reactome.biofluidLocation,
reactome.biofluid,
reactome.cellularLocation,
reactome.cellular,
reactome.pathwayOntology,
reactome.exoEndoDictionary,
reactome.exoEndo,
reactome.tissueLocation,
reactome.tissue,
dict(),
"reactome",
wikipathwaysnumbers[0],wikipathwaysnumbers[1])
keggnumbers = sql.write(
kegg.metaboliteCommonName,
kegg.pathwayDictionary,
kegg.pathwayCategory,
kegg.metabolitesWithPathwaysDictionary,
kegg.metabolitesWithSynonymsDictionary,
kegg.metaboliteIDDictionary,
kegg.pathwaysWithGenesDictionary,
kegg.metabolitesLinkedToGenes,
kegg.geneInfoDictionary,
kegg.biofluidLocation,
kegg.biofluid,
kegg.cellularLocation,
kegg.cellular,
kegg.pathwayOntology,
kegg.exoEndoDictionary,
kegg.exoEndo,
kegg.tissueLocation,
kegg.tissue,
dict(),
"kegg",
reactomenumbers[0],reactomenumbers[1])
print("Done ... for importing database")
print("Compound:")
stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases, sql.rampCompoundIDdictionary, "Compound")
print("\n")
print("Gene:")
stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases, sql.rampGeneIDdictionary, "Gene")
stat.databaseContent(hmdb.pathwayDictionary,
hmdb.pathwayCategory,
hmdb.metabolitesWithPathwaysDictionary,
hmdb.metabolitesWithSynonymsDictionary,
hmdb.metaboliteIDDictionary,
hmdb.pathwaysWithGenesDictionary,
hmdb.geneInfoDictionary,
hmdb.biofluidLocation,
hmdb.biofluid,
hmdb.cellularLocation,
hmdb.cellular,
hmdb.pathwayOntology,
hmdb.exoEndoDictionary,
"hmdb")
stat.databaseContent(kegg.pathwayDictionary,
kegg.pathwayCategory,
kegg.metabolitesWithPathwaysDictionary,
kegg.metabolitesWithSynonymsDictionary,
kegg.metaboliteIDDictionary,
kegg.pathwaysWithGenesDictionary,
kegg.geneInfoDictionary,
kegg.biofluidLocation,
kegg.biofluid,
kegg.cellularLocation,
kegg.cellular,
kegg.pathwayOntology,
kegg.exoEndoDictionary,
"kegg")
stat.databaseContent(reactome.pathwayDictionary,
reactome.pathwayCategory,
reactome.metabolitesWithPathwaysDictionary,
reactome.metabolitesWithSynonymsDictionary,
reactome.metaboliteIDDictionary,
reactome.pathwaysWithGenesDictionary,
reactome.geneInfoDictionary,
reactome.biofluidLocation,
reactome.biofluid,
reactome.cellularLocation,
reactome.cellular,
reactome.pathwayOntology,
reactome.exoEndoDictionary,
"reactome")
stat.databaseContent(wikipathways.pathwayDictionary,
wikipathways.pathwayCategory,
wikipathways.metabolitesWithPathwaysDictionary,
wikipathways.metabolitesWithSynonymsDictionary,
wikipathways.metaboliteIDDictionary,
wikipathways.pathwaysWithGenesDictionary,
wikipathways.geneInfoDictionary,
wikipathways.biofluidLocation,
wikipathways.biofluid,
wikipathways.cellularLocation,
wikipathways.cellular,
wikipathways.pathwayOntology,
wikipathways.exoEndoDictionary,
"wiki")
"""

# constructs the entity builder
builder = EntityBuilder(resourceConf)
Expand All @@ -253,7 +67,6 @@ def runEverything(self, resourceConfigFile, getDatabaseFiles = True):

# Database loading is handled as a separate, un-coupled step.


resourceConfFile = "../config/external_resource_config.txt"
main = Main()
main.runEverything(resourceConfigFile = resourceConfFile)
Expand Down
7 changes: 2 additions & 5 deletions main/mainDBLoad.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from util.rampDBBulkLoader import rampDBBulkLoader



class mainDBLoad():

def __init__(self):
Expand Down Expand Up @@ -73,14 +72,12 @@ def loadDBAfterTruncatingTables(self, incrementLevel = 'increment_patch_release'
# this process replaced the old system of having Rdata in the package
loader.generateAndLoadRampSupplementalData()



loader = mainDBLoad()

# increment level 'increment_patch_release', 'increment_minor_release',
# or 'specified' (new version, perhaps major release)
loader.loadDBAfterTruncatingTables(incrementLevel = 'specified',
optionalVersionOveride = "2.4.0",
optionalVersionNote = "20231027 Data refresh. Inchi-key harmonization.",
optionalVersionOveride = "2.4.2",
optionalVersionNote = "20231107 Data refresh. Rhea Reaction Classes. Reactome gene patch.",
truncateTables=True)

6 changes: 3 additions & 3 deletions main/mainSqliteDBLoad.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def loadDBAfterTruncatingTables(self, sqliteFile, incrementLevel = 'increment_pa

# increment level 'increment_patch_release', 'increment_minor_release',
# or 'specified' (new version, perhaps major release)
loader.loadDBAfterTruncatingTables(sqliteFile = '/mnt/ncatsprod/braistedjc/tmp_work/RaMP_SQLite_v2.3.0.sqlite', incrementLevel = 'specified',
optionalVersionOveride = "2.3.0",
optionalVersionNote = "20230727 data update/refresh release",
loader.loadDBAfterTruncatingTables(sqliteFile = '/mnt/ncatsprod/braistedjc/tmp_work/RaMP_SQLite_BASE.sqlite', incrementLevel = 'specified',
optionalVersionOveride = "2.4.2",
optionalVersionNote = "20231107 data update, Rhea reaction to EC reaction class. Reactome Genes Patch.",
truncateTables=True)

Loading

0 comments on commit af85eb3

Please sign in to comment.