ncats · johnbraisted · Nov 9, 2023 · Oct 27, 2023 · Oct 27, 2023 · Oct 30, 2023
diff --git a/config/db_load_resource_config.txt b/config/db_load_resource_config.txt
@@ -4,7 +4,7 @@ ready	analytesource.txt	source	bulk	None	"sourceId,rampId,IDtype,geneOrCompound,
 ready	analytesynonym.txt	analytesynonym	bulk	None	"Synonym,rampId,geneOrCompound,source"
 ready	analytetopathway.txt	analytehaspathway	bulk	None	"rampId,pathwayRampId,pathwaySource"
 ready	analyte.txt	analyte	bulk	rampId	"rampId,type"
-ready	catalyzes.txt	catalyzed	bulk	None	"rampCompoundId,rampGeneId, proteinType"
+ready	catalyzes.txt	catalyzed	bulk	None	"rampCompoundId,rampGeneId,proteinType"
 empty	reactomecatalyzed.sql	catalyzed	bulk	None	"rampCompoundId,rampGeneId"
 empty	wikicatalyzed.sql	catalyzed	bulk	None	"rampCompoundId,rampGeneId"
 ready	chemProps.txt	chem_props	bulk	None	"ramp_id,chem_data_source,chem_source_id,iso_smiles,inchi_key_prefix,inchi_key,inchi,mw,monoisotop_mass,common_name,mol_formula"
@@ -15,3 +15,4 @@ ready	reaction.txt	reaction	bulk	ramp_rxn_id	"ramp_rxn_id,rxn_source_id,status,i
 ready	reaction_to_metabolite.txt	reaction2met	bulk	None	"ramp_rxn_id,rxn_source_id,ramp_cmpd_id,substrate_product,met_source_id,met_name,is_cofactor"
 ready	reaction_to_protein.txt	reaction2protein	bulk	None	"ramp_rxn_id,rxn_source_id,ramp_gene_id,uniprot,protein_name"
 ready	reaction_protein_to_metabolite.txt	reaction_protein2met	bulk	None	"ramp_rxn_id,rxn_source_id,ramp_gene_id,gene_source_id,substrate_product,ramp_cmpd_id,cmpd_source_id,cmpd_name,is_cofactor"
+ready	rheaReactionToEcClass.txt	reaction_ec_class	bulk	None	"ramp_rxn_id,rxn_source_id,rxn_class_ec,ec_level,rxn_class,rxn_class_hierarchy"
diff --git a/config/external_resource_config.txt b/config/external_resource_config.txt
@@ -4,7 +4,7 @@ hmdb_gene	http	https://hmdb.ca/system/downloads/current/hmdb_proteins.zip	hmdb_p
 hmdb_met_sdf	http	https://hmdb.ca/system/downloads/current/structures.zip	structures.zip	structures.sdf	../misc/data/chemprops/hmdb/	zip	chem_props_sdf
 reactome_met	http	http://www.reactome.org/download/current/ChEBI2Reactome_All_Levels.txt	ChEBI2Reactome_All_Levels.txt	ChEBI2Reactome_All_Levels.txt	../misc/data/reactome/	none	pathways_mets
 reactome_gene	http	http://www.reactome.org/download/current/UniProt2Reactome_All_Levels.txt	UniProt2Reactome_All_Levels.txt	UniProt2Reactome_All_Levels.txt	../misc/data/reactome/	none	pathways_genes
-wiki_pathways_mets_genes	http	https://wikipathways-data.wmcloud.org/current/rdf/wikipathways-20230710-rdf-wp.zip	wikipathways-20230710-rdf-wp.zip	./wp/	../misc/data/wikipathwaysRDF/	zip	pathways_mets_genes
+wiki_pathways_mets_genes	http	https://wikipathways-data.wmcloud.org/current/rdf/wikipathways-20231010-rdf-wp.zip	wikipathways-20231010-rdf-wp.zip	./wp/	../misc/data/wikipathwaysRDF/	zip	pathways_mets_genes
 chebi_met_sdf	ftp	https://ftp.ebi.ac.uk/pub/databases/chebi/SDF/ChEBI_complete_3star.sdf.gz	ChEBI_complete_3star.sdf.gz	ChEBI_complete_3star.sdf	../misc/data/chemprops/chebi/	gzip	chem_props_sdf
 lipidmaps_met	http	https://www.lipidmaps.org/files/?file=LMSD&ext=sdf.zip	LMSD.sdf.zip	structures.sdf	../misc/data/chemprops/lipidmaps/	zip	chem_props_sdf
 uniprot_human	http	https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/taxonomic_divisions/uniprot_sprot_human.dat.gz	uniprot_sprot_human.dat.gz	uniprot_sprot_human.dat	../misc/data/uniprot_human/	gzip	proteins
@@ -14,3 +14,4 @@ rhea_to_ec	http	https://ftp.expasy.org/databases/rhea/tsv/rhea2ec.tsv	rhea2ec.ts
 rhea_rxn_direction	http	https://ftp.expasy.org/databases/rhea/tsv/rhea-directions.tsv	rhea-directions.tsv	rhea-directions.tsv	../misc/data/rhea/	none	rhea_rxn_direction_table
 chebi_to_chebi_relations	http	http://ftp.ebi.ac.uk/pub/databases/chebi/Flat_file_tab_delimited/relation.tsv	relation.tsv	relation.tsv	../misc/data/chebi/	none	chebi_relations
 chebi_ontology_owl	http	http://ftp.ebi.ac.uk/pub/databases/chebi/ontology/chebi.owl.gz	chebi.owl.gz	chebi.owl	../misc/data/chebi/	gzip	chebi_ontology
+expasy_ec2class	ftp	https://ftp.expasy.org/databases/enzyme/enzclass.txt	enzclass.txt	enzclass.txt	../misc/data/rhea/	none	expasy_ec2class
diff --git a/config/ramp_resource_version_update.txt b/config/ramp_resource_version_update.txt
@@ -1,8 +1,8 @@
 ramp_db_version	db_mod_date	status	data_source_id	data_source_name	data_source_url	data_source_version
-v2.3.0	7/20/2023	current	hmdb	HMDB	https://hmdb.ca/	v5.0 (2021-11-17)
-v2.3.0	7/20/2023	current	reactome	Reactome	https://reactome.org/	v85 (May 2023)
-v2.3.0	7/20/2023	current	wiki	WikiPathways	https://www.wikipathways.org/index.php/WikiPathways	v20230710 (2023-07-10)
-v2.3.0	7/20/2023	current	kegg	KEGG	https://www.genome.jp/kegg/	from HMDB (v5.0) (2021-11-17) 
-v2.3.0	7/20/2023	current	chebi	ChEBI	https://www.ebi.ac.uk/chebi/	Release 223 (2023-07-01)
-v2.3.0	7/20/2023	current	lipidmaps	Lipid Maps	https://www.lipidmaps.org/	Release 2023-07-12
-v2.3.0  8/3/2022        current rhea    Rhea    https://www.rhea-db.org/        Release 128 (2023-06-28)
+v2.4.1	10/24/2023	current	hmdb	HMDB	https://hmdb.ca/	v5.0 (2021-11-17)
+v2.4.1	10/24/2023	current	reactome	Reactome	https://reactome.org/	v86 (Sep 2023)
+v2.4.1	10/24/2023	current	wiki	WikiPathways	https://www.wikipathways.org/index.php/WikiPathways	v20231010 (2023-10-10)
+v2.4.1	10/24/2023	current	kegg	KEGG	https://www.genome.jp/kegg/	from HMDB (v5.0) (2021-11-17) 
+v2.4.1	10/24/2023	current	chebi	ChEBI	https://www.ebi.ac.uk/chebi/	Release 226 (2023-10-01)
+v2.4.1	10/24/2023	current	lipidmaps	Lipid Maps	https://www.lipidmaps.org/	Release 2023-10-24
+v2.4.1	10/24/2023	current	rhea	Rhea	https://www.rhea-db.org/	Release 129 (2023-09-13)
diff --git a/main/main.py b/main/main.py
@@ -16,225 +16,42 @@
 class Main():
 
     def runEverything(self, resourceConfigFile, getDatabaseFiles = True):
+
+        start = time.time()
+
         sql = writeToSQL()
 
         # build the ramp resource config
         resourceConf = RampConfig()
         resourceConf.loadConfig(resourceConfigFile)
-
-        #stat = getStatistics()
+        
+        stat = getStatistics()
         hmdb = hmdbData(resourceConf)
         wikipathways = WikipathwaysRDF(resourceConf)
         reactome = reactomeData(resourceConf)
         kegg = KeggData()
         lipidmaps = lipidmapsChemData(resourceConf)
         rhea = RheaParser(resourceConf)
-         
+
         # works based on your computer, setup working directory
         os.chdir('../main/')
-# 
-#         #kegg.getEverything(False)
-#         #print("KEGG Wonder")
-#         print("Getting hmdb...")
-#         hmdb.getEverything(True)
-#         print("Getting wiki...")
-#         wikipathways.getEverything(True)
-#         print("Getting reactome...")
-#         reactome.getEverything(True)
-#         
-#         # This parses and writes lipid maps
-#         # sql write will be handled by EntityBuilder
-#         print("Getting LipidMaps...")
-#         lipidmaps.getEverything(True)
-#  
-#         print("Getting Rhea info...")
-#         rhea.processRhea()
-
-
-
-#
-#         #Here are the identifiers that are present for each gene:
-#         #kegg: keggid (mainID), 'Ensembl', 'HGNC', 'HPRD', 'NCBI-GeneID', 'NCBI-ProteinID', 'OMIM', 'UniProt', 'Vega', 'miRBase'
-#         #wikipathways: (no mainID), 'Entrez', 'Enzyme Nomenclature', 'Uniprot (Uniprot-TrEMBL)
-#         #hmdb: HMDB-protien-accession (mainID), 'Uniprot'
-#         #reactome:Uniprot (mainID)
-#         
-#         print('Generate compound id')
-#         hmdbcompoundnum = sql.createRampCompoundID(hmdb.metaboliteIDDictionary, "hmdb", 0)
-#         print("hmdbcompoundnum:   ", hmdbcompoundnum)
-#         keggcompoundnum = sql.createRampCompoundID(kegg.metaboliteIDDictionary, "kegg", hmdbcompoundnum)
-#         wikicompoundnum = sql.createRampCompoundID(wikipathways.metaboliteIDDictionary, "wiki", keggcompoundnum)
-#         print("wikicompoundnum:   ", wikicompoundnum)
-#         reactomecompoundnum = sql.createRampCompoundID(reactome.metaboliteIDDictionary, "reactome", wikicompoundnum)
-#         
-#         print('Generate gene id ...')
-#         hmdbgenenum = sql.createRampGeneID(hmdb.geneInfoDictionary, "hmdb", 0)
-#         kegggenenum = sql.createRampGeneID(kegg.geneInfoDictionary, "kegg", hmdbgenenum)
-#         wikigenenum = sql.createRampGeneID(wikipathways.geneInfoDictionary, "wiki", kegggenenum)
-#         reactomegenenum = sql.createRampGeneID(reactome.geneInfoDictionary, "reactome", wikigenenum)
-#         print(" hmdbgenenum ", hmdbgenenum, " kegggenenum ", kegggenenum, " wikigenenum ", wikigenenum, " reactomegenenum ", reactomegenenum)
-        """        print('Write to sql file...')
-        hmdbnumbers = sql.write(
-                 hmdb.metaboliteCommonName,
-                 hmdb.pathwayDictionary,
-                 hmdb.pathwayCategory,
-                 hmdb.metabolitesWithPathwaysDictionary,
-                 hmdb.metabolitesWithSynonymsDictionary,
-                 hmdb.metaboliteIDDictionary,
-                 hmdb.pathwaysWithGenesDictionary,
-                 hmdb.metabolitesLinkedToGenes,
-                 hmdb.geneInfoDictionary,
-                 hmdb.biofluidLocation,
-                 hmdb.biofluid,
-                 hmdb.cellularLocation,
-                 hmdb.cellular,
-                 hmdb.pathwayOntology,
-                 hmdb.exoEndoDictionary,
-                 hmdb.exoEndo,
-                 hmdb.tissueLocation,
-                 hmdb.tissue,
-                 hmdb.metaInchi,
-                 "hmdb",
-                 0,0)
-
-        wikipathwaysnumbers = sql.write(
-                wikipathways.metaboliteCommonName,
-                wikipathways.pathwayDictionary, 
-                 wikipathways.pathwayCategory,
-                 wikipathways.metabolitesWithPathwaysDictionary,
-                 wikipathways.metabolitesWithSynonymsDictionary,
-                 wikipathways.metaboliteIDDictionary,
-                 wikipathways.pathwaysWithGenesDictionary,
-                 wikipathways.metabolitesLinkedToGenes,
-                 wikipathways.geneInfoDictionary,
-                 wikipathways.biofluidLocation,
-                 wikipathways.biofluid,
-                 wikipathways.cellularLocation,
-                 wikipathways.cellular,
-                 wikipathways.pathwayOntology,
-                 wikipathways.exoEndoDictionary,
-                 wikipathways.exoEndo,
-                 wikipathways.tissueLocation,
-                 wikipathways.tissue,
-                 dict(),
-                 "wiki",
-                 hmdbnumbers[0],hmdbnumbers[1])
-
-        reactomenumbers = sql.write(
-                reactome.metaboliteCommonName,
-                reactome.pathwayDictionary, 
-                reactome.pathwayCategory,
-                reactome.metabolitesWithPathwaysDictionary,
-                reactome.metabolitesWithSynonymsDictionary,
-                reactome.metaboliteIDDictionary,
-                reactome.pathwaysWithGenesDictionary,
-                reactome.metabolitesLinkedToGenes,
-                reactome.geneInfoDictionary,
-                reactome.biofluidLocation,
-                reactome.biofluid,
-                reactome.cellularLocation,
-                reactome.cellular,
-                reactome.pathwayOntology,
-                reactome.exoEndoDictionary,
-                reactome.exoEndo,
-                reactome.tissueLocation,
-                reactome.tissue,
-                dict(),
-                "reactome",
-                wikipathwaysnumbers[0],wikipathwaysnumbers[1])
-
-        keggnumbers = sql.write(
-                kegg.metaboliteCommonName,
-                kegg.pathwayDictionary, 
-                 kegg.pathwayCategory,
-                 kegg.metabolitesWithPathwaysDictionary,
-                 kegg.metabolitesWithSynonymsDictionary,
-                 kegg.metaboliteIDDictionary,
-                 kegg.pathwaysWithGenesDictionary,
-                 kegg.metabolitesLinkedToGenes,
-                 kegg.geneInfoDictionary,
-                 kegg.biofluidLocation,
-                 kegg.biofluid,
-                 kegg.cellularLocation,
-                 kegg.cellular,
-                 kegg.pathwayOntology,
-                 kegg.exoEndoDictionary,
-                 kegg.exoEndo,
-                 kegg.tissueLocation,
-                 kegg.tissue,
-                 dict(),
-                 "kegg",
-                 reactomenumbers[0],reactomenumbers[1])
 
-
-        print("Done ... for importing database")
+        #kegg.getEverything(False)
+        #print("KEGG Wonder")
+        print("Getting hmdb...")
+        hmdb.getEverything(True)
+        print("Getting wiki...")
+        wikipathways.getEverything(True)
+        print("Getting reactome...")
+        reactome.getEverything(True)
 
-        print("Compound:") 
-        stat.analyteOverlaps(sql.rampCompoundIdInWhichDatabases, sql.rampCompoundIDdictionary, "Compound")
-        print("\n")
-        print("Gene:") 
-        stat.analyteOverlaps(sql.rampGeneIdInWhichDatabases, sql.rampGeneIDdictionary, "Gene")
+        # This parses and writes lipid maps
+        # sql write will be handled by EntityBuilder
+        print("Getting LipidMaps...")
+        lipidmaps.getEverything(True)
 
-        stat.databaseContent(hmdb.pathwayDictionary, 
-                 hmdb.pathwayCategory,
-                 hmdb.metabolitesWithPathwaysDictionary,
-                 hmdb.metabolitesWithSynonymsDictionary,
-                 hmdb.metaboliteIDDictionary,
-                 hmdb.pathwaysWithGenesDictionary,
-                 hmdb.geneInfoDictionary,
-                 hmdb.biofluidLocation,
-                 hmdb.biofluid,
-                 hmdb.cellularLocation,
-                 hmdb.cellular,
-                 hmdb.pathwayOntology,
-                 hmdb.exoEndoDictionary,
-                 "hmdb")
-
-        stat.databaseContent(kegg.pathwayDictionary, 
-                 kegg.pathwayCategory,
-                 kegg.metabolitesWithPathwaysDictionary,
-                 kegg.metabolitesWithSynonymsDictionary,
-                 kegg.metaboliteIDDictionary,
-                 kegg.pathwaysWithGenesDictionary,
-                 kegg.geneInfoDictionary,
-                 kegg.biofluidLocation,
-                 kegg.biofluid,
-                 kegg.cellularLocation,
-                 kegg.cellular,
-                 kegg.pathwayOntology,
-                 kegg.exoEndoDictionary,
-                 "kegg")
-
-        stat.databaseContent(reactome.pathwayDictionary, 
-                 reactome.pathwayCategory,
-                 reactome.metabolitesWithPathwaysDictionary,
-                 reactome.metabolitesWithSynonymsDictionary,
-                 reactome.metaboliteIDDictionary,
-                 reactome.pathwaysWithGenesDictionary,
-                 reactome.geneInfoDictionary,
-                 reactome.biofluidLocation,
-                 reactome.biofluid,
-                 reactome.cellularLocation,
-                 reactome.cellular,
-                 reactome.pathwayOntology,
-                 reactome.exoEndoDictionary,
-                 "reactome")
-
-        stat.databaseContent(wikipathways.pathwayDictionary, 
-                 wikipathways.pathwayCategory,
-                 wikipathways.metabolitesWithPathwaysDictionary,
-                 wikipathways.metabolitesWithSynonymsDictionary,
-                 wikipathways.metaboliteIDDictionary,
-                 wikipathways.pathwaysWithGenesDictionary,
-                 wikipathways.geneInfoDictionary,
-                 wikipathways.biofluidLocation,
-                 wikipathways.biofluid,
-                 wikipathways.cellularLocation,
-                 wikipathways.cellular,
-                 wikipathways.pathwayOntology,
-                 wikipathways.exoEndoDictionary,
-                 "wiki")
-        """
+        print("Getting Rhea info...")
+        rhea.processRhea()
 
         # constructs the entity builder
         builder = EntityBuilder(resourceConf)
@@ -244,10 +61,12 @@ def runEverything(self, resourceConfigFile, getDatabaseFiles = True):
         # the result are files for DB loading in /misc/sql
 
         builder.fullBuild()
+
+        print(time.time() - start)
 
+
         # Database loading is handled as a separate, un-coupled step.
 
-
 resourceConfFile = "../config/external_resource_config.txt"                
 main = Main()
 main.runEverything(resourceConfigFile = resourceConfFile)

diff --git a/main/mainDBLoad.py b/main/mainDBLoad.py
@@ -3,7 +3,6 @@
 from util.rampDBBulkLoader import rampDBBulkLoader
 
 
-
 class mainDBLoad():
 
     def __init__(self):
@@ -68,14 +67,17 @@ def loadDBAfterTruncatingTables(self, incrementLevel = 'increment_patch_release'
         # this method populates a table that reflects the current status of the database.
         # metrics such as gene and metabolite counts for each data set are tallied.
         loader.updateDataStatusSummary()
-
+
+        # generate pathway similarity matrices, analyte lists and whatnot
+        # this process replaced the old system of having Rdata in the package
+        loader.generateAndLoadRampSupplementalData()
 
 loader = mainDBLoad()
 
 # increment level 'increment_patch_release', 'increment_minor_release', 
 # or 'specified' (new version, perhaps major release)
-loader.loadDBAfterTruncatingTables(incrementLevel = 'increment_patch_release', 
-                                   optionalVersionOveride = "", 
-                                   optionalVersionNote = "20220822 patch release, update chem_props inchi values.", 
+loader.loadDBAfterTruncatingTables(incrementLevel = 'specified', 
+                                   optionalVersionOveride = "2.4.2", 
+                                   optionalVersionNote = "20231107 Data refresh. Rhea Reaction Classes. Reactome gene patch.",
                                    truncateTables=True)
 
diff --git a/main/mainSqliteDBLoad.py b/main/mainSqliteDBLoad.py
@@ -36,7 +36,7 @@ def loadDBAfterTruncatingTables(self, sqliteFile, incrementLevel = 'increment_pa
 
         # pass the credentials object to the constructed SQLiteDBBulkLoader
 
-        loader = SQLiteDBBulkLoader(self.dbPropsFile, sqliteFile)
+        loader = SQLiteDBBulkLoader(dbPropsFile=self.dbPropsFile, sqliteFileName=sqliteFile)
 
 
         # truncate tables
@@ -79,8 +79,8 @@ def loadDBAfterTruncatingTables(self, sqliteFile, incrementLevel = 'increment_pa
 
 # increment level 'increment_patch_release', 'increment_minor_release', 
 # or 'specified' (new version, perhaps major release)
-loader.loadDBAfterTruncatingTables(sqliteFile = '../RaMP_SQLite_v2.3.0_Structure.sqlite', incrementLevel = 'specified',  
-                                   optionalVersionOveride = "2.3.0", 
-                                   optionalVersionNote = "20230727 data update/refresh release", 
+loader.loadDBAfterTruncatingTables(sqliteFile = '/mnt/ncatsprod/braistedjc/tmp_work/RaMP_SQLite_BASE.sqlite', incrementLevel = 'specified',  
+                                   optionalVersionOveride = "2.4.2", 
+                                   optionalVersionNote = "20231107 data update, Rhea reaction to EC reaction class. Reactome Genes Patch.", 
                                    truncateTables=True)