Updated because Paxtools and Validator were updated -

to make/use absolute URIs with a scheme, i.e., http://bioregistry.io/... instead of just bioregistry.io/... (for Jena and the JSON-LD converter to work properly)
PathwayCommons · Apr 23, 2024 · 2ebd946 · 2ebd946
1 parent 40c2a29
commit 2ebd946
Show file tree

Hide file tree

Showing 13 changed files with 60 additions and 85 deletions.
diff --git a/src/main/java/cpath/cleaner/KeggHsaCleaner.java b/src/main/java/cpath/cleaner/KeggHsaCleaner.java
@@ -46,7 +46,7 @@ public void clean(InputStream data, OutputStream cleanedData)
 			if(!uxrefs.isEmpty()) {
 				UnificationXref x = uxrefs.iterator().next();
 				if(x.getId() != null && x.getId().startsWith("hsa")) {
-					String uri = "bioregistry.io/kegg.pathway:" + x.getId();
+					String uri = "http://bioregistry.io/kegg.pathway:" + x.getId();
 					if(!model.containsID(uri) && !newUriToEntityMap.containsKey(uri)) {
 						newUriToEntityMap.put(uri, pw); //collect to replace URIs later (below)
 					} else { //shared unification xref bug

diff --git a/src/main/java/cpath/cleaner/PathbankCleaner.java b/src/main/java/cpath/cleaner/PathbankCleaner.java
@@ -38,7 +38,7 @@ public void clean(InputStream data, OutputStream cleanedData) {
 
     //since Apr-2018, top pathway URIs are "normalized" like: http://identifiers.org/smpdb/...
     //let's fix pathway uris base - use bioregistry.io/pathbank: instead
-    CPathUtils.rebaseUris(model, "http://identifiers.org/smpdb/", "bioregistry.io/pathbank:");
+    CPathUtils.rebaseUris(model, "http://identifiers.org/smpdb/", "http://bioregistry.io/pathbank:");
 
     //remove pathways that have "SubPathway" name;
     //though all these could be merged to become more informative pathways (once all the datafiles get merged),

diff --git a/src/main/java/cpath/cleaner/ReactomeCleaner.java b/src/main/java/cpath/cleaner/ReactomeCleaner.java
@@ -58,7 +58,7 @@ public void clean(InputStream data, OutputStream cleanedData)
 						stableId = stableId.substring(9);
 						// stableID is like 'R-HSA-123456'
 					}
-					final String uri = "bioregistry.io/reactome:" + stableId;
+					final String uri = "http://bioregistry.io/reactome:" + stableId;
 					if (!model.containsID(uri) && !newUriToEntityMap.containsKey(uri)) {
 						//save it in the map to replace the URI later (see below)
 						newUriToEntityMap.put(uri, proc);

diff --git a/src/main/java/cpath/converter/ChebiOntologyAnalysis.java b/src/main/java/cpath/converter/ChebiOntologyAnalysis.java
@@ -51,7 +51,7 @@ private void processOBOEntry(StringBuilder entryBuffer, Model model) throws IOEx
 			return;
 		}
 		final String thisID = childChebiIDs.iterator().next();
-		SmallMoleculeReference thisSMR = (SmallMoleculeReference) model.getByID("bioregistry.io/chebi:" + thisID.toLowerCase());
+		SmallMoleculeReference thisSMR = (SmallMoleculeReference) model.getByID("http://bioregistry.io/chebi:" + thisID.toLowerCase());
 		if (thisSMR == null) {
 			log.debug("processOBOEntry(), Skipped (not found): " + thisID);
 			return;

diff --git a/src/main/java/cpath/converter/UniprotConverter.java b/src/main/java/cpath/converter/UniprotConverter.java
@@ -409,7 +409,7 @@ private ProteinReference newProteinReferenceWithAccessionXrefs(String idLine, St
 		final List<String> acList = new ArrayList<>(Arrays.asList(accessions.split(";")));
 		// Pop the first item, the primary AC, to generate canonical URI and unif. xref:
 		final String primaryId = acList.remove(0).trim();
-		final String uri = "bioregistry.io/uniprot:" + primaryId;
+		final String uri = "http://bioregistry.io/uniprot:" + primaryId;
 
 		// create a new PR with the name and primary unification xref
 		ProteinReference proteinReference = model.addNew(ProteinReference.class, uri);
@@ -450,7 +450,7 @@ private BioSource getBioSource(String taxId, String name, Model model) {
 		if(taxonomy==null || taxonomy <= 0) {
 			throw new RuntimeException("Illegal taxonomy ID: " + taxId);
 		} else {
-			String uri = "bioregistry.io/ncbitaxon:" + taxonomy;
+			String uri = "http://bioregistry.io/ncbitaxon:" + taxonomy;
 			if (model.containsID(uri)) {
 				toReturn = (BioSource) model.getByID(uri);
 			} else {

diff --git a/src/main/java/cpath/service/Merger.java b/src/main/java/cpath/service/Merger.java
@@ -599,7 +599,7 @@ private void mayAddHgncXrefs(final Model m, final XReferrable bpe,
 		}
 		final Set<String> hgncSymbols = new HashSet<>();
 		for (String ac : accessions) {
-			ProteinReference canonicalPR = (ProteinReference) warehouseModel.getByID("bioregistry.io/uniprot:" + ac);
+			ProteinReference canonicalPR = (ProteinReference) warehouseModel.getByID("http://bioregistry.io/uniprot:" + ac);
 			if (canonicalPR != null) {
 				for (Xref x : canonicalPR.getXref())
 					if (x.getDb().equalsIgnoreCase("hgnc.symbol")) {

diff --git a/src/main/java/cpath/service/api/RelTypeVocab.java b/src/main/java/cpath/service/api/RelTypeVocab.java
@@ -4,15 +4,15 @@
  * Values to generate standard BioPAX RelationshipTypeVocabulary objects.
  */
 public enum RelTypeVocab {
-    IDENTITY("identity", "bioregistry.io/mi:0356", "mi", "0356"),
-    SECONDARY_ACCESSION_NUMBER("secondary-ac", "bioregistry.io/mi:0360", "mi", "0360"),
-    ADDITIONAL_INFORMATION("see-also", "bioregistry.io/mi:0361", "mi", "0361"),
+    IDENTITY("identity", "http://bioregistry.io/mi:0356", "mi", "0356"),
+    SECONDARY_ACCESSION_NUMBER("secondary-ac", "http://bioregistry.io/mi:0360", "mi", "0360"),
+    ADDITIONAL_INFORMATION("see-also", "http://bioregistry.io/mi:0361", "mi", "0361"),
     //next should work for rel. xrefs pointing to a protein but attached to a Gene, Dna*, Rna* objects
-    GENE_PRODUCT("gene product", "bioregistry.io/mi:0251", "mi", "0251"),
-    SET_MEMBER("set member", "bioregistry.io/mi:1341", "mi", "1341"),
+    GENE_PRODUCT("gene product", "http://bioregistry.io/mi:0251", "mi", "0251"),
+    SET_MEMBER("set member", "http://bioregistry.io/mi:1341", "mi", "1341"),
     //next one is for chebi "is_a" relationships (when parent is a chemical class/concept rather than compound)
-    MULTIPLE_PARENT_REFERENCE("multiple parent reference", "bioregistry.io/mi:0829", "mi", "0829"),
-    ISOFORM_PARENT("isoform-parent", "bioregistry.io/mi:0243", "mi", "0243"),;
+    MULTIPLE_PARENT_REFERENCE("multiple parent reference", "http://bioregistry.io/mi:0829", "mi", "0829"),
+    ISOFORM_PARENT("isoform-parent", "http://bioregistry.io/mi:0243", "mi", "0243"),;
 
     public final String term;
     public final String uri;

diff --git a/src/test/java/cpath/cleaner/KeggHsaCleanerTest.java b/src/test/java/cpath/cleaner/KeggHsaCleanerTest.java
@@ -23,7 +23,7 @@ public class KeggHsaCleanerTest {
 	@Test
 	public final void testClean() throws IOException {	
 		Cleaner cleaner = new KeggHsaCleaner();
-		final String testPathwayUri = "bioregistry.io/kegg.pathway:hsa00010";
+		final String testPathwayUri = "http://bioregistry.io/kegg.pathway:hsa00010";
 
 		String f10 = getClass().getClassLoader().getResource("").getPath() 
 				+ File.separator + "testCleanKegghsa00010.owl";
@@ -45,7 +45,7 @@ public final void testClean() throws IOException {
 		assertEquals("PGM", named.getStandardName()); //was "PGM1, GSD14..."
 		assertFalse(named.getXref().isEmpty());
 		//Pathway
-		named = (Named)m10.getByID("bioregistry.io/kegg.pathway:hsa00010");
+		named = (Named)m10.getByID("http://bioregistry.io/kegg.pathway:hsa00010");
 		assertEquals("Glycolysis / Gluconeogenesis", named.getDisplayName()); //was "Glycolysis / ..."
 		//SM or SMR
 		named = (Named)m10.getByID(m10.getXmlBase()+"cpdC00236");
@@ -63,7 +63,7 @@ public final void testClean() throws IOException {
 		cleaner.clean(new FileInputStream(getClass().getResource("/hsa00562.owl").getFile()), 
 				new FileOutputStream(f562));		
 		Model m562 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f562));		
-		assertTrue(m562.containsID("bioregistry.io/kegg.pathway:hsa00562"));
+		assertTrue(m562.containsID("http://bioregistry.io/kegg.pathway:hsa00562"));
 		assertTrue(m562.containsID(testPathwayUri));
 
 
@@ -73,7 +73,7 @@ public final void testClean() throws IOException {
 		model.merge(m10);
 		model.merge(m562);		
 		assertTrue(model.containsID(testPathwayUri));
-		assertTrue(model.containsID("bioregistry.io/kegg.pathway:hsa00562"));
+		assertTrue(model.containsID("http://bioregistry.io/kegg.pathway:hsa00562"));
 
 		//save result 1
 		new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
@@ -88,7 +88,7 @@ public final void testClean() throws IOException {
 		model.merge(m562);	
 		model.merge(m10);
 		assertTrue(model.containsID(testPathwayUri));
-		assertTrue(model.containsID("bioregistry.io/kegg.pathway:hsa00562"));
+		assertTrue(model.containsID("http://bioregistry.io/kegg.pathway:hsa00562"));
 
 		//save test result 2
 		new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
@@ -104,7 +104,7 @@ public final void testClean() throws IOException {
 		merger.merge(model, m562);
 		merger.merge(model, m10);
 		assertTrue(model.containsID(testPathwayUri));
-		assertTrue(model.containsID("bioregistry.io/kegg.pathway:hsa00562"));
+		assertTrue(model.containsID("http://bioregistry.io/kegg.pathway:hsa00562"));
 
 		//save test result 3
 		new SimpleIOHandler().convertToOWL(model, new FileOutputStream(

diff --git a/src/test/java/cpath/cleaner/PathbankCleanerTest.java b/src/test/java/cpath/cleaner/PathbankCleanerTest.java
@@ -22,12 +22,12 @@ public class PathbankCleanerTest {
 	@Test
 	public final void testClean() throws IOException {	
 		Cleaner cleaner = new PathbankCleaner();
-		String uri1 = "bioregistry.io/pathbank:SMP0000040"; //was "http://identifiers.org/smpdb/SMP0000040";
+		String uri1 = "http://bioregistry.io/pathbank:SMP0000040"; //was "http://identifiers.org/smpdb/SMP0000040";
 		String f1 = getClass().getClassLoader().getResource("").getPath() + File.separator + "PW000146.cleaned.owl";
 		cleaner.clean(new FileInputStream(getClass().getResource("/PW000146.owl").getFile()), new FileOutputStream(f1));
 		Model m1 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f1));
 		Pathway p1 = (Pathway)m1.getByID(uri1);
-		String uri2 = "bioregistry.io/pathbank:SMP0000057"; //was "http://identifiers.org/smpdb/SMP0000057";
+		String uri2 = "http://bioregistry.io/pathbank:SMP0000057"; //was "http://identifiers.org/smpdb/SMP0000057";
 		String f2 = getClass().getClassLoader().getResource("").getPath() + File.separator + "PW000005.cleaned.owl";
 		cleaner.clean(new FileInputStream(getClass().getResource("/PW000005.owl").getFile()), new FileOutputStream(f2));
 		Model m2 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f2));

diff --git a/src/test/java/cpath/converter/ChebiConvertersTest.java b/src/test/java/cpath/converter/ChebiConvertersTest.java
@@ -53,10 +53,10 @@ public void convertObo() throws IOException {
 
 		// get all small molecule references out
 		assertEquals(7, model.getObjects(SmallMoleculeReference.class).size());
-		assertNotNull(model.getByID("bioregistry.io/chebi:58342")); //the SMR without InChIKey
+		assertNotNull(model.getByID("http://bioregistry.io/chebi:58342")); //the SMR without InChIKey
 
 		// get lactic acid sm
-		String rdfID = "bioregistry.io/chebi:422";
+		String rdfID = "http://bioregistry.io/chebi:422";
 		assertTrue(model.containsID(rdfID));
 		SmallMoleculeReference smallMoleculeReference = (SmallMoleculeReference) model.getByID(rdfID);
 
@@ -77,12 +77,12 @@ public void convertObo() throws IOException {
 		assertEquals(0, publicationXrefCount); //there are no such xrefs anymore
 
 		// following checks work in this test only (using in-memory model); with DAO - use getObject...
-		assertTrue(model.containsID("bioregistry.io/chebi:20"));
-		EntityReference er20 = (EntityReference) model.getByID("bioregistry.io/chebi:20");
-		assertTrue(model.containsID("bioregistry.io/chebi:28"));
-//		EntityReference er28 = (EntityReference) model.getByID("bioregistry.io/chebi:28");
-		assertTrue(model.containsID("bioregistry.io/chebi:422"));
-		EntityReference er422 = (EntityReference) model.getByID("bioregistry.io/chebi:422");
+		assertTrue(model.containsID("http://bioregistry.io/chebi:20"));
+		EntityReference er20 = (EntityReference) model.getByID("http://bioregistry.io/chebi:20");
+		assertTrue(model.containsID("http://bioregistry.io/chebi:28"));
+//		EntityReference er28 = (EntityReference) model.getByID("http://bioregistry.io/chebi:28");
+		assertTrue(model.containsID("http://bioregistry.io/chebi:422"));
+		EntityReference er422 = (EntityReference) model.getByID("http://bioregistry.io/chebi:422");
 
 		assertTrue(er20.getMemberEntityReferenceOf().isEmpty());
 		assertTrue(er422.getMemberEntityReferenceOf().isEmpty());

diff --git a/src/test/java/cpath/converter/UniprotConverterTest.java b/src/test/java/cpath/converter/UniprotConverterTest.java
@@ -54,14 +54,14 @@ public void convert() throws IOException {
 		assertEquals(10, proteinReferences.size());
 		assertTrue(proteinReferences.iterator().next().getXref().iterator().hasNext());
 
-		ProteinReference pr = (ProteinReference) model.getByID("bioregistry.io/uniprot:P27797");
+		ProteinReference pr = (ProteinReference) model.getByID("http://bioregistry.io/uniprot:P27797");
 		assertEquals(10, pr.getName().size()); //make sure this one is passed (important!)
 		assertEquals("CALR_HUMAN", pr.getDisplayName());
 		assertEquals("Calreticulin", pr.getStandardName());
 		assertEquals(48, pr.getXref().size()); //no duplicates (UniProt, HGNC, PDB, IPI, EMBL, PIR, DIP, etc., xrefs)
-		assertEquals("bioregistry.io/ncbitaxon:9606", pr.getOrganism().getUri());
+		assertEquals("http://bioregistry.io/ncbitaxon:9606", pr.getOrganism().getUri());
 
-		pr = (ProteinReference) model.getByID("bioregistry.io/uniprot:P0DP23");
+		pr = (ProteinReference) model.getByID("http://bioregistry.io/uniprot:P0DP23");
 		assertNotNull(pr);
 		assertTrue(pr.getName().contains("CALM"));
 		assertTrue(pr.getName().contains("CALM1"));
@@ -70,32 +70,7 @@ public void convert() throws IOException {
 		assertTrue(pr.getXref().toString().contains("1J7P")); // has that PDB xref too
 		//we don't convert/add features anymore
 		assertTrue(pr.getEntityFeature().isEmpty());
-//		assertEquals(11, pr.getEntityFeature().size());
-//		//check for a feature object by using URI generated the same way as it's in the converter:
-//		String mfUri = Normalizer.uri(model.getXmlBase(), null, pr.getDisplayName() + "_1", ModificationFeature.class);
-//		ModificationFeature mf = (ModificationFeature) model.getByID(mfUri);
-//		assertNotNull(mf);
-//		assertTrue(pr.getEntityFeature().contains(mf));
-//		SequenceLocation sl = mf.getFeatureLocation();
-//		assertTrue(sl instanceof SequenceSite);
-//		assertEquals(2, ((SequenceSite)sl).getSequencePosition());
-//		assertEquals("N-acetylalanine", mf.getModificationType().getTerm().iterator().next());
-//		//test for the following FT entry was correctly parsed/converted:
-//		//		FT   MOD_RES         45
-//		//		FT                   /note="Phosphothreonine; by CaMK4"
-//		//		FT                   /evidence="ECO:0000250|UniProtKB:P0DP29"
-//		ModificationFeature f = null;
-//		for(EntityFeature ef : pr.getEntityFeature()) {
-//			if((ef instanceof ModificationFeature) && ((ModificationFeature)ef).getModificationType().getTerm().contains("Phosphothreonine")) {
-//				f = (ModificationFeature) ef;
-//			}
-//		}
-//		assertNotNull(f);
-//		assertTrue(f.getModificationType() instanceof SequenceModificationVocabulary);
-//		Set<String> terms = f.getModificationType().getTerm();
-//		assertTrue(terms.contains("Phosphothreonine"));
-//		assertTrue(f.getFeatureLocation() instanceof SequenceSite);
-
+
 		//this is just to test for a bug in the DR text format parser...
 		boolean rel = false;
 		for(Xref x : pr.getXref()) {