Skip to content

Commit

Permalink
Updated because Paxtools and Validator were updated -
Browse files Browse the repository at this point in the history
to make/use absolute URIs that include the scheme, i.e. http://bioregistry.io/...
instead of just bioregistry.io/...
(for jena, jsonld converter to work properly)
  • Loading branch information
IgorRodchenkov committed Apr 23, 2024
1 parent 40c2a29 commit 2ebd946
Show file tree
Hide file tree
Showing 13 changed files with 60 additions and 85 deletions.
2 changes: 1 addition & 1 deletion src/main/java/cpath/cleaner/KeggHsaCleaner.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void clean(InputStream data, OutputStream cleanedData)
if(!uxrefs.isEmpty()) {
UnificationXref x = uxrefs.iterator().next();
if(x.getId() != null && x.getId().startsWith("hsa")) {
String uri = "bioregistry.io/kegg.pathway:" + x.getId();
String uri = "http://bioregistry.io/kegg.pathway:" + x.getId();
if(!model.containsID(uri) && !newUriToEntityMap.containsKey(uri)) {
newUriToEntityMap.put(uri, pw); //collect to replace URIs later (below)
} else { //shared unification xref bug
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/cpath/cleaner/PathbankCleaner.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public void clean(InputStream data, OutputStream cleanedData) {

//since Apr-2018, top pathway URIs are "normalized" like: http://identifiers.org/smpdb/...
//let's fix the pathway URI base - use the absolute http://bioregistry.io/pathbank: prefix instead
CPathUtils.rebaseUris(model, "http://identifiers.org/smpdb/", "bioregistry.io/pathbank:");
CPathUtils.rebaseUris(model, "http://identifiers.org/smpdb/", "http://bioregistry.io/pathbank:");

//remove pathways that have "SubPathway" name;
//though all these could be merged to become more informative pathways (once all the datafiles get merged),
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/cpath/cleaner/ReactomeCleaner.java
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ public void clean(InputStream data, OutputStream cleanedData)
stableId = stableId.substring(9);
// stableID is like 'R-HSA-123456'
}
final String uri = "bioregistry.io/reactome:" + stableId;
final String uri = "http://bioregistry.io/reactome:" + stableId;
if (!model.containsID(uri) && !newUriToEntityMap.containsKey(uri)) {
//save it in the map to replace the URI later (see below)
newUriToEntityMap.put(uri, proc);
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/cpath/converter/ChebiOntologyAnalysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ private void processOBOEntry(StringBuilder entryBuffer, Model model) throws IOEx
return;
}
final String thisID = childChebiIDs.iterator().next();
SmallMoleculeReference thisSMR = (SmallMoleculeReference) model.getByID("bioregistry.io/chebi:" + thisID.toLowerCase());
SmallMoleculeReference thisSMR = (SmallMoleculeReference) model.getByID("http://bioregistry.io/chebi:" + thisID.toLowerCase());
if (thisSMR == null) {
log.debug("processOBOEntry(), Skipped (not found): " + thisID);
return;
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/cpath/converter/UniprotConverter.java
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,7 @@ private ProteinReference newProteinReferenceWithAccessionXrefs(String idLine, St
final List<String> acList = new ArrayList<>(Arrays.asList(accessions.split(";")));
// Pop the first item, the primary AC, to generate canonical URI and unif. xref:
final String primaryId = acList.remove(0).trim();
final String uri = "bioregistry.io/uniprot:" + primaryId;
final String uri = "http://bioregistry.io/uniprot:" + primaryId;

// create a new PR with the name and primary unification xref
ProteinReference proteinReference = model.addNew(ProteinReference.class, uri);
Expand Down Expand Up @@ -450,7 +450,7 @@ private BioSource getBioSource(String taxId, String name, Model model) {
if(taxonomy==null || taxonomy <= 0) {
throw new RuntimeException("Illegal taxonomy ID: " + taxId);
} else {
String uri = "bioregistry.io/ncbitaxon:" + taxonomy;
String uri = "http://bioregistry.io/ncbitaxon:" + taxonomy;
if (model.containsID(uri)) {
toReturn = (BioSource) model.getByID(uri);
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/main/java/cpath/service/Merger.java
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ private void mayAddHgncXrefs(final Model m, final XReferrable bpe,
}
final Set<String> hgncSymbols = new HashSet<>();
for (String ac : accessions) {
ProteinReference canonicalPR = (ProteinReference) warehouseModel.getByID("bioregistry.io/uniprot:" + ac);
ProteinReference canonicalPR = (ProteinReference) warehouseModel.getByID("http://bioregistry.io/uniprot:" + ac);
if (canonicalPR != null) {
for (Xref x : canonicalPR.getXref())
if (x.getDb().equalsIgnoreCase("hgnc.symbol")) {
Expand Down
14 changes: 7 additions & 7 deletions src/main/java/cpath/service/api/RelTypeVocab.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@
* Values to generate standard BioPAX RelationshipTypeVocabulary objects.
*/
public enum RelTypeVocab {
IDENTITY("identity", "bioregistry.io/mi:0356", "mi", "0356"),
SECONDARY_ACCESSION_NUMBER("secondary-ac", "bioregistry.io/mi:0360", "mi", "0360"),
ADDITIONAL_INFORMATION("see-also", "bioregistry.io/mi:0361", "mi", "0361"),
IDENTITY("identity", "http://bioregistry.io/mi:0356", "mi", "0356"),
SECONDARY_ACCESSION_NUMBER("secondary-ac", "http://bioregistry.io/mi:0360", "mi", "0360"),
ADDITIONAL_INFORMATION("see-also", "http://bioregistry.io/mi:0361", "mi", "0361"),
//next should work for rel. xrefs pointing to a protein but attached to a Gene, Dna*, Rna* objects
GENE_PRODUCT("gene product", "bioregistry.io/mi:0251", "mi", "0251"),
SET_MEMBER("set member", "bioregistry.io/mi:1341", "mi", "1341"),
GENE_PRODUCT("gene product", "http://bioregistry.io/mi:0251", "mi", "0251"),
SET_MEMBER("set member", "http://bioregistry.io/mi:1341", "mi", "1341"),
//next one is for chebi "is_a" relationships (when parent is a chemical class/concept rather than compound)
MULTIPLE_PARENT_REFERENCE("multiple parent reference", "bioregistry.io/mi:0829", "mi", "0829"),
ISOFORM_PARENT("isoform-parent", "bioregistry.io/mi:0243", "mi", "0243"),;
MULTIPLE_PARENT_REFERENCE("multiple parent reference", "http://bioregistry.io/mi:0829", "mi", "0829"),
ISOFORM_PARENT("isoform-parent", "http://bioregistry.io/mi:0243", "mi", "0243"),;

public final String term;
public final String uri;
Expand Down
12 changes: 6 additions & 6 deletions src/test/java/cpath/cleaner/KeggHsaCleanerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ public class KeggHsaCleanerTest {
@Test
public final void testClean() throws IOException {
Cleaner cleaner = new KeggHsaCleaner();
final String testPathwayUri = "bioregistry.io/kegg.pathway:hsa00010";
final String testPathwayUri = "http://bioregistry.io/kegg.pathway:hsa00010";

String f10 = getClass().getClassLoader().getResource("").getPath()
+ File.separator + "testCleanKegghsa00010.owl";
Expand All @@ -45,7 +45,7 @@ public final void testClean() throws IOException {
assertEquals("PGM", named.getStandardName()); //was "PGM1, GSD14..."
assertFalse(named.getXref().isEmpty());
//Pathway
named = (Named)m10.getByID("bioregistry.io/kegg.pathway:hsa00010");
named = (Named)m10.getByID("http://bioregistry.io/kegg.pathway:hsa00010");
assertEquals("Glycolysis / Gluconeogenesis", named.getDisplayName()); //was "Glycolysis / ..."
//SM or SMR
named = (Named)m10.getByID(m10.getXmlBase()+"cpdC00236");
Expand All @@ -63,7 +63,7 @@ public final void testClean() throws IOException {
cleaner.clean(new FileInputStream(getClass().getResource("/hsa00562.owl").getFile()),
new FileOutputStream(f562));
Model m562 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f562));
assertTrue(m562.containsID("bioregistry.io/kegg.pathway:hsa00562"));
assertTrue(m562.containsID("http://bioregistry.io/kegg.pathway:hsa00562"));
assertTrue(m562.containsID(testPathwayUri));


Expand All @@ -73,7 +73,7 @@ public final void testClean() throws IOException {
model.merge(m10);
model.merge(m562);
assertTrue(model.containsID(testPathwayUri));
assertTrue(model.containsID("bioregistry.io/kegg.pathway:hsa00562"));
assertTrue(model.containsID("http://bioregistry.io/kegg.pathway:hsa00562"));

//save result 1
new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
Expand All @@ -88,7 +88,7 @@ public final void testClean() throws IOException {
model.merge(m562);
model.merge(m10);
assertTrue(model.containsID(testPathwayUri));
assertTrue(model.containsID("bioregistry.io/kegg.pathway:hsa00562"));
assertTrue(model.containsID("http://bioregistry.io/kegg.pathway:hsa00562"));

//save test result 2
new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
Expand All @@ -104,7 +104,7 @@ public final void testClean() throws IOException {
merger.merge(model, m562);
merger.merge(model, m10);
assertTrue(model.containsID(testPathwayUri));
assertTrue(model.containsID("bioregistry.io/kegg.pathway:hsa00562"));
assertTrue(model.containsID("http://bioregistry.io/kegg.pathway:hsa00562"));

//save test result 3
new SimpleIOHandler().convertToOWL(model, new FileOutputStream(
Expand Down
4 changes: 2 additions & 2 deletions src/test/java/cpath/cleaner/PathbankCleanerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ public class PathbankCleanerTest {
@Test
public final void testClean() throws IOException {
Cleaner cleaner = new PathbankCleaner();
String uri1 = "bioregistry.io/pathbank:SMP0000040"; //was "http://identifiers.org/smpdb/SMP0000040";
String uri1 = "http://bioregistry.io/pathbank:SMP0000040"; //was "http://identifiers.org/smpdb/SMP0000040";
String f1 = getClass().getClassLoader().getResource("").getPath() + File.separator + "PW000146.cleaned.owl";
cleaner.clean(new FileInputStream(getClass().getResource("/PW000146.owl").getFile()), new FileOutputStream(f1));
Model m1 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f1));
Pathway p1 = (Pathway)m1.getByID(uri1);
String uri2 = "bioregistry.io/pathbank:SMP0000057"; //was "http://identifiers.org/smpdb/SMP0000057";
String uri2 = "http://bioregistry.io/pathbank:SMP0000057"; //was "http://identifiers.org/smpdb/SMP0000057";
String f2 = getClass().getClassLoader().getResource("").getPath() + File.separator + "PW000005.cleaned.owl";
cleaner.clean(new FileInputStream(getClass().getResource("/PW000005.owl").getFile()), new FileOutputStream(f2));
Model m2 = new SimpleIOHandler().convertFromOWL(new FileInputStream(f2));
Expand Down
16 changes: 8 additions & 8 deletions src/test/java/cpath/converter/ChebiConvertersTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@ public void convertObo() throws IOException {

// get all small molecule references out
assertEquals(7, model.getObjects(SmallMoleculeReference.class).size());
assertNotNull(model.getByID("bioregistry.io/chebi:58342")); //the SMR without InChIKey
assertNotNull(model.getByID("http://bioregistry.io/chebi:58342")); //the SMR without InChIKey

// get lactic acid sm
String rdfID = "bioregistry.io/chebi:422";
String rdfID = "http://bioregistry.io/chebi:422";
assertTrue(model.containsID(rdfID));
SmallMoleculeReference smallMoleculeReference = (SmallMoleculeReference) model.getByID(rdfID);

Expand All @@ -77,12 +77,12 @@ public void convertObo() throws IOException {
assertEquals(0, publicationXrefCount); //there are no such xrefs anymore

// following checks work in this test only (using in-memory model); with DAO - use getObject...
assertTrue(model.containsID("bioregistry.io/chebi:20"));
EntityReference er20 = (EntityReference) model.getByID("bioregistry.io/chebi:20");
assertTrue(model.containsID("bioregistry.io/chebi:28"));
// EntityReference er28 = (EntityReference) model.getByID("bioregistry.io/chebi:28");
assertTrue(model.containsID("bioregistry.io/chebi:422"));
EntityReference er422 = (EntityReference) model.getByID("bioregistry.io/chebi:422");
assertTrue(model.containsID("http://bioregistry.io/chebi:20"));
EntityReference er20 = (EntityReference) model.getByID("http://bioregistry.io/chebi:20");
assertTrue(model.containsID("http://bioregistry.io/chebi:28"));
// EntityReference er28 = (EntityReference) model.getByID("http://bioregistry.io/chebi:28");
assertTrue(model.containsID("http://bioregistry.io/chebi:422"));
EntityReference er422 = (EntityReference) model.getByID("http://bioregistry.io/chebi:422");

assertTrue(er20.getMemberEntityReferenceOf().isEmpty());
assertTrue(er422.getMemberEntityReferenceOf().isEmpty());
Expand Down
33 changes: 4 additions & 29 deletions src/test/java/cpath/converter/UniprotConverterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -54,14 +54,14 @@ public void convert() throws IOException {
assertEquals(10, proteinReferences.size());
assertTrue(proteinReferences.iterator().next().getXref().iterator().hasNext());

ProteinReference pr = (ProteinReference) model.getByID("bioregistry.io/uniprot:P27797");
ProteinReference pr = (ProteinReference) model.getByID("http://bioregistry.io/uniprot:P27797");
assertEquals(10, pr.getName().size()); //make sure this one is passed (important!)
assertEquals("CALR_HUMAN", pr.getDisplayName());
assertEquals("Calreticulin", pr.getStandardName());
assertEquals(48, pr.getXref().size()); //no duplicates (UniProt, HGNC, PDB, IPI, EMBL, PIR, DIP, etc., xrefs)
assertEquals("bioregistry.io/ncbitaxon:9606", pr.getOrganism().getUri());
assertEquals("http://bioregistry.io/ncbitaxon:9606", pr.getOrganism().getUri());

pr = (ProteinReference) model.getByID("bioregistry.io/uniprot:P0DP23");
pr = (ProteinReference) model.getByID("http://bioregistry.io/uniprot:P0DP23");
assertNotNull(pr);
assertTrue(pr.getName().contains("CALM"));
assertTrue(pr.getName().contains("CALM1"));
Expand All @@ -70,32 +70,7 @@ public void convert() throws IOException {
assertTrue(pr.getXref().toString().contains("1J7P")); // has that PDB xref too
//we don't convert/add features anymore
assertTrue(pr.getEntityFeature().isEmpty());
// assertEquals(11, pr.getEntityFeature().size());
// //check for a feature object by using URI generated the same way as it's in the converter:
// String mfUri = Normalizer.uri(model.getXmlBase(), null, pr.getDisplayName() + "_1", ModificationFeature.class);
// ModificationFeature mf = (ModificationFeature) model.getByID(mfUri);
// assertNotNull(mf);
// assertTrue(pr.getEntityFeature().contains(mf));
// SequenceLocation sl = mf.getFeatureLocation();
// assertTrue(sl instanceof SequenceSite);
// assertEquals(2, ((SequenceSite)sl).getSequencePosition());
// assertEquals("N-acetylalanine", mf.getModificationType().getTerm().iterator().next());
// //test for the following FT entry was correctly parsed/converted:
// // FT MOD_RES 45
// // FT /note="Phosphothreonine; by CaMK4"
// // FT /evidence="ECO:0000250|UniProtKB:P0DP29"
// ModificationFeature f = null;
// for(EntityFeature ef : pr.getEntityFeature()) {
// if((ef instanceof ModificationFeature) && ((ModificationFeature)ef).getModificationType().getTerm().contains("Phosphothreonine")) {
// f = (ModificationFeature) ef;
// }
// }
// assertNotNull(f);
// assertTrue(f.getModificationType() instanceof SequenceModificationVocabulary);
// Set<String> terms = f.getModificationType().getTerm();
// assertTrue(terms.contains("Phosphothreonine"));
// assertTrue(f.getFeatureLocation() instanceof SequenceSite);


//this is just to test for a bug in the DR text format parser...
boolean rel = false;
for(Xref x : pr.getXref()) {
Expand Down
Loading

0 comments on commit 2ebd946

Please sign in to comment.