Merge pull request #29 from phyloref/example_brochu_2003

This pull request adds the publication [Brochu 2003](http://dx.doi.org/10.1146/annurev.earth.31.100901.141308) as curated by the Curation Tool. This required updating testcase2owl.py to retain information on the JSON-LD `@context` and `pso:holdsStatusInTime`. It also adds some definitions from the TimeInterval ontology (which we can't import directly, as doing so results in an inconsistent ontology), fixes an incorrect error message, removes `@id` as a required field (closes #15), and adds a `labels` term to PHYX files to represent sets of labels. Finally, it incorporates JPhyloref improvements (phyloref/jphyloref#6, phyloref/jphyloref#9) that implement statuses for phyloreferences (phyloref/klados#25).
phyloref · Jun 18, 2018 · 40e3a11 · 40e3a11
2 parents ddf7c53 + 09b7faf
commit 40e3a11
Show file tree

Hide file tree

Showing 10 changed files with 1,192 additions and 16 deletions.
diff --git a/jphyloref/jphyloref.jar b/jphyloref/jphyloref.jar
diff --git a/testcase2owl/paper-context.json b/testcase2owl/paper-context.json
@@ -117,6 +117,12 @@
             "@type": "xsd:string"
         },
 
+        "labels": {
+            "@id": "rdfs:label",
+            "@type": "xsd:string",
+            "@container": "@set"
+        },
+
         "comment": {
             "@id": "rdfs:comment",
             "@type": "xsd:string"
@@ -235,12 +241,12 @@
             "range": "TU",
             "@type": "@set"
         },
-        
+
         "specifierWillNotMatch": {
             "@id": "testcase:specifier_will_not_match",
             "@type": "@set"
         },
-        
+
 
         "== TERMS FROM THE ANNOTATION ONTOLOGY ==": {},
 

diff --git a/testcase2owl/phyloref/Phylogeny.py b/testcase2owl/phyloref/Phylogeny.py
@@ -140,6 +140,9 @@ def add_all_child_nodes(dendropy_node):
                     for closeMatch in node_label.annotations.findall(name='closeMatch'):
                         node_label_strs.append(closeMatch.value)
 
+            # Record all the node labels.
+            node.labels = node_label_strs
+
             # Create taxonomic units for all the node labels.
             for node_label_str in node_label_strs:
                 if node_label_str.startswith("expected "):
@@ -216,6 +219,7 @@ def __init__(self):
         # be created and managed by Phylogeny classes.
 
         self.in_phylogeny = None
+        self.labels = []
         self.taxonomic_units = []
         self.children = []
         self.siblings = []
@@ -236,6 +240,9 @@ def as_jsonld(self):
             'siblings': self.siblings
         }
 
+        if self.labels:
+            jsonld['labels'] = self.labels
+
         if self.in_phylogeny is not None:
             jsonld['inPhylogeny'] = self.in_phylogeny
 

diff --git a/testcase2owl/phyloref/Phyloreference.py b/testcase2owl/phyloref/Phyloreference.py
@@ -25,6 +25,9 @@ def __init__(self, phyloref_id):
         # Information on matches among specifiers
         self.unmatched_specifiers = set()
 
+        # Information on the status of each phyloreference
+        self.holds_status_in_time = []
+
         # Additional classes
         self.additional_classes = []
 
@@ -87,6 +90,12 @@ def load_from_json(phyloref_id, json):
                 external_specifier.id = '{0}_specifier{1}'.format(phyloref_id, phyloref.count_specifiers)
                 phyloref.external_specifiers_list.append(external_specifier)
 
+        if 'pso:holdsStatusInTime' in json:
+            # For now, we store the status information verbatim and restore it on export
+            # TODO: add an additional class to model status information once we're sure 
+            # it works for our needs.
+            phyloref.holds_status_in_time = json['pso:holdsStatusInTime']
+
         return phyloref
 
     def export_to_jsonld_document(self):
@@ -100,6 +109,7 @@ def export_to_jsonld_document(self):
         doc['@type'] = types
         doc['label'] = self.label
         doc['cladeDefinition'] = self.clade_definition
+        doc['pso:holdsStatusInTime'] = self.holds_status_in_time
 
         # Write out all specifiers.
         doc['hasInternalSpecifier'] = [specifier.as_jsonld() for specifier in self.internal_specifiers_list]

diff --git a/testcase2owl/phyloref/PhyloreferenceTestCase.py b/testcase2owl/phyloref/PhyloreferenceTestCase.py
@@ -40,8 +40,12 @@ def append_extend_or_ignore(property, dict, key):
         else:
             property.append(dict[key])
 
-    def __init__(self, id):
-        """ Create a test case for a given identifier. """
+    def __init__(self, id="#"):
+        """ Create a test case for a given identifier.
+        If no identifier is provided, we default to creating relative paths, which in JSON-LD will be converted
+        into absolute paths using the base IRI (see https://json-ld.org/spec/latest/json-ld/#iris).
+        """
+
         self.id = id
 
         # Make sure the identifier ends with '#' or '/', since we're going to extend it to build identifiers
@@ -54,6 +58,7 @@ def __init__(self, id):
         self.owl_imports = owlterms.OWL_IMPORTS
 
         # Metadata
+        self.context = []
         self.citation = []
         self.url = []
         self.year = []
@@ -68,13 +73,15 @@ def __init__(self, id):
     @staticmethod
     def load_from_document(doc):
         """ Load a test case from a JSON file. """
-        if '@id' not in doc:
-            raise PhyloreferenceTestCase.TestCaseException("Document does not contain required key '@id'")
 
-        testCase = PhyloreferenceTestCase(doc['@id'])
+        if '@id' in doc:
+            testCase = PhyloreferenceTestCase(doc['@id'])
+        else:
+            testCase = PhyloreferenceTestCase()
 
         # Load document-level properties
         PhyloreferenceTestCase.append_extend_or_ignore(testCase.type, doc, '@type')
+        PhyloreferenceTestCase.append_extend_or_ignore(testCase.context, doc, '@context')
         PhyloreferenceTestCase.append_extend_or_ignore(testCase.owl_imports, doc, 'owl:imports')
 
         PhyloreferenceTestCase.append_extend_or_ignore(testCase.citation, doc, 'citation')
@@ -123,6 +130,7 @@ def export_unless_blank(prop, var):
             elif len(var) > 1:
                 doc[prop] = var
 
+        export_unless_blank('@context', self.context)
         export_unless_blank('citation', self.citation)
         export_unless_blank('url', self.url)
         export_unless_blank('year', self.year)

diff --git a/testcase2owl/phyloref/owlterms.py b/testcase2owl/phyloref/owlterms.py
@@ -14,9 +14,20 @@
         # Will become "http://vocab.phyloref.org/phyloref/testcase.owl",
     "https://raw.githubusercontent.com/phyloref/phyloref-ontology/master/phyloref.owl",
         # Will become "http://phyloinformatics.net/phyloref.owl"
-    "http://purl.obolibrary.org/obo/bco.owl"
+    "http://purl.obolibrary.org/obo/bco.owl",
         # Contains OWL definitions for Darwin Core terms
         # TODO remove once we've implemented these properties ourselves.
+    # "http://www.ontologydesignpatterns.org/cp/owl/timeinterval.owl",
+        # OWL definitions for time intervals; used by the Publication Status Ontology
+        # TODO cannot be imported as they cause JFact++ 1.2.4 to report an inconsistent ontology
+    # "http://www.essepuntato.it/2012/04/tvc",
+        # OWL definitions for tvc:atTime, which links publication statuses with
+        # time intervals
+        # TODO: cannot be imported as one of its prerequisites doesn't work
+    # "http://purl.org/spar/pso",
+        # Publication Status Ontology: used to assign publication statuses to
+        # individual phyloreferences
+        # TODO: cannot be imported as one of its prerequisites doesn't work
 ]
 
 # CDAO terms

diff --git a/testcase2owl/testcase2owl.py b/testcase2owl/testcase2owl.py
@@ -123,7 +123,7 @@ def get_command_line_arguments():
         if count_unmatched_specifiers > 0:
             raise PhyloreferenceTestCase.TestCaseException(
                 "One or more specifiers could not be matched. " +
-                "Use 'match_not_expected' to document why it could not be matched."
+                "Use 'specifierWillNotMatch' to document why it could not be matched."
             )
 
 except PhyloreferenceTestCase.TestCaseException as e:
@@ -140,14 +140,28 @@ def get_command_line_arguments():
 os.chdir(current_working_directory)
 
 # Step 4. Write the paper back out again.
-path_to_this_script = os.path.dirname(os.path.realpath(__file__))
-doc['@context'] = path_to_this_script + '/paper-context.json'
+if '@context' not in doc:
+    path_to_this_script = os.path.dirname(os.path.realpath(__file__))
+    doc['@context'] = path_to_this_script + '/paper-context.json'
+
+# Add type declarations so that jphyloref can process timeintervals correctly.
+output = [ doc ]
+
+output.append({
+    '@id': 'http://www.ontologydesignpatterns.org/cp/owl/timeinterval.owl#hasIntervalStartDate',
+    '@type': { '@id': 'http://www.w3.org/2002/07/owl#DatatypeProperty' }
+})
+
+output.append({
+    '@id': 'http://www.ontologydesignpatterns.org/cp/owl/timeinterval.owl#hasIntervalEndDate',
+    '@type': { '@id': 'http://www.w3.org/2002/07/owl#DatatypeProperty' }
+})
 
 # json.dump() has issues with documents that are partially str and partially
 # unicode. Instead, we dump it to a string, make sure Python knows to treat
 # that string as unicode, and then write it out.
 
-output_as_json = json.dumps(doc, indent=4, sort_keys=True, ensure_ascii=False)
+output_as_json = json.dumps(output, indent=4, sort_keys=True, ensure_ascii=False)
 if isinstance(output_as_json, str):
     try:
         unicode = str