diff --git a/data2rdf/models/graph.py b/data2rdf/models/graph.py index 7efc6be..228b862 100644 --- a/data2rdf/models/graph.py +++ b/data2rdf/models/graph.py @@ -29,6 +29,9 @@ class ValueRelationMapping(BaseModel): description="""Object/Data/Annotation property for the value resolving from `key` of this model""", ) + datatype: Optional[str] = Field( + None, description="XSD Datatype of the value" + ) class ClassTypeGraph(BasicGraphModel): @@ -39,7 +42,7 @@ class ClassTypeGraph(BasicGraphModel): description="""Value of the suffix of the ontological class to be used""", ) - rdfs_type: AnyUrl = Field( + rdfs_type: str = Field( "owl:Class", description="rdfs:type for this concept" ) annotation_properties: Optional[List[ValueRelationMapping]] = Field( @@ -80,16 +83,23 @@ def value_json(cls, value) -> "Dict[str, Any]": @property def json_ld(self) -> "Dict[str, Any]": annotations = { - model.relation: self.value_json(model.value) + model.relation: ( + {"@type": f"xsd:{model.datatype}", "@value": model.value} + if model.datatype + else self.value_json(model.value) + ) for model in self.annotation_properties } datatypes = { - model.relation: self.value_json(model.value) + model.relation: ( + {"@type": f"xsd:{model.datatype}", "@value": model.value} + if model.datatype + else self.value_json(model.value) + ) for model in self.data_properties } objects = { - model.relation: str(model.value) - for model in self.object_properties + model.relation: model.value for model in self.object_properties } return { "@context": { diff --git a/data2rdf/models/mapping.py b/data2rdf/models/mapping.py index 0363e0a..ca66ce5 100644 --- a/data2rdf/models/mapping.py +++ b/data2rdf/models/mapping.py @@ -35,6 +35,10 @@ class TBoxBaseMapping(BasicConceptMapping): ..., description="Type of the semantic relation used in the mappings" ) + datatype: Optional[str] = Field( + None, description="XSD Datatype of the targed value" + ) + class CustomRelation(BaseModel): """Custom relation model""" diff --git a/data2rdf/parsers/base.py b/data2rdf/parsers/base.py index a97bb07..84081af 100644 --- a/data2rdf/parsers/base.py +++ b/data2rdf/parsers/base.py @@ -132,12 +132,14 @@ class TBoxBaseParser(AnyBoxBaseParser): where the suffix of the ontological class to be created.""", ) - rdfs_type: AnyUrl = Field( - "owl:Class", description="rdfs:type for the concepts" + rdfs_type_location: Optional[str] = Field( + None, + description="""Key/column name/reference to location in the data file + where the rdfs:type for the concepts is defined.""", ) version_info: Optional[str] = Field( - None, description="Version of the ontplpgy" + None, description="Version of the ontology" ) ontology_iri: Optional[Union[str, AnyUrl]] = Field( @@ -152,6 +154,10 @@ class TBoxBaseParser(AnyBoxBaseParser): None, description="Name of the authors contributing to the ontology." ) + fillna: Optional[Any] = Field( + "", description="Value to fill NaN values in the parsed dataframe." + ) + _classes: Any = PrivateAttr() @property diff --git a/data2rdf/parsers/excel.py b/data2rdf/parsers/excel.py index 6c3b9e0..69cf451 100644 --- a/data2rdf/parsers/excel.py +++ b/data2rdf/parsers/excel.py @@ -2,7 +2,7 @@ import warnings from io import BytesIO -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Union from urllib.parse import quote, urljoin import pandas as pd @@ -56,10 +56,6 @@ class ExcelTBoxParser(TBoxBaseParser): a list with the mapping.""", ) - fillna: Optional[Any] = Field( - "", description="Value to fill NaN values in the parsed dataframe." - ) - # OVERRIDE @property def mapping_model(cls) -> TBoxBaseMapping: diff --git a/data2rdf/parsers/utils.py b/data2rdf/parsers/utils.py index 7f9676a..a766add 100644 --- a/data2rdf/parsers/utils.py +++ b/data2rdf/parsers/utils.py @@ -1,6 +1,7 @@ """Data2RDF parser utilities""" import json +import warnings from typing import TYPE_CHECKING import numpy as np @@ -113,23 +114,43 @@ def _make_tbox_classes( value = row[key] if isinstance(value, float) and np.isnan(value): value = self.fillna - relation_mapping = { - "value": value, - "relation": model.relation, - } - if model.relation_type == RelationType.ANNOTATION_PROPERTY: - annotations.append(relation_mapping) - if model.relation_type == RelationType.DATA_PROPERTY: - datatypes.append(relation_mapping) - if model.relation_type == RelationType.OBJECT_PROPERTY: - objects.append(relation_mapping) + if value: + relation_mapping = { + "value": value, + "relation": model.relation, + "datatype": model.datatype, + } + if model.relation_type == RelationType.ANNOTATION_PROPERTY: + annotations.append(relation_mapping) + if model.relation_type == RelationType.DATA_PROPERTY: + datatypes.append(relation_mapping) + if model.relation_type == RelationType.OBJECT_PROPERTY: + objects.append(relation_mapping) + else: + warnings.warn( + f"Data for key `{key}` does not exist in row {n}.", + MappingMissmatchWarning, + ) except KeyError: - raise MappingMissmatchWarning( - f"Column with name `{key}` does not exist in provided worksheet." + warnings.warn( + f"Data for key `{key}` does not exist in row {n}.", + MappingMissmatchWarning, ) + if self.rdfs_type_location: + rdfs_type = row[self.rdfs_type_location] + if isinstance(rdfs_type, type(None)) or ( + isinstance(rdfs_type, float) and np.isnan(rdfs_type) + ): + warnings.warn( + f"Data for key `{self.rdfs_type_location}` does not exist in row {n}.", + MappingMissmatchWarning, + ) + rdfs_type = "owl:Class" + else: + rdfs_type = "owl:Class" subgraph = ClassTypeGraph( - rdfs_type=self.rdfs_type, + rdfs_type=rdfs_type, suffix=row[self.suffix_location], annotation_properties=annotations, object_properties=objects, diff --git a/tests/tbox/explicit_datatypes/__init__.py b/tests/tbox/explicit_datatypes/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/tbox/explicit_datatypes/test_explicit_datatypes.py b/tests/tbox/explicit_datatypes/test_explicit_datatypes.py new file mode 100644 index 0000000..e029fd7 --- /dev/null +++ b/tests/tbox/explicit_datatypes/test_explicit_datatypes.py @@ -0,0 +1,133 @@ +"""Test for explicit datatypes.""" + +import pytest + +DATA = [ + { + "Ontological concept ID": "TestingMachine", + "Label": "Testing machine", + "Description": "Some description", + "Superclass": "owl:Thing", #TODO: This one is converted to a string. + "Comment": None, + "Source": 123, #TODO: This one is converted to a float. Why? + "Author's name": None, + "Author's email": None + }, + { + "Ontological concept ID": "hasTestingMachine", + "Label": "has Testing machine", + "Type": "owl:ObjectProperty", + "Description": "Some description", + "Comment": None, + "Source": None, + "Author's name": None, + "Author's email": None + } +] + + +MAPPING = [ + { + "key": "Label", + "relation": "http://www.w3.org/2000/01/rdf-schema#label", + "relation_type": "annotation_property", + }, + { + "key": "Description", + "relation": "http://purl.org/dc/terms/description", + "relation_type": "data_property", + }, + { + "key": "Superclass", + "relation": "http://www.w3.org/2000/01/rdf-schema#subClassOf", + "relation_type": "object_property", + }, + { + "key": "Comment", + "relation": "http://www.w3.org/2000/01/rdf-schema#comment", + "relation_type": "data_property", + }, + { + "key": "Source", + "relation": "http://purl.org/dc/terms/source", + "relation_type": "data_property", + "datatype": "integer" + }, + { + "key": "Author's name", + "relation": "http://purl.org/dc/terms/contributor", + "relation_type": "data_property", + }, + { + "key": "Author's email", + "relation": "http://xmlns.com/foaf/0.1/mbox", + "relation_type": "data_property", + } +] + +EXPECTED = """ +@prefix dcterms: . +@prefix foaf1: . +@prefix owl: . +@prefix rdfs: . +@prefix xsd: . + + a owl:Ontology ; + dcterms:creator [ a foaf1:Person ; + foaf1:name "Jane Doe" ] ; + dcterms:title "Test Ontology" ; + owl:versionInfo "1.0.0" . + + a owl:Class ; + rdfs:label "Testing machine"^^xsd:string ; + dcterms:description "Some description"^^xsd:string ; + dcterms:source "123"^^xsd:integer ; + rdfs:subClassOf owl:Thing . + + a owl:ObjectProperty ; + rdfs:label "has Testing machine"^^xsd:string ; + dcterms:description "Some description"^^xsd:string .""" + +def test_explicit_datatypes(): + from rdflib import Graph + + from data2rdf import ( # isort:skip + Data2RDF, + Parser, + ) + from data2rdf.warnings import MappingMissmatchWarning + + with pytest.warns( + MappingMissmatchWarning, match="Data for key" + ) as warnings: + + pipeline = Data2RDF( + mode="tbox", + raw_data=DATA, + mapping=MAPPING, + parser=Parser.json, + parser_args={ + "suffix_location": "Ontological concept ID", + "rdfs_type_location": "Type", + "ontology_title": "Test Ontology", + "authors": ["Jane Doe"], + "version_info": "1.0.0", + }, + config={ + "base_iri": "https://w3id.org/dimat", + }, + ) + + missmatches = [ + warning + for warning in warnings + if warning.category == MappingMissmatchWarning + ] + assert len(missmatches) == 9 + + print(pipeline.graph.serialize(format="turtle")) #TODO: remove print + + expected_graph = Graph() + expected_graph.parse(data=EXPECTED) + + assert pipeline.graph.isomorphic(expected_graph)