diff --git a/data2rdf/models/graph.py b/data2rdf/models/graph.py
index 7efc6be..228b862 100644
--- a/data2rdf/models/graph.py
+++ b/data2rdf/models/graph.py
@@ -29,6 +29,9 @@ class ValueRelationMapping(BaseModel):
description="""Object/Data/Annotation property for the value
resolving from `key` of this model""",
)
+ datatype: Optional[str] = Field(
+ None, description="XSD Datatype of the value"
+ )
class ClassTypeGraph(BasicGraphModel):
@@ -39,7 +42,7 @@ class ClassTypeGraph(BasicGraphModel):
description="""Value of the suffix of the
ontological class to be used""",
)
- rdfs_type: AnyUrl = Field(
+ rdfs_type: str = Field(
"owl:Class", description="rdfs:type for this concept"
)
annotation_properties: Optional[List[ValueRelationMapping]] = Field(
@@ -80,16 +83,23 @@ def value_json(cls, value) -> "Dict[str, Any]":
@property
def json_ld(self) -> "Dict[str, Any]":
annotations = {
- model.relation: self.value_json(model.value)
+ model.relation: (
+ {"@type": f"xsd:{model.datatype}", "@value": model.value}
+ if model.datatype
+ else self.value_json(model.value)
+ )
for model in self.annotation_properties
}
datatypes = {
- model.relation: self.value_json(model.value)
+ model.relation: (
+ {"@type": f"xsd:{model.datatype}", "@value": model.value}
+ if model.datatype
+ else self.value_json(model.value)
+ )
for model in self.data_properties
}
objects = {
- model.relation: str(model.value)
- for model in self.object_properties
+ model.relation: model.value for model in self.object_properties
}
return {
"@context": {
diff --git a/data2rdf/models/mapping.py b/data2rdf/models/mapping.py
index 0363e0a..ca66ce5 100644
--- a/data2rdf/models/mapping.py
+++ b/data2rdf/models/mapping.py
@@ -35,6 +35,10 @@ class TBoxBaseMapping(BasicConceptMapping):
..., description="Type of the semantic relation used in the mappings"
)
+ datatype: Optional[str] = Field(
+ None, description="XSD Datatype of the target value"
+ )
+
class CustomRelation(BaseModel):
"""Custom relation model"""
diff --git a/data2rdf/parsers/base.py b/data2rdf/parsers/base.py
index a97bb07..84081af 100644
--- a/data2rdf/parsers/base.py
+++ b/data2rdf/parsers/base.py
@@ -132,12 +132,14 @@ class TBoxBaseParser(AnyBoxBaseParser):
where the suffix of the ontological class to be created.""",
)
- rdfs_type: AnyUrl = Field(
- "owl:Class", description="rdfs:type for the concepts"
+ rdfs_type_location: Optional[str] = Field(
+ None,
+ description="""Key/column name/reference to location in the data file
+ where the rdfs:type for the concepts is defined.""",
)
version_info: Optional[str] = Field(
- None, description="Version of the ontplpgy"
+ None, description="Version of the ontology"
)
ontology_iri: Optional[Union[str, AnyUrl]] = Field(
@@ -152,6 +154,10 @@ class TBoxBaseParser(AnyBoxBaseParser):
None, description="Name of the authors contributing to the ontology."
)
+ fillna: Optional[Any] = Field(
+ "", description="Value to fill NaN values in the parsed dataframe."
+ )
+
_classes: Any = PrivateAttr()
@property
diff --git a/data2rdf/parsers/excel.py b/data2rdf/parsers/excel.py
index 6c3b9e0..69cf451 100644
--- a/data2rdf/parsers/excel.py
+++ b/data2rdf/parsers/excel.py
@@ -2,7 +2,7 @@
import warnings
from io import BytesIO
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Union
from urllib.parse import quote, urljoin
import pandas as pd
@@ -56,10 +56,6 @@ class ExcelTBoxParser(TBoxBaseParser):
a list with the mapping.""",
)
- fillna: Optional[Any] = Field(
- "", description="Value to fill NaN values in the parsed dataframe."
- )
-
# OVERRIDE
@property
def mapping_model(cls) -> TBoxBaseMapping:
diff --git a/data2rdf/parsers/utils.py b/data2rdf/parsers/utils.py
index 7f9676a..a766add 100644
--- a/data2rdf/parsers/utils.py
+++ b/data2rdf/parsers/utils.py
@@ -1,6 +1,7 @@
"""Data2RDF parser utilities"""
import json
+import warnings
from typing import TYPE_CHECKING
import numpy as np
@@ -113,23 +114,43 @@ def _make_tbox_classes(
value = row[key]
if isinstance(value, float) and np.isnan(value):
value = self.fillna
- relation_mapping = {
- "value": value,
- "relation": model.relation,
- }
- if model.relation_type == RelationType.ANNOTATION_PROPERTY:
- annotations.append(relation_mapping)
- if model.relation_type == RelationType.DATA_PROPERTY:
- datatypes.append(relation_mapping)
- if model.relation_type == RelationType.OBJECT_PROPERTY:
- objects.append(relation_mapping)
+ if value:
+ relation_mapping = {
+ "value": value,
+ "relation": model.relation,
+ "datatype": model.datatype,
+ }
+ if model.relation_type == RelationType.ANNOTATION_PROPERTY:
+ annotations.append(relation_mapping)
+ if model.relation_type == RelationType.DATA_PROPERTY:
+ datatypes.append(relation_mapping)
+ if model.relation_type == RelationType.OBJECT_PROPERTY:
+ objects.append(relation_mapping)
+ else:
+ warnings.warn(
+ f"Data for key `{key}` does not exist in row {n}.",
+ MappingMissmatchWarning,
+ )
except KeyError:
- raise MappingMissmatchWarning(
- f"Column with name `{key}` does not exist in provided worksheet."
+ warnings.warn(
+ f"Data for key `{key}` does not exist in row {n}.",
+ MappingMissmatchWarning,
)
+ if self.rdfs_type_location:
+ rdfs_type = row[self.rdfs_type_location]
+ if isinstance(rdfs_type, type(None)) or (
+ isinstance(rdfs_type, float) and np.isnan(rdfs_type)
+ ):
+ warnings.warn(
+ f"Data for key `{self.rdfs_type_location}` does not exist in row {n}.",
+ MappingMissmatchWarning,
+ )
+ rdfs_type = "owl:Class"
+ else:
+ rdfs_type = "owl:Class"
subgraph = ClassTypeGraph(
- rdfs_type=self.rdfs_type,
+ rdfs_type=rdfs_type,
suffix=row[self.suffix_location],
annotation_properties=annotations,
object_properties=objects,
diff --git a/tests/tbox/explicit_datatypes/__init__.py b/tests/tbox/explicit_datatypes/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/tbox/explicit_datatypes/test_explicit_datatypes.py b/tests/tbox/explicit_datatypes/test_explicit_datatypes.py
new file mode 100644
index 0000000..e029fd7
--- /dev/null
+++ b/tests/tbox/explicit_datatypes/test_explicit_datatypes.py
@@ -0,0 +1,133 @@
+"""Test for explicit datatypes."""
+
+import pytest
+
+DATA = [
+ {
+ "Ontological concept ID": "TestingMachine",
+ "Label": "Testing machine",
+ "Description": "Some description",
+ "Superclass": "owl:Thing", #TODO: This one is converted to a string.
+ "Comment": None,
+ "Source": 123, #TODO: This one is converted to a float. Why?
+ "Author's name": None,
+ "Author's email": None
+ },
+ {
+ "Ontological concept ID": "hasTestingMachine",
+ "Label": "has Testing machine",
+ "Type": "owl:ObjectProperty",
+ "Description": "Some description",
+ "Comment": None,
+ "Source": None,
+ "Author's name": None,
+ "Author's email": None
+ }
+]
+
+
+MAPPING = [
+ {
+ "key": "Label",
+ "relation": "http://www.w3.org/2000/01/rdf-schema#label",
+ "relation_type": "annotation_property",
+ },
+ {
+ "key": "Description",
+ "relation": "http://purl.org/dc/terms/description",
+ "relation_type": "data_property",
+ },
+ {
+ "key": "Superclass",
+ "relation": "http://www.w3.org/2000/01/rdf-schema#subClassOf",
+ "relation_type": "object_property",
+ },
+ {
+ "key": "Comment",
+ "relation": "http://www.w3.org/2000/01/rdf-schema#comment",
+ "relation_type": "data_property",
+ },
+ {
+ "key": "Source",
+ "relation": "http://purl.org/dc/terms/source",
+ "relation_type": "data_property",
+ "datatype": "integer"
+ },
+ {
+ "key": "Author's name",
+ "relation": "http://purl.org/dc/terms/contributor",
+ "relation_type": "data_property",
+ },
+ {
+ "key": "Author's email",
+ "relation": "http://xmlns.com/foaf/0.1/mbox",
+ "relation_type": "data_property",
+ }
+]
+
+EXPECTED = """
+@prefix dcterms: .
+@prefix foaf1: .
+@prefix owl: .
+@prefix rdfs: .
+@prefix xsd: .
+
+ a owl:Ontology ;
+ dcterms:creator [ a foaf1:Person ;
+ foaf1:name "Jane Doe" ] ;
+ dcterms:title "Test Ontology" ;
+ owl:versionInfo "1.0.0" .
+
+ a owl:Class ;
+ rdfs:label "Testing machine"^^xsd:string ;
+ dcterms:description "Some description"^^xsd:string ;
+ dcterms:source "123"^^xsd:integer ;
+ rdfs:subClassOf owl:Thing .
+
+ a owl:ObjectProperty ;
+ rdfs:label "has Testing machine"^^xsd:string ;
+ dcterms:description "Some description"^^xsd:string ."""
+
+def test_explicit_datatypes():
+ from rdflib import Graph
+
+ from data2rdf import ( # isort:skip
+ Data2RDF,
+ Parser,
+ )
+ from data2rdf.warnings import MappingMissmatchWarning
+
+ with pytest.warns(
+ MappingMissmatchWarning, match="Data for key"
+ ) as warnings:
+
+ pipeline = Data2RDF(
+ mode="tbox",
+ raw_data=DATA,
+ mapping=MAPPING,
+ parser=Parser.json,
+ parser_args={
+ "suffix_location": "Ontological concept ID",
+ "rdfs_type_location": "Type",
+ "ontology_title": "Test Ontology",
+ "authors": ["Jane Doe"],
+ "version_info": "1.0.0",
+ },
+ config={
+ "base_iri": "https://w3id.org/dimat",
+ },
+ )
+
+ missmatches = [
+ warning
+ for warning in warnings
+ if warning.category == MappingMissmatchWarning
+ ]
+ assert len(missmatches) == 9
+
+ print(pipeline.graph.serialize(format="turtle")) #TODO: remove print
+
+ expected_graph = Graph()
+ expected_graph.parse(data=EXPECTED)
+
+ assert pipeline.graph.isomorphic(expected_graph)