Skip to content

Commit

Permalink
add explicit datatyping for tbox
Browse files Browse the repository at this point in the history
  • Loading branch information
MBueschelberger committed Nov 11, 2024
1 parent f6ed034 commit eaeb447
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 26 deletions.
20 changes: 15 additions & 5 deletions data2rdf/models/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ class ValueRelationMapping(BaseModel):
description="""Object/Data/Annotation property for the value
resolving from `key` of this model""",
)
datatype: Optional[str] = Field(
None, description="XSD Datatype of the value"
)


class ClassTypeGraph(BasicGraphModel):
Expand All @@ -39,7 +42,7 @@ class ClassTypeGraph(BasicGraphModel):
description="""Value of the suffix of the
ontological class to be used""",
)
rdfs_type: AnyUrl = Field(
rdfs_type: str = Field(
"owl:Class", description="rdfs:type for this concept"
)
annotation_properties: Optional[List[ValueRelationMapping]] = Field(
Expand Down Expand Up @@ -80,16 +83,23 @@ def value_json(cls, value) -> "Dict[str, Any]":
@property
def json_ld(self) -> "Dict[str, Any]":
annotations = {
model.relation: self.value_json(model.value)
model.relation: (
{"@type": f"xsd:{model.datatype}", "@value": model.value}
if model.datatype
else self.value_json(model.value)
)
for model in self.annotation_properties
}
datatypes = {
model.relation: self.value_json(model.value)
model.relation: (
{"@type": f"xsd:{model.datatype}", "@value": model.value}
if model.datatype
else self.value_json(model.value)
)
for model in self.data_properties
}
objects = {
model.relation: str(model.value)
for model in self.object_properties
model.relation: model.value for model in self.object_properties
}
return {
"@context": {
Expand Down
4 changes: 4 additions & 0 deletions data2rdf/models/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ class TBoxBaseMapping(BasicConceptMapping):
..., description="Type of the semantic relation used in the mappings"
)

datatype: Optional[str] = Field(
None, description="XSD Datatype of the target value"
)


class CustomRelation(BaseModel):
"""Custom relation model"""
Expand Down
12 changes: 9 additions & 3 deletions data2rdf/parsers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,14 @@ class TBoxBaseParser(AnyBoxBaseParser):
where the suffix of the ontological class to be created.""",
)

rdfs_type: AnyUrl = Field(
"owl:Class", description="rdfs:type for the concepts"
rdfs_type_location: Optional[str] = Field(
None,
description="""Key/column name/reference to location in the data file
where the rdfs:type for the concepts is defined.""",
)

version_info: Optional[str] = Field(
None, description="Version of the ontplpgy"
None, description="Version of the ontology"
)

ontology_iri: Optional[Union[str, AnyUrl]] = Field(
Expand All @@ -152,6 +154,10 @@ class TBoxBaseParser(AnyBoxBaseParser):
None, description="Name of the authors contributing to the ontology."
)

fillna: Optional[Any] = Field(
"", description="Value to fill NaN values in the parsed dataframe."
)

_classes: Any = PrivateAttr()

@property
Expand Down
6 changes: 1 addition & 5 deletions data2rdf/parsers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import warnings
from io import BytesIO
from typing import Any, Dict, List, Optional, Union
from typing import Any, Dict, List, Union
from urllib.parse import quote, urljoin

import pandas as pd
Expand Down Expand Up @@ -56,10 +56,6 @@ class ExcelTBoxParser(TBoxBaseParser):
a list with the mapping.""",
)

fillna: Optional[Any] = Field(
"", description="Value to fill NaN values in the parsed dataframe."
)

# OVERRIDE
@property
def mapping_model(cls) -> TBoxBaseMapping:
Expand Down
47 changes: 34 additions & 13 deletions data2rdf/parsers/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Data2RDF parser utilities"""

import json
import warnings
from typing import TYPE_CHECKING

import numpy as np
Expand Down Expand Up @@ -113,23 +114,43 @@ def _make_tbox_classes(
value = row[key]
if isinstance(value, float) and np.isnan(value):
value = self.fillna
relation_mapping = {
"value": value,
"relation": model.relation,
}
if model.relation_type == RelationType.ANNOTATION_PROPERTY:
annotations.append(relation_mapping)
if model.relation_type == RelationType.DATA_PROPERTY:
datatypes.append(relation_mapping)
if model.relation_type == RelationType.OBJECT_PROPERTY:
objects.append(relation_mapping)
if value:
relation_mapping = {
"value": value,
"relation": model.relation,
"datatype": model.datatype,
}
if model.relation_type == RelationType.ANNOTATION_PROPERTY:
annotations.append(relation_mapping)
if model.relation_type == RelationType.DATA_PROPERTY:
datatypes.append(relation_mapping)
if model.relation_type == RelationType.OBJECT_PROPERTY:
objects.append(relation_mapping)
else:
warnings.warn(
f"Data for key `{key}` does not exist in row {n}.",
MappingMissmatchWarning,
)
except KeyError:
raise MappingMissmatchWarning(
f"Column with name `{key}` does not exist in provided worksheet."
warnings.warn(
f"Data for key `{key}` does not exist in row {n}.",
MappingMissmatchWarning,
)

if self.rdfs_type_location:
rdfs_type = row[self.rdfs_type_location]
if isinstance(rdfs_type, type(None)) or (
isinstance(rdfs_type, float) and np.isnan(rdfs_type)
):
warnings.warn(
f"Data for key `{self.rdfs_type_location}` does not exist in row {n}.",
MappingMissmatchWarning,
)
rdfs_type = "owl:Class"
else:
rdfs_type = "owl:Class"
subgraph = ClassTypeGraph(
rdfs_type=self.rdfs_type,
rdfs_type=rdfs_type,
suffix=row[self.suffix_location],
annotation_properties=annotations,
object_properties=objects,
Expand Down
Empty file.
133 changes: 133 additions & 0 deletions tests/tbox/explicit_datatypes/test_explicit_datatypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""Test for explicit datatypes."""

import pytest

# Raw input records handed to the pipeline as `raw_data`; one dict per
# ontological concept. The first record carries no "Type" key and the second
# carries no "Superclass" key; several values are deliberately `None`.
DATA = [
    {
        "Ontological concept ID": "TestingMachine",
        "Label": "Testing machine",
        "Description": "Some description",
        # TODO: This one is converted to a string.
        "Superclass": "owl:Thing",
        "Comment": None,
        # TODO: This one is converted to a float. Why?
        "Source": 123,
        "Author's name": None,
        "Author's email": None
    },
    {
        "Ontological concept ID": "hasTestingMachine",
        "Label": "has Testing machine",
        "Type": "owl:ObjectProperty",
        "Description": "Some description",
        "Comment": None,
        "Source": None,
        "Author's name": None,
        "Author's email": None
    }
]


# Mapping entries handed to the pipeline as `mapping`: each one links a key of
# the DATA records to a relation IRI and a relation type. Only the "Source"
# entry sets an explicit `datatype` ("integer"); all others omit it.
MAPPING = [
    {
        "key": "Label",
        "relation": "http://www.w3.org/2000/01/rdf-schema#label",
        "relation_type": "annotation_property",
    },
    {
        "key": "Description",
        "relation": "http://purl.org/dc/terms/description",
        "relation_type": "data_property",
    },
    {
        "key": "Superclass",
        "relation": "http://www.w3.org/2000/01/rdf-schema#subClassOf",
        "relation_type": "object_property",
    },
    {
        "key": "Comment",
        "relation": "http://www.w3.org/2000/01/rdf-schema#comment",
        "relation_type": "data_property",
    },
    {
        "key": "Source",
        "relation": "http://purl.org/dc/terms/source",
        "relation_type": "data_property",
        "datatype": "integer"
    },
    {
        "key": "Author's name",
        "relation": "http://purl.org/dc/terms/contributor",
        "relation_type": "data_property",
    },
    {
        "key": "Author's email",
        "relation": "http://xmlns.com/foaf/0.1/mbox",
        "relation_type": "data_property",
    }
]

# Reference graph in Turtle syntax that the test parses with rdflib and
# compares against the pipeline output. Note that `dcterms:source` is the only
# literal typed as `xsd:integer`; the other literals are `xsd:string`.
EXPECTED = """
@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix foaf1: <http://xmlns.com/foaf/spec/> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
<https://w3id.org/dimat> a owl:Ontology ;
dcterms:creator [ a foaf1:Person ;
foaf1:name "Jane Doe" ] ;
dcterms:title "Test Ontology" ;
owl:versionInfo "1.0.0" .
<https://w3id.org/dimat/TestingMachine> a owl:Class ;
rdfs:label "Testing machine"^^xsd:string ;
dcterms:description "Some description"^^xsd:string ;
dcterms:source "123"^^xsd:integer ;
rdfs:subClassOf owl:Thing .
<https://w3id.org/dimat/hasTestingMachine> a owl:ObjectProperty ;
rdfs:label "has Testing machine"^^xsd:string ;
dcterms:description "Some description"^^xsd:string ."""

def test_explicit_datatypes():
    """Check that explicit datatypes from the mapping reach the TBox graph.

    Builds a ``Data2RDF`` pipeline in ``tbox`` mode from ``DATA`` and
    ``MAPPING`` with the JSON parser, asserts that exactly nine
    ``MappingMissmatchWarning`` records matching "Data for key" were
    captured while the pipeline was constructed, and finally asserts that
    the resulting graph is isomorphic to the Turtle document in
    ``EXPECTED``.
    """
    # Imported lazily so that collecting this module does not require the
    # packages under test to be importable.
    from rdflib import Graph

    from data2rdf import (  # isort:skip
        Data2RDF,
        Parser,
    )
    from data2rdf.warnings import MappingMissmatchWarning

    # The recorder is deliberately not called `warnings`, which would shadow
    # the name of the standard-library module.
    with pytest.warns(
        MappingMissmatchWarning, match="Data for key"
    ) as recorded:
        pipeline = Data2RDF(
            mode="tbox",
            raw_data=DATA,
            mapping=MAPPING,
            parser=Parser.json,
            parser_args={
                "suffix_location": "Ontological concept ID",
                "rdfs_type_location": "Type",
                "ontology_title": "Test Ontology",
                "authors": ["Jane Doe"],
                "version_info": "1.0.0",
            },
            config={
                "base_iri": "https://w3id.org/dimat",
            },
        )

    # The recorder stays readable after the `with` block has exited.
    missmatches = [
        warning
        for warning in recorded
        if warning.category == MappingMissmatchWarning
    ]
    assert len(missmatches) == 9

    # State the serialization explicitly instead of relying on rdflib's
    # format guessing for in-memory data.
    expected_graph = Graph()
    expected_graph.parse(data=EXPECTED, format="turtle")

    assert pipeline.graph.isomorphic(expected_graph)

0 comments on commit eaeb447

Please sign in to comment.