Skip to content

Commit

Permalink
let the custom relations have their own subgraph and allow string dat…
Browse files Browse the repository at this point in the history
…atypes for quantity graphs
  • Loading branch information
MBueschelberger committed Nov 27, 2024
1 parent 8ec2a54 commit d9aed40
Show file tree
Hide file tree
Showing 8 changed files with 522 additions and 54 deletions.
61 changes: 53 additions & 8 deletions data2rdf/models/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@
import warnings
from typing import Any, Dict, List, Optional, Union

from data2rdf.models.utils import apply_datatype, detect_datatype
from data2rdf.qudt.utils import _get_query_match
from data2rdf.utils import make_prefix

from data2rdf.models.utils import ( # isort:skip
apply_datatype,
detect_datatype,
is_float,
is_integer,
)

from data2rdf.models.base import ( # isort:skip
BasicGraphModel,
BasicSuffixModel,
Expand All @@ -15,6 +21,7 @@

from pydantic import ( # isort:skip
AnyUrl,
AliasChoices,
BaseModel,
Field,
ValidationInfo,
Expand Down Expand Up @@ -107,9 +114,9 @@ class QuantityGraph(BasicGraphModel, BasicSuffixModel):
a quantity describing a column of a time series or table with a unit."""

unit: Optional[Union[str, AnyUrl]] = Field(
..., description="QUDT Symbol or any other IRI for the unit mapping"
None, description="QUDT Symbol or any other IRI for the unit mapping"
)
value: Optional[Union[int, float]] = Field(
value: Optional[Union[int, float, str]] = Field(
None, description="Value of the quantity"
)

Expand All @@ -125,6 +132,17 @@ class QuantityGraph(BasicGraphModel, BasicSuffixModel):
for mapping the data value to the individual.""",
)

@field_validator("value", mode="after")
@classmethod
def validate_value(
    cls, value: Union[int, float, str]
) -> Union[int, float]:
    """Coerce a numeric-looking quantity value to a Python number.

    The value is handed to `is_float` first and converted with `float()`
    when that check passes; otherwise `is_integer` is consulted and the
    value is converted with `int()`. A value that passes neither check is
    returned unchanged.

    NOTE(review): the float check takes precedence, so the integer branch
    only runs for values that `is_float` rejects -- confirm this matches
    the semantics of the helpers imported from `data2rdf.models.utils`.
    """
    if is_float(value):
        return float(value)
    if is_integer(value):
        return int(value)
    # Neither check matched: hand the value back untouched.
    return value

@field_validator("unit", mode="after")
@classmethod
def validate_unit(
Expand Down Expand Up @@ -200,23 +218,34 @@ class PropertyGraph(BasicGraphModel, BasicSuffixModel):
discrete value but can also be a reference to a column in a table or
time series."""

value: Optional[Union[str, int, float, bool, AnyUrl]] = Field(
None, description="Value of the property"
)
value: Optional[
Union[str, int, float, bool, AnyUrl, "PropertyGraph", "QuantityGraph"]
] = Field(None, description="Value of the property")
annotation: Optional[Union[str, AnyUrl]] = Field(
None, description="Base IRI with which the value shall be concatenated"
)
value_relation: Optional[Union[str, AnyUrl]] = Field(
"rdfs:label",
description="""Data or annotation property
for mapping the data value to the individual.""",
alias=AliasChoices("relation", "value_relation", "valuerelation"),
)
value_relation_type: Optional[RelationType] = Field(
None, description="Type of the semantic relation used in the mappings"
None,
description="Type of the semantic relation used in the mappings",
alias=AliasChoices(
"value_relation_type",
"value_relationtype",
"relation_type",
"relationtype",
),
)
value_datatype: Optional[str] = Field(
None,
description="In case of an annotation or data property, this field indicates the XSD Datatype of the value",
alias=AliasChoices(
"value_datatype", "value_data_type", "datatype", "data_type"
),
)

@field_validator("annotation")
Expand All @@ -227,6 +256,19 @@ def validate_annotation(cls, value: AnyUrl) -> AnyUrl:
value = AnyUrl(str(value).strip())
return value

@model_validator(mode="after")
@classmethod
def validate_value(cls, self: "PropertyGraph") -> "PropertyGraph":
    """
    Propagate the config of this model to a nested graph value.

    When `value` is itself a `PropertyGraph` or a `QuantityGraph`, its
    `config` attribute is overwritten with the config of this model.
    Any other kind of value is left untouched. The model itself is
    returned in every case.
    """
    nested = self.value
    if isinstance(nested, (PropertyGraph, QuantityGraph)):
        nested.config = self.config
    return self

@model_validator(mode="after")
@classmethod
def validate_property_graph(cls, self: "PropertyGraph") -> "PropertyGraph":
Expand Down Expand Up @@ -264,7 +306,10 @@ def value_json(self) -> "Optional[Dict[str, str]]":
spec = apply_datatype(self.value, self.value_datatype)
response = {self.value_relation: spec}
else:
response = {self.value_relation: {"@id": str(self.value)}}
if isinstance(self.value, (PropertyGraph, QuantityGraph)):
response = {self.value_relation: self.value.json_ld}
else:
response = {self.value_relation: {"@id": str(self.value)}}
else:
response = {}
return response
Expand Down
49 changes: 45 additions & 4 deletions data2rdf/models/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@

from typing import List, Optional, Union

from pydantic import AnyUrl, BaseModel, Field, field_validator, model_validator
from pydantic import (
AliasChoices,
AnyUrl,
BaseModel,
Field,
field_validator,
model_validator,
)

from .base import BasicConceptMapping, BasicSuffixModel, RelationType

Expand All @@ -28,7 +35,33 @@ class TBoxBaseMapping(BasicConceptMapping):
)

datatype: Optional[str] = Field(
None, description="XSD Datatype of the targed value"
None,
description="XSD Datatype of the targed value",
alias=AliasChoices("datatype", "data_type"),
)


class CustomRelationPropertySubgraph(BasicSuffixModel):
    """Mapping model with a single `value_relation` setting.

    Listed as one of the accepted types of
    `CustomRelation.object_data_type`.
    """

    # Relation attached to the value; defaults to `rdfs:label`.
    value_relation: Optional[str] = Field(
        "rdfs:label",
        description="""Object/Data/Annotation property for the value
resolving from `key` of this model""",
    )


class CustomRelationQuantitySubgraph(BasicSuffixModel):
    """Mapping model holding unit and relation settings for a quantity.

    Listed as one of the accepted types of
    `CustomRelation.object_data_type`.
    """

    # Relation pointing from the individual to its unit IRI;
    # defaults to `qudt:hasUnit`.
    unit_relation: Optional[Union[str, AnyUrl]] = Field(
        "qudt:hasUnit",
        description="""Object property for mapping the IRI
of the unit to the individual.""",
    )
    # Relation pointing from the individual to its data value;
    # defaults to `qudt:value`.
    value_relation: Optional[Union[str, AnyUrl]] = Field(
        "qudt:value",
        description="""Data property
for mapping the data value to the individual.""",
    )
    # Unit given as a symbol or an IRI; optional, defaults to None.
    unit: Optional[Union[str, AnyUrl]] = Field(
        None, description="Symbol or QUDT IRI for the mapping"
    )


Expand All @@ -44,8 +77,16 @@ class CustomRelation(BaseModel):
...,
description="Cell number or Jsonpath to the value of the quantity or property",
)
object_data_type: Optional[str] = Field(
None, description="XSD Data type of the object"
object_data_type: Optional[
Union[
str, CustomRelationPropertySubgraph, CustomRelationQuantitySubgraph
]
] = Field(
None,
description="XSD Data type of the object or PropertyGraph-mapping or QuantityGraph-mapping",
alias=AliasChoices(
"object_datatype", "object_data_type", "object_type"
),
)
relation_type: Optional[RelationType] = Field(
None, description="Type of the semantic relation used in the mappings"
Expand Down
66 changes: 53 additions & 13 deletions data2rdf/parsers/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,19 @@
from data2rdf.warnings import MappingMissmatchWarning

from .base import ABoxBaseParser, BaseFileParser, TBoxBaseParser
from .utils import _make_tbox_classes, _make_tbox_json_ld, _strip_unit

from .utils import ( # isort:skip
_make_tbox_classes,
_make_tbox_json_ld,
_strip_unit,
_value_exists,
)

from data2rdf.models.mapping import ( # isort:skip
ABoxExcelMapping,
TBoxBaseMapping,
CustomRelationPropertySubgraph,
CustomRelationQuantitySubgraph,
)


Expand Down Expand Up @@ -385,18 +393,23 @@ def _run_parser(

if datum.value_location and not datum.time_series_start:
value = worksheet[datum.value_location].value
if model_data.get("unit") and value:

if model_data.get("unit") and _value_exists(value):
model_data["value"] = value
elif not model_data.get("unit") and value:
elif not model_data.get("unit") and _value_exists(value):
model_data["value"] = str(value)
else:
message = f"""Concept with key `{datum.key}`
does not have a value at location `{datum.value_location}`.
Concept will be omitted in graph.
"""
warnings.warn(message, MappingMissmatchWarning)
else:
value = None

if model_data.get("value") or suffix in self.time_series:
value_exists = _value_exists(value)

if value_exists or suffix in self.time_series:
if datum.value_relation:
model_data["value_relation"] = datum.value_relation
if model_data.get("unit"):
Expand All @@ -410,7 +423,7 @@ def _run_parser(
value_relation_type=datum.value_relation_type,
)

if model_data.get("value"):
if value_exists:
self._general_metadata.append(model)
else:
self._time_series_metadata.append(model)
Expand All @@ -419,14 +432,35 @@ def _run_parser(
for relation in datum.custom_relations:
value = worksheet[relation.object_location].value

if not value:
message = f"""Concept with for iri `{datum.iri}`
does not have a value at location `{relation.object_location}`.
Concept will be omitted in graph.
"""
warnings.warn(message, MappingMissmatchWarning)

if value:
if isinstance(
relation.object_data_type,
(
CustomRelationPropertySubgraph,
CustomRelationQuantitySubgraph,
),
):
if isinstance(
relation.object_data_type,
CustomRelationPropertySubgraph,
):
Model = PropertyGraph
else:
Model = QuantityGraph
model = Model(
value=value,
**relation.object_data_type.model_dump(),
)
model.suffix += "_" + suffix
model = PropertyGraph(
value_relation=relation.relation,
value_relation_type="object_property",
value=model,
iri=datum.iri,
suffix=suffix,
config=self.config,
)
self._general_metadata.append(model)
elif _value_exists(value):
model = PropertyGraph(
value_relation=relation.relation,
value_relation_type=relation.relation_type,
Expand All @@ -437,6 +471,12 @@ def _run_parser(
config=self.config,
)
self._general_metadata.append(model)
else:
message = f"""Concept with for iri `{datum.iri}`
does not have a value at location `{relation.object_location}`.
Concept will be omitted in graph.
"""
warnings.warn(message, MappingMissmatchWarning)

# set time series as pd dataframe
self._time_series = pd.DataFrame.from_dict(
Expand Down
Loading

0 comments on commit d9aed40

Please sign in to comment.