Skip to content

Commit

Permalink
Merge branch 'main' into wip/dev-4516-validate-sub-props-seqnum
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/dsp_tools/resources/validate_data/api-shapes.ttl
  • Loading branch information
Nora-Olivia-Ammann committed Jan 23, 2025
2 parents 897acd3 + 0b954d6 commit 74be187
Show file tree
Hide file tree
Showing 15 changed files with 452 additions and 26 deletions.
4 changes: 3 additions & 1 deletion src/dsp_tools/commands/validate_data/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from dsp_tools.commands.xmlupload.make_rdf_graph.constants import BOOLEAN_PROP_TYPE_INFO
from dsp_tools.commands.xmlupload.make_rdf_graph.constants import COLOR_PROP_TYPE_INFO
from dsp_tools.commands.xmlupload.make_rdf_graph.constants import DECIMAL_PROP_TYPE_INFO
from dsp_tools.commands.xmlupload.make_rdf_graph.constants import GEOMETRY_PROP_TYPE_INFO
from dsp_tools.commands.xmlupload.make_rdf_graph.constants import GEONAME_PROP_TYPE_INFO
from dsp_tools.commands.xmlupload.make_rdf_graph.constants import INT_PROP_TYPE_INFO
from dsp_tools.commands.xmlupload.make_rdf_graph.constants import RICHTEXT_PROP_TYPE_INFO
Expand All @@ -31,7 +32,6 @@
KNORA_API_STR = "http://api.knora.org/ontology/knora-api/v2#"
API_SHAPES_STR = "http://api.knora.org/ontology/knora-api/shapes/v2#"

REGION_RESOURCE = KNORA_API_STR + "Region"
LINKOBJ_RESOURCE = KNORA_API_STR + "LinkObj"
VIDEO_SEGMENT_RESOURCE = KNORA_API_STR + "VideoSegment"
AUDIO_SEGMENT_RESOURCE = KNORA_API_STR + "AudioSegment"
Expand Down Expand Up @@ -90,6 +90,7 @@
KnoraValueType.DATE_VALUE: RDFPropTypeInfo(KNORA_API.DateValue, KNORA_API.valueAsString, XSD.string),
KnoraValueType.DECIMAL_VALUE: DECIMAL_PROP_TYPE_INFO,
KnoraValueType.GEONAME_VALUE: GEONAME_PROP_TYPE_INFO,
KnoraValueType.GEOM_VALUE: GEOMETRY_PROP_TYPE_INFO,
KnoraValueType.LIST_VALUE: RDFPropTypeInfo(KNORA_API.ListValue, API_SHAPES.listNodeAsString, XSD.string),
KnoraValueType.LINK_VALUE: RDFPropTypeInfo(KNORA_API.LinkValue, API_SHAPES.linkValueHasTargetID, XSD.string),
KnoraValueType.INT_VALUE: INT_PROP_TYPE_INFO,
Expand All @@ -111,6 +112,7 @@
KnoraValueType.COLOR_VALUE: TripleObjectType.STRING,
KnoraValueType.DATE_VALUE: TripleObjectType.STRING,
KnoraValueType.DECIMAL_VALUE: TripleObjectType.DECIMAL,
KnoraValueType.GEOM_VALUE: TripleObjectType.STRING,
KnoraValueType.GEONAME_VALUE: TripleObjectType.STRING,
KnoraValueType.LIST_VALUE: TripleObjectType.STRING,
KnoraValueType.LINK_VALUE: TripleObjectType.IRI,
Expand Down
31 changes: 27 additions & 4 deletions src/dsp_tools/commands/validate_data/deserialise_input.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import json
from json import JSONDecodeError
from pathlib import Path

from lxml import etree

from dsp_tools.commands.validate_data.constants import AUDIO_SEGMENT_RESOURCE
from dsp_tools.commands.validate_data.constants import KNORA_API_STR
from dsp_tools.commands.validate_data.constants import REGION_RESOURCE
from dsp_tools.commands.validate_data.constants import VIDEO_SEGMENT_RESOURCE
from dsp_tools.commands.validate_data.constants import XML_ATTRIB_TO_PROP_TYPE_MAPPER
from dsp_tools.commands.validate_data.constants import XML_TAG_TO_VALUE_TYPE_MAPPER
Expand Down Expand Up @@ -43,9 +44,7 @@ def _deserialise_all_resources(root: etree._Element) -> DataDeserialised:
for res in root.iterdescendants(tag="resource"):
dsp_type = None
res_type = res.attrib["restype"]
if res_type == REGION_RESOURCE:
dsp_type = REGION_RESOURCE
elif res_type == VIDEO_SEGMENT_RESOURCE:
if res_type == VIDEO_SEGMENT_RESOURCE:
dsp_type = VIDEO_SEGMENT_RESOURCE
elif res_type == AUDIO_SEGMENT_RESOURCE:
dsp_type = AUDIO_SEGMENT_RESOURCE
Expand Down Expand Up @@ -101,6 +100,8 @@ def _deserialise_one_property(prop_ele: etree._Element) -> list[ValueInformation
return _extract_text_value_information(prop_ele)
case "iiif-uri" | "bitstream" as file_tag:
return _deserialise_file_values(prop_ele, file_tag)
case "geometry-prop":
return _extract_geometry_value_information(prop_ele)
case _:
return []

Expand Down Expand Up @@ -169,6 +170,28 @@ def _extract_text_value_information(prop: etree._Element) -> list[ValueInformati
return all_vals


def _extract_geometry_value_information(prop: etree._Element) -> list[ValueInformation]:
    """
    Build one ValueInformation per child element of a geometry property element.

    The text of each child is parsed as JSON and re-serialised with ``json.dumps``.
    If the text is missing, empty, or not decodable as JSON, the user-facing value is ``None``.

    Args:
        prop: the property element; its ``name`` attribute is used as the property name

    Returns:
        A list with one ValueInformation of type GEOM_VALUE for every child of ``prop``
    """
    property_name = prop.attrib["name"]

    def _normalised_json_or_none(raw_text: str | None) -> str | None:
        # Only non-empty text is worth parsing; everything else counts as "no value".
        if raw_text:
            try:
                parsed = json.loads(raw_text)
            except JSONDecodeError:
                return None
            return json.dumps(parsed)
        return None

    geometry_values: list[ValueInformation] = []
    for geometry_ele in prop.iterchildren():
        normalised_value = _normalised_json_or_none(geometry_ele.text)
        metadata = _extract_metadata_of_value(geometry_ele)
        geometry_values.append(
            ValueInformation(
                user_facing_prop=property_name,
                user_facing_value=normalised_value,
                knora_type=KnoraValueType.GEOM_VALUE,
                value_metadata=metadata,
            )
        )
    return geometry_values


def _get_text_as_string(value: etree._Element) -> str | None:
if len(value):
text_list = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ class KnoraValueType(Enum):
DATE_VALUE = auto()
DECIMAL_VALUE = auto()
GEONAME_VALUE = auto()
GEOM_VALUE = auto()
INT_VALUE = auto()
LINK_VALUE = auto()
LIST_VALUE = auto()
Expand Down
4 changes: 2 additions & 2 deletions src/dsp_tools/commands/validate_data/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from rdflib.term import Node


def reformat_onto_iri(iri: Node) -> str:
def reformat_onto_iri(iri: Node | str) -> str:
"""Takes a rdflib Node and returns a prefixed IRI in string form."""
iri_str = str(iri)
if "http://www.w3.org/2000/01/rdf-schema#" in iri_str:
Expand All @@ -13,6 +13,6 @@ def reformat_onto_iri(iri: Node) -> str:
return f"{onto}:{ending}"


def reformat_data_iri(iri: Node) -> str:
def reformat_data_iri(iri: Node | str) -> str:
    """Takes a rdflib Node or a string within the data namespace and returns it without the namespace."""
    iri_text = str(iri)
    without_namespace = iri_text.replace("http://data/", "")
    return without_namespace
2 changes: 1 addition & 1 deletion src/dsp_tools/commands/validate_data/validate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def _inform_about_experimental_feature() -> None:
"Content of the values",
"Missing files",
"If the file type matches the ontology",
"DSP in-built resources: link (LinkObj)",
"DSP in-built resources: link (LinkObj), region",
]
print(BOLD_CYAN + LIST_SEPARATOR.join(what_is_validated) + RESET_TO_DEFAULT)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ api-shapes:rdfsLabel_Cardinality
###########################
# LinkObj

api-shapes:LinkObj_Cardinalities
knora-api:LinkObj
a sh:NodeShape ;
dash:closedByTypes true ;
sh:property api-shapes:rdfsLabel_Cardinality ,
Expand All @@ -42,10 +42,34 @@ api-shapes:LinkObj_Cardinalities
###########################
# Region

api-shapes:Region_Cardinalities
knora-api:Region
a sh:NodeShape ;
dash:closedByTypes true ;
sh:property api-shapes:rdfsLabel_Cardinality .
sh:property api-shapes:rdfsLabel_Cardinality ,
[
a sh:PropertyShape ;
sh:path knora-api:hasComment
] ,
[ a sh:PropertyShape ;
sh:message "1" ;
sh:minCount 1 ;
sh:maxCount 1 ;
sh:path knora-api:hasColor ;
sh:severity sh:Violation
] ,
[ a sh:PropertyShape ;
sh:message "1" ;
sh:minCount 1 ;
sh:maxCount 1 ;
sh:path knora-api:isRegionOf ;
sh:severity sh:Violation
] ,
[ a sh:PropertyShape ;
sh:message "1-n" ;
sh:minCount 1 ;
sh:path knora-api:hasGeometry ;
sh:severity sh:Violation
] .

###########################
# VideoSegment
Expand Down
91 changes: 90 additions & 1 deletion src/dsp_tools/resources/validate_data/api-shapes.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,15 @@ api-shapes:ColorValue_ClassShape
sh:name "Validates the class type" ;
sh:message "ColorValue" ;
sh:class knora-api:ColorValue ;
sh:property api-shapes:valueHasComment_Shape ;
sh:property api-shapes:colorValueAsColor_Shape, api-shapes:valueHasComment_Shape ;
sh:severity sh:Violation .

# Property shape for knora-api:colorValueAsColor.
# The value must be an xsd:string consisting of "#" followed by exactly
# 3 or 6 hexadecimal digits (for example #fff or #00ff00).
api-shapes:colorValueAsColor_Shape
a sh:PropertyShape ;
sh:message "The value must be a valid color" ;
sh:path knora-api:colorValueAsColor ;
sh:pattern "^#([0-9a-fA-F]{3}){1,2}$" ;
sh:datatype xsd:string ;
sh:severity sh:Violation .


Expand Down Expand Up @@ -138,6 +146,27 @@ api-shapes:geonameValueAsGeonameCode_Shape
sh:severity sh:Violation .


##############
# GeomValue
##############

# Node shape for geometry values.
# The focus node must be an instance of knora-api:GeomValue (sh:class) and must conform to
# the two property shapes listed in sh:property (geometry content and value comment).
api-shapes:GeomValue_ClassShape
a sh:NodeShape ;
sh:name "Validates the class type" ;
sh:message "GeomValue" ;
sh:property api-shapes:geometryValueAsGeometry_Shape, api-shapes:valueHasComment_Shape ;
sh:class knora-api:GeomValue ;
sh:severity sh:Violation .

# Property shape for knora-api:geometryValueAsGeometry.
# The value must be an xsd:string that contains at least one non-whitespace character.
# NOTE: the pattern does not check JSON syntax itself, despite the wording of sh:message.
# NOTE(review): presumably invalid JSON is reduced to an empty value before the graph is
# validated, so that this pattern catches it - confirm against the deserialisation code.
api-shapes:geometryValueAsGeometry_Shape
a sh:PropertyShape ;
sh:message "The value must be a valid geometry JSON object" ;
sh:path knora-api:geometryValueAsGeometry ;
sh:pattern "\\s*\\S+\\s*" ;
sh:datatype xsd:string ;
sh:severity sh:Violation .


##############
# IntValue
##############
Expand Down Expand Up @@ -283,6 +312,25 @@ api-shapes:hasLinkTo_NodeShape
sh:severity sh:Violation .


### knora-api:isRegionOf

# Property shape for knora-api:isRegionOf.
# Every value of the property must conform to both node shapes listed in sh:node:
# api-shapes:LinkValue_ClassShape and api-shapes:isRegionOf_NodeShape.
api-shapes:isRegionOf_PropertyShape
a sh:PropertyShape ;
sh:path knora-api:isRegionOf ;
sh:node api-shapes:LinkValue_ClassShape, api-shapes:isRegionOf_NodeShape .

# Node shape for the link value of knora-api:isRegionOf.
# The node reached through api-shapes:linkValueHasTargetID must be an instance of
# knora-api:Representation. The sh:message is the IRI of the expected class.
api-shapes:isRegionOf_NodeShape
a sh:NodeShape ;
sh:name "This ensures that the target of the property is of type Representation, i.e. exists in the graph." ;
sh:property [
a sh:PropertyShape ;
sh:class knora-api:Representation ;
sh:message "http://api.knora.org/ontology/knora-api/v2#Representation" ;
sh:path api-shapes:linkValueHasTargetID ;
] ;
sh:severity sh:Violation .


### knora-api:hasComment

api-shapes:hasComment_PropertyShape
Expand All @@ -304,6 +352,36 @@ api-shapes:valueHasComment_Shape
sh:severity sh:Violation .


### knora-api:hasColor

# Property shape for knora-api:hasColor.
# Every value of the property must conform to api-shapes:ColorValue_ClassShape.
api-shapes:hasColor_PropertyShape
a sh:PropertyShape ;
sh:node api-shapes:ColorValue_ClassShape ;
sh:path knora-api:hasColor .


### knora-api:hasGeometry

# Property shape for knora-api:hasGeometry.
# Every value of the property must conform to api-shapes:GeomValue_ClassShape.
api-shapes:hasGeometry_PropertyShape
a sh:PropertyShape ;
sh:node api-shapes:GeomValue_ClassShape ;
sh:path knora-api:hasGeometry .


### knora-api:seqnum and knora-api:isPartOf

# Property shape that couples knora-api:seqnum with knora-api:isPartOf.
# dash:coExistsWith works in both directions: if either of the two properties has a value,
# the other one must have a value too. A second shape with the path isPartOf is therefore not needed.
api-shapes:seqnum_PropShape
a sh:PropertyShape ;
sh:message """
The property seqnum or isPartOf (or sub-properties of them) must be used together.
This resource only used one of the properties.
""" ;
sh:path knora-api:seqnum ;
dash:coExistsWith knora-api:isPartOf ;
sh:severity sh:Violation .


#########################################
# DSP BUILT IN RESOURCES
#########################################
Expand All @@ -322,6 +400,17 @@ api-shapes:LinkObj_ResourceShape
###########################
# Region

# Node shape applied to every instance of knora-api:Region (sh:targetClass).
# It bundles the property shapes for the label, the colour, the isRegionOf link,
# the geometry and the comment of a region.
api-shapes:Region_ResourceShape
a sh:NodeShape ;
sh:name "Validates the Region resource" ;
sh:targetClass knora-api:Region ;
sh:property api-shapes:rdfsLabel_Shape ,
api-shapes:hasColor_PropertyShape ,
api-shapes:isRegionOf_PropertyShape ,
api-shapes:hasGeometry_PropertyShape ,
api-shapes:hasComment_PropertyShape ;
sh:severity sh:Violation .


###########################
# VideoSegment
Expand Down
3 changes: 3 additions & 0 deletions test/e2e_validate_data/test_validate_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -474,6 +474,9 @@ def test_reformat_dsp_inbuilt_violation(self, dsp_inbuilt_violation: ValidationR
("link_obj_target_non_existent", LinkedResourceDoesNotExistProblem),
("missing_isPartOf", GenericProblemWithMessage),
("missing_seqnum", GenericProblemWithMessage),
("region_invalid_geometry", InputRegexProblem),
("region_isRegionOf_resource_does_not_exist", LinkedResourceDoesNotExistProblem),
("region_isRegionOf_resource_not_a_representation", LinkTargetTypeMismatchProblem),
("target_must_be_a_representation", LinkTargetTypeMismatchProblem),
("target_must_be_an_image_representation", LinkTargetTypeMismatchProblem),
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,52 @@ def onto_graph() -> Graph:
return g


# Fixture returning a tuple (validation report graph, ontology + data graph).
# The data describes a knora-api:Region whose isRegionOf link value points to a resource
# typed in-built:TestNormalResource; the report contains the matching sh:NodeConstraintComponent
# result with a sh:ClassConstraintComponent detail that names knora-api:Representation.
@pytest.fixture
def report_target_resource_wrong_type(onto_graph: Graph) -> tuple[Graph, Graph]:
validation_str = f"""{PREFIXES}
[
a sh:ValidationResult ;
sh:detail _:detail_bn ;
sh:focusNode <http://data/region_isRegionOf_resource_not_a_representation> ;
sh:resultMessage "Value does not have shape api-shapes:isRegionOf_NodeShape" ;
sh:resultPath <http://api.knora.org/ontology/knora-api/v2#isRegionOf> ;
sh:resultSeverity sh:Violation ;
sh:sourceConstraintComponent sh:NodeConstraintComponent ;
sh:sourceShape <http://api.knora.org/ontology/knora-api/shapes/v2#isRegionOf_PropertyShape> ;
sh:value <http://data/value_isRegionOf>
] .
_:detail_bn a sh:ValidationResult ;
sh:focusNode <http://data/value_isRegionOf> ;
sh:resultMessage "http://api.knora.org/ontology/knora-api/v2#Representation" ;
sh:resultPath <http://api.knora.org/ontology/knora-api/shapes/v2#linkValueHasTargetID> ;
sh:resultSeverity sh:Violation ;
sh:sourceConstraintComponent sh:ClassConstraintComponent ;
sh:sourceShape _:source_shape ;
sh:value <http://data/target_res_without_representation_1> .
"""
# Parse the hand-written validation report into its own graph.
validation_g = Graph()
validation_g.parse(data=validation_str, format="ttl")
data_str = f"""{PREFIXES}
<http://data/region_isRegionOf_resource_not_a_representation>
a knora-api:Region ;
rdfs:label "Region"^^xsd:string ;
knora-api:hasColor <http://data/value_hasColor> ;
knora-api:hasGeometry <http://data/value_hasGeometry> ;
knora-api:isRegionOf <http://data/value_isRegionOf> .
<http://data/value_isRegionOf> a knora-api:LinkValue ;
api-shapes:linkValueHasTargetID <http://data/target_res_without_representation_1> .
<http://data/target_res_without_representation_1> a in-built:TestNormalResource ;
rdfs:label "Resource without Representation"^^xsd:string .
"""
# Combine the triples of the onto_graph fixture with the data triples in a new graph.
onto_data_g = Graph()
onto_data_g += onto_graph
onto_data_g.parse(data=data_str, format="ttl")
return validation_g, onto_data_g


@pytest.fixture
def report_not_resource(onto_graph: Graph) -> tuple[Graph, Graph]:
validation_str = f"""{PREFIXES}
Expand Down
6 changes: 3 additions & 3 deletions test/unittests/commands/validate_data/fixtures/xml_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def decimal_value_corr_several() -> etree._Element:
@pytest.fixture
def geometry_value_corr() -> etree._Element:
return etree.fromstring("""
<geometry-prop name="hasGeometry">
<geometry-prop name="http://api.knora.org/ontology/knora-api/v2#hasGeometry">
<geometry>
{
"status": "active",
Expand All @@ -170,8 +170,8 @@ def geometry_value_corr() -> etree._Element:
@pytest.fixture
def geometry_value_wrong() -> etree._Element:
return etree.fromstring("""
<geometry-prop name="hasGeometry">
<geometry></geometry>
<geometry-prop name="http://api.knora.org/ontology/knora-api/v2#hasGeometry">
<geometry>{ not geometry }</geometry>
</geometry-prop>
""")

Expand Down
Loading

0 comments on commit 74be187

Please sign in to comment.