Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix bibliographic #46

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 59 additions & 2 deletions app/models/profiles/profile.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from abc import ABC, abstractmethod
from pathlib import Path
import json

from jinja2 import Environment, FileSystemLoader, select_autoescape
from lxml import etree
Expand Down Expand Up @@ -53,6 +54,11 @@ def shacl_sip() -> Path:
def shacl_profile() -> Path:
pass

@staticmethod
@abstractmethod
def shacl_ie() -> Path:
    """Return the path to the Intellectual Entity SHACL file.

    Abstract: every concrete profile has to provide its own implementation.
    """

@staticmethod
@abstractmethod
def profile_name() -> str:
Expand Down Expand Up @@ -177,9 +183,21 @@ def _construct_shacl_graph(self) -> Graph:
Returns:
A SHACL graph.
"""

pass

def _construct_ie_shacl_graph(self) -> Graph:
    """Build the SHACL graph holding the Intellectual Entity shapes.

    The Turtle file located at `shacl_ie()` is parsed into a new, empty
    graph.

    Returns:
        The SHACL graph parsed from the Intellectual Entity SHACL file.
    """
    ie_shapes = Graph()
    ie_shapes_location = str(self.shacl_ie())
    ie_shapes.parse(ie_shapes_location, format="turtle")
    return ie_shapes

def validate_graph(self, data_graph: Graph) -> bool:
"""Validate if the graph is conform.

Expand All @@ -198,6 +216,37 @@ def validate_graph(self, data_graph: Graph) -> bool:
raise GraphNotConformError(
"Graph is perceived as empty as it does not contain an intellectual entity."
)
result_query = """
PREFIX sh: <http://www.w3.org/ns/shacl#>
PREFIX haObjId: <https://data.hetarchief.be/id/entity/>
SELECT ?message ?severity ?focusNode
WHERE {
?s sh:resultMessage ?message ;
sh:resultSeverity ?severity_iri ;
sh:focusNode ?focusNode_iri
BIND(REPLACE(str(?severity_iri), str(sh:), "") as ?severity)
BIND(REPLACE(str(?focusNode_iri), str(haObjId:), "") as ?focusNode)
} ORDER BY ?severity
"""
ie_shacl_graph = self._construct_ie_shacl_graph()
conforms, results_graph, results_text = shacl_validate(
data_graph=data_graph,
shacl_graph=ie_shacl_graph,
meta_shacl=True,
allow_warnings=True,
)

if not conforms:
results = json.loads(
results_graph.query(result_query).serialize(format="json")
)
result_formatted_text = ""
for result in results["results"]["bindings"]:
result_formatted_text += f"Severity: {result['severity']['value']}\n"
result_formatted_text += f"Id: {result['focusNode']['value']}\n"
result_formatted_text += f"Message: {result['message']['value']}\n\n"
raise GraphNotConformError(result_formatted_text)

shacl_graph = self._construct_shacl_graph()
conforms, results_graph, results_text = shacl_validate(
data_graph=data_graph,
Expand All @@ -207,6 +256,14 @@ def validate_graph(self, data_graph: Graph) -> bool:
)

if not conforms:
raise GraphNotConformError(results_text)
results = json.loads(
results_graph.query(result_query).serialize(format="json")
)
result_formatted_text = ""
for result in results["results"]["bindings"]:
result_formatted_text += f"Severity: {result['severity']['value']}\n"
result_formatted_text += f"Id: {result['focusNode']['value']}\n"
result_formatted_text += f"Message: {result['message']['value']}\n\n"
raise GraphNotConformError(result_formatted_text)

return True
33 changes: 33 additions & 0 deletions app/models/profiles/profile_1_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,17 @@ def dcterms_xsd() -> Path:
"dc_basic.xsd",
)

@staticmethod
def shacl_ie() -> Path:
    """Return the relative path of the Intellectual Entity count SHACL file.

    The file lives under the `1.1/basic` resources directory.
    """
    shacl_dir = Path("app") / "resources" / "1.1" / "basic" / "shacl"
    return shacl_dir / "intellectual_entity_count.shacl.ttl"

def _validate_descriptive(self) -> list[XMLNotValidError]:
"""Validate the dcterms file.

Expand Down Expand Up @@ -340,6 +351,17 @@ def dc_schema_xsd() -> Path:
"descriptive_material_artwork.xsd",
)

@staticmethod
def shacl_ie() -> Path:
    """Return the relative path of the Intellectual Entity count SHACL file.

    The file lives under the `1.1/material_artwork` resources directory.
    """
    shacl_dir = Path("app") / "resources" / "1.1" / "material_artwork" / "shacl"
    return shacl_dir / "intellectual_entity_count.shacl.ttl"

def _validate_descriptive(self) -> list[XMLNotValidError]:
"""Validate the dc+schema files.

Expand Down Expand Up @@ -669,6 +691,17 @@ def shacl_premis() -> Path:
"premis.shacl.ttl",
)

@staticmethod
def shacl_ie() -> Path:
    """Return the relative path of the Intellectual Entity count SHACL file.

    The file lives under the `1.1/newspaper` resources directory.
    """
    shacl_dir = Path("app") / "resources" / "1.1" / "newspaper" / "shacl"
    return shacl_dir / "intellectual_entity_count.shacl.ttl"

def _validate_descriptive(self) -> list[XMLNotValidError]:
"""Validate the MODS or dc+schema files.

Expand Down
36 changes: 34 additions & 2 deletions app/models/profiles/profile_1_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from lxml import etree
from rdflib import Graph

from app.models.profiles.profile import sa
from app.models.profiles.exceptions import XMLNotValidError, GraphParseError
from app.models.profiles.exceptions import XMLNotValidError
from app.models.profiles.profile_1_1 import (
BasicProfile11,
NewspaperProfile11,
Expand Down Expand Up @@ -129,6 +128,17 @@ def dc_schema_xsd() -> Path:
"descriptive_basic.xsd",
)

@staticmethod
def shacl_ie() -> Path:
    """Return the relative path of the Intellectual Entity count SHACL file.

    The file lives under the `1.2/basic` resources directory.
    """
    shacl_dir = Path("app") / "resources" / "1.2" / "basic" / "shacl"
    return shacl_dir / "intellectual_entity_count.shacl.ttl"

def _validate_descriptive(self) -> list[XMLNotValidError]:
"""Validate the dc+schema file.

Expand Down Expand Up @@ -288,6 +298,17 @@ def dc_schema_xsd() -> Path:
"descriptive_material_artwork.xsd",
)

@staticmethod
def shacl_ie() -> Path:
    """Return the relative path of the Intellectual Entity count SHACL file.

    The file lives under the `1.2/material_artwork` resources directory.
    """
    shacl_dir = Path("app") / "resources" / "1.2" / "material_artwork" / "shacl"
    return shacl_dir / "intellectual_entity_count.shacl.ttl"


class BibliographicProfile12(NewspaperProfile11):
def profile_name() -> str:
Expand Down Expand Up @@ -424,3 +445,14 @@ def shacl_premis() -> Path:
"shacl",
"premis.shacl.ttl",
)

@staticmethod
def shacl_ie() -> Path:
    """Return the relative path of the Intellectual Entity count SHACL file.

    The file lives under the `1.2/bibliographic` resources directory.
    """
    shacl_dir = Path("app") / "resources" / "1.2" / "bibliographic" / "shacl"
    return shacl_dir / "intellectual_entity_count.shacl.ttl"
21 changes: 21 additions & 0 deletions app/resources/1.1/basic/shacl/intellectual_entity_count.shacl.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# SHACL shape requiring that the data graph contains exactly one resource
# typed as premis:IntellectualEntity.
@prefix premis: <http://www.loc.gov/premis/rdf/v3/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

<#IntellectualEntityCountShape>
a sh:NodeShape ;
# The focus node is the premis:IntellectualEntity class IRI itself, not its instances.
sh:targetNode premis:IntellectualEntity ;
sh:property [
# Following rdf:type backwards from the class yields every resource typed as premis:IntellectualEntity.
sh:path [ sh:inversePath rdf:type ] ;
# Lower bound: at least one such resource must exist.
sh:minCount 1 ;
sh:severity sh:Violation ;
sh:message """No Intellectual Entity found.
Please check `premis:premis/premis:object/premis:objectIdentifier` and `mods:mods/mods:identifier[not(@type)]`"""@en ;
],
[
# Same inverse rdf:type path as above, used here for the upper bound.
sh:path [ sh:inversePath rdf:type ] ;
# Upper bound: no more than one such resource may exist.
sh:maxCount 1 ;
sh:severity sh:Violation ;
sh:message """More than one Intellectual Entity found.
Please check if `premis:premis/premis:object/premis:objectIdentifier` and `mods:mods/mods:identifier[not(@type)]` have the same value."""@en ;
] .
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# SHACL shape requiring that the data graph contains exactly one resource
# typed as premis:IntellectualEntity.
@prefix premis: <http://www.loc.gov/premis/rdf/v3/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

<#IntellectualEntityCountShape>
a sh:NodeShape ;
# The focus node is the premis:IntellectualEntity class IRI itself, not its instances.
sh:targetNode premis:IntellectualEntity ;
sh:property [
# Following rdf:type backwards from the class yields every resource typed as premis:IntellectualEntity.
sh:path [ sh:inversePath rdf:type ] ;
# Lower bound: at least one such resource must exist.
sh:minCount 1 ;
sh:severity sh:Violation ;
sh:message """No Intellectual Entity found.
Please check `premis:premis/premis:object/premis:objectIdentifier` and `mods:mods/mods:identifier[not(@type)]`"""@en ;
],
[
# Same inverse rdf:type path as above, used here for the upper bound.
sh:path [ sh:inversePath rdf:type ] ;
# Upper bound: no more than one such resource may exist.
sh:maxCount 1 ;
sh:severity sh:Violation ;
sh:message """More than one Intellectual Entity found.
Please check if `premis:premis/premis:object/premis:objectIdentifier` and `mods:mods/mods:identifier[not(@type)]` have the same value."""@en ;
] .
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# SHACL shape requiring that the data graph contains exactly one resource
# typed as premis:IntellectualEntity.
@prefix premis: <http://www.loc.gov/premis/rdf/v3/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

<#IntellectualEntityCountShape>
a sh:NodeShape ;
# The focus node is the premis:IntellectualEntity class IRI itself, not its instances.
sh:targetNode premis:IntellectualEntity ;
sh:property [
# Following rdf:type backwards from the class yields every resource typed as premis:IntellectualEntity.
sh:path [ sh:inversePath rdf:type ] ;
# Lower bound: at least one such resource must exist.
sh:minCount 1 ;
sh:severity sh:Violation ;
sh:message """No Intellectual Entity found.
Please check `premis:premis/premis:object/premis:objectIdentifier` and `mods:mods/mods:identifier[not(@type)]`"""@en ;
],
[
# Same inverse rdf:type path as above, used here for the upper bound.
sh:path [ sh:inversePath rdf:type ] ;
# Upper bound: no more than one such resource may exist.
sh:maxCount 1 ;
sh:severity sh:Violation ;
sh:message """More than one Intellectual Entity found.
Please check if `premis:premis/premis:object/premis:objectIdentifier` and `mods:mods/mods:identifier[not(@type)]` have the same value."""@en ;
] .
15 changes: 13 additions & 2 deletions app/resources/1.1/newspaper/shacl/premis.shacl.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -187,9 +187,20 @@
sh:nodeKind sh:IRI ;
sh:class premis:File ;
sh:name "is source of"@en ;
sh:description "The source of the file"@en ;
sh:description "Is the source of the file"@en ;
sh:message """The File's value for `is source of` relationshipSubType is not a File.
Please check `premis:premis/premis:object[@xsi:type=premis:File]/premis:relationship/(premis:relationshipSubType="is included in")`
Please check `premis:premis/premis:object[@xsi:type=premis:File]/premis:relationship/(premis:relationshipSubType="is source of")`
OR check for mismatch between the File ids `premis:premis/premis:object/premis:objectIdentifier/premis:objectIdentifierValue`."""@en ;
],
[
a sh:PropertyShape ;
sh:path relSubType:hss ;
sh:nodeKind sh:IRI ;
sh:class premis:File ;
sh:name "has source"@en ;
sh:description "The source of the file"@en ;
sh:message """The File's value for `has source` relationshipSubType is not a File.
Please check `premis:premis/premis:object[@xsi:type=premis:File]/premis:relationship/(premis:relationshipSubType="has source")`
OR check for mismatch between the File ids `premis:premis/premis:object/premis:objectIdentifier/premis:objectIdentifierValue`."""@en ;
],
[
Expand Down
21 changes: 21 additions & 0 deletions app/resources/1.2/basic/shacl/intellectual_entity_count.shacl.ttl
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# SHACL shape requiring that the data graph contains exactly one resource
# typed as premis:IntellectualEntity.
@prefix premis: <http://www.loc.gov/premis/rdf/v3/> .
@prefix sh: <http://www.w3.org/ns/shacl#> .
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .

<#IntellectualEntityCountShape>
a sh:NodeShape ;
# The focus node is the premis:IntellectualEntity class IRI itself, not its instances.
sh:targetNode premis:IntellectualEntity ;
sh:property [
# Following rdf:type backwards from the class yields every resource typed as premis:IntellectualEntity.
sh:path [ sh:inversePath rdf:type ] ;
# Lower bound: at least one such resource must exist.
sh:minCount 1 ;
sh:severity sh:Violation ;
sh:message """No Intellectual Entity found.
Please check `premis:premis/premis:object/premis:objectIdentifier` and `mods:mods/mods:identifier[not(@type)]`"""@en ;
],
[
# Same inverse rdf:type path as above, used here for the upper bound.
sh:path [ sh:inversePath rdf:type ] ;
# Upper bound: no more than one such resource may exist.
sh:maxCount 1 ;
sh:severity sh:Violation ;
sh:message """More than one Intellectual Entity found.
Please check if `premis:premis/premis:object/premis:objectIdentifier` and `mods:mods/mods:identifier[not(@type)]` have the same value."""@en ;
] .
Loading