Skip to content

Commit

Permalink
Add rights statement handling and multilingual support in DCAT profile
Browse files Browse the repository at this point in the history
Catalog:
- Rights statement

Dataset:
- Fix temporal resolution, only using ISO 8601

Distribution:
- Always add accessUrl

DataService:
- Add dcat:theme
  • Loading branch information
mjanez committed Jan 23, 2025
1 parent 94d5d65 commit daf6adb
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 20 deletions.
22 changes: 22 additions & 0 deletions ckanext/schemingdcat/profiles/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
SPDX,
CNT,
ORG,
ODRS,
# Default values
eu_dcat_ap_default_values,
)
Expand Down Expand Up @@ -80,6 +81,7 @@
"owl": OWL,
"org": ORG,
"spdx": SPDX,
"odrs": ODRS,
}

default_lang = config.get("ckan.locale_default", "en")
Expand Down Expand Up @@ -748,6 +750,26 @@ def _clean_publisher(self, dataset_ref):
self.g.remove((dataset_ref, DCT.publisher, publisher))
self.g.remove((publisher, None, None))

def _is_valid_temporal_resolution(self, value: str) -> bool:
"""
Validate ISO-8601 duration format.
Format: P[nY][nM][nD][T[nH][nM][nS]]
Examples:
PT1H - 1 hour
P1Y - 1 year
P3M - 3 months
P1DT12H - 1 day, 12 hours
PT30M - 30 minutes
"""
if not value or not isinstance(value, str):
return False

# Simplified pattern that allows single units
pattern = r'^P(?:\d+Y)?(?:\d+M)?(?:\d+D)?(?:T(?:\d+H)?(?:\d+M)?(?:\d+S)?)?$|^PT(?:\d+H)?(?:\d+M)?(?:\d+S)?$'

return bool(re.match(pattern, value))

# Graph enhancements. Fix Graph literals
def _process_batch(self, graph: Graph, updates: List[Tuple]) -> None:
"""
Expand Down
30 changes: 13 additions & 17 deletions ckanext/schemingdcat/profiles/dcat_ap/eu_dcat_ap_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,15 +237,6 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref):
# Catalog URI
catalog_ref = catalog_uri()

# Standard values
self._add_triple_from_dict(
dataset_dict,
dataset_ref,
DCAT.temporalResolution,
"temporal_resolution",
_datatype=XSD.duration,
)

# Lists
for key, predicate, fallbacks, type, datatype, _class in (
(
Expand Down Expand Up @@ -364,13 +355,14 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref):
self.g.add((distribution_ref, DCATAP.availability, URIRef(distribution_availability)))

# Temporal resolution
self._add_triple_from_dict(
resource_dict,
distribution_ref,
DCAT.temporalResolution,
"temporal_resolution",
_datatype=XSD.duration,
)
temporal_resolution = resource_dict.get("temporal_resolution")
log.debug('temporal_resolution:%s', temporal_resolution)
if temporal_resolution and self._is_valid_temporal_resolution(temporal_resolution):
self.g.add((
distribution_ref,
DCAT.temporalResolution,
Literal(temporal_resolution, datatype=XSD.duration)
))

# Spatial resolution in meters
spatial_resolution_in_meters = self._read_list_value(
Expand Down Expand Up @@ -484,7 +476,7 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref):
data_service_hvd_properties = [
('hvd_category', DCATAP.hvdCategory, lambda x: x),
(None, DCT.license, self.g.value, dataset_ref),
(None, DCT.accessRights, self.g.value, dataset_ref)
(None, DCT.accessRights, self.g.value, dataset_ref),
]

# Process mappings
Expand All @@ -501,6 +493,10 @@ def _graph_from_dataset_v2(self, dataset_dict, dataset_ref):
URIRef(value)
))

# Add all DCAT.theme from dataset_ref to access_service_node
for theme in self.g.objects(dataset_ref, DCAT.theme):
self.g.add((access_service_node, DCAT.theme, theme))

# Add DCAT.contactPoint from dataset_ref to access_service_node
contact_point = self.g.value(dataset_ref, DCAT.contactPoint)
if contact_point:
Expand Down
9 changes: 9 additions & 0 deletions ckanext/schemingdcat/profiles/dcat_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
OWL = Namespace("http://www.w3.org/2002/07/owl#")
SPDX = Namespace("http://spdx.org/rdf/terms#")
CNT = Namespace("http://www.w3.org/2011/content#")
ODRS = Namespace("http://schema.theodi.org/odrs#")

CODELISTS_DIR = Path(__file__).resolve().parent.parent / "codelists"
EU_VOCABS_DIR = CODELISTS_DIR / "dcat"
Expand Down Expand Up @@ -125,6 +126,8 @@
'theme_es': 'http://datos.gob.es/kos/sector-publico/sector/sector-publico',
'theme_eu': 'http://publications.europa.eu/resource/authority/data-theme/GOVE',
'theme_taxonomy': 'http://datos.gob.es/kos/sector-publico/sector/',
'rights_uri_label': 'Derechos relativos a la reutilización del Catálogo de Datos Abiertos',
'rights_attribution_text': 'Atribución de la autoría al organismo.',
'spatial_uri': 'http://datos.gob.es/recurso/sector-publico/territorio/Pais/España',
}

Expand All @@ -149,6 +152,8 @@
'theme_es': 'http://datos.gob.es/kos/sector-publico/sector/sector-publico',
'theme_eu': 'http://publications.europa.eu/resource/authority/data-theme/GOVE',
'theme_taxonomy': 'http://datos.gob.es/kos/sector-publico/sector/',
'rights_uri_label': 'Derechos relativos a la reutilización del Catálogo de Datos Abiertos',
'rights_attribution_text': 'Atribución de la autoría al organismo.',
'spatial_uri': 'http://datos.gob.es/recurso/sector-publico/territorio/Pais/España',
}

Expand Down Expand Up @@ -179,6 +184,8 @@
'theme_taxonomy': 'http://inspire.ec.europa.eu/theme',
'theme_es_taxonomy': 'http://datos.gob.es/kos/sector-publico/sector',
'theme_eu_taxonomy': 'http://publications.europa.eu/resource/authority/data-theme',
'rights_uri_label': 'Rights related to the re-use of the Open Data Catalogue',
'rights_attribution_text': 'Attribution of authorship to the organisation.',
'spatial_uri': 'http://publications.europa.eu/resource/authority/country/ESP',
}

Expand Down Expand Up @@ -207,6 +214,8 @@
'theme_taxonomy': 'http://inspire.ec.europa.eu/theme',
'theme_es_taxonomy': 'http://datos.gob.es/kos/sector-publico/sector',
'theme_eu_taxonomy': 'http://publications.europa.eu/resource/authority/data-theme',
'rights_uri_label': 'Rights related to the re-use of the Open Data Catalogue',
'rights_attribution_text': 'Attribution of authorship to the organisation.',
'spatial_uri': 'http://publications.europa.eu/resource/authority/country/ESP',
}

Expand Down
28 changes: 25 additions & 3 deletions ckanext/schemingdcat/profiles/eu_dcat_ap_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
GEODCATAP,
ELI,
DCT,
DC,
ODRS,
ADMS,
VCARD,
FOAF,
Expand All @@ -56,6 +58,7 @@
metadata_field_names,
default_translated_fields,
eu_dcat_ap_default_values,
es_dcat_ap_default_values,
dcat_ap_default_licenses,
# URIS
IANA_MEDIA_TYPES_BASE_URI,
Expand Down Expand Up @@ -829,7 +832,7 @@ def _graph_from_dataset_base(self, dataset_dict, dataset_ref):
g.add((distribution, DCAT.mediaType, URIRef(mimetype_from_fmt)))

# URL fallback and old behavior
url = resource_dict.get("url")
url = resource_dict.get("url") or distribution
download_url = resource_dict.get("download_url")
access_url = resource_dict.get("access_url")

Expand Down Expand Up @@ -933,7 +936,6 @@ def _graph_from_catalog_base(self, catalog_dict, catalog_ref):

# Mandatory elements by NTI-RISP/DCAT-AP-ES (datos.gob.es)
items = [
("identifier", DCT.identifier, catalog_uri(), URIRef),
("encoding", CNT.characterEncoding, "UTF-8", Literal),
("language", DCT.language, language, URIRefOrLiteral),
("spatial_uri", DCT.spatial, spatial_uri, URIRefOrLiteral),
Expand All @@ -943,7 +945,9 @@ def _graph_from_catalog_base(self, catalog_dict, catalog_ref):
("homepage", FOAF.homepage, config.get("ckan.site_url"), URIRef),
("license", DCT.license, license, URIRef),
("conforms_to", DCT.conformsTo, eu_dcat_ap_default_values["conformance"], URIRef),
("access_rights", DCT.accessRights, access_rights, URIRefOrLiteral),
("access_rights", DC.rights, f'{catalog_uri()}/rights', URIRefOrLiteral),
# Unnecessary properties for dcat:Catalog
#("identifier", DCT.identifier, catalog_uri(), URIRef),
#("accessUrl", DCAT.accessURL, f'{catalog_uri()}/catalog.rdf', URIRef),
]

Expand Down Expand Up @@ -1008,6 +1012,24 @@ def _graph_from_catalog_base(self, catalog_dict, catalog_ref):

self._add_triples_from_dict(publisher_details, publisher_ref, items)

# Create rights statement node
rights_uri = URIRef(f'{catalog_uri()}/rights')
g.add((catalog_ref, DC.rights, rights_uri))

# Add rights statement properties
g.add((rights_uri, RDF.type, ODRS.RightsStatement))

# Add multilingual labels
g.add((rights_uri, RDFS.label, Literal(eu_dcat_ap_default_values['rights_uri_label'], lang='en')))
g.add((rights_uri, RDFS.label, Literal(es_dcat_ap_default_values['rights_uri_label'], lang='es')))

# Add multilingual attribution text
g.add((rights_uri, ODRS.attributionText, Literal(eu_dcat_ap_default_values['rights_attribution_text'], lang='en')))
g.add((rights_uri, ODRS.attributionText, Literal(es_dcat_ap_default_values['rights_attribution_text'], lang='es')))

# Add non-language specific properties
g.add((rights_uri, ODRS.dataLicense, URIRef(eu_dcat_ap_default_values["license_url"])))
g.add((rights_uri, ODRS.attributionURL, URIRef(publisher_ref)))

def _assign_theme_tags(self, dataset_dict, key, values):
for value in values:
Expand Down

0 comments on commit daf6adb

Please sign in to comment.