Skip to content

Commit

Permalink
Merge pull request #106 from opendata-swiss/fix_licenses_uri
Browse files Browse the repository at this point in the history
Update outputs of the Licenses and Rights
  • Loading branch information
bellisk authored Aug 8, 2024
2 parents d2a733d + 2e2ed15 commit c13a28c
Show file tree
Hide file tree
Showing 6 changed files with 181 additions and 118 deletions.
140 changes: 108 additions & 32 deletions ckanext/dcatapchharvest/dcat_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
DCT = Namespace("http://purl.org/dc/terms/")
EUTHEMES = \
Namespace("http://publications.europa.eu/resource/authority/data-theme/")
FOAF = Namespace("http://xmlns.com/foaf/0.1/")
HYDRA = Namespace('http://www.w3.org/ns/hydra/core#')

SKOSXL = Namespace("http://www.w3.org/2008/05/skos-xl#")
Expand All @@ -38,6 +39,7 @@
"skosxl": SKOSXL,
"rdf": RDF,
"rdfs": RDFS,
"foaf": FOAF,
}

theme_namespaces = {
Expand Down Expand Up @@ -167,38 +169,112 @@ def get_frequency_values():
return frequency_mapping


def get_license_uri_by_name(vocabulary_name):
    """Return the license URI whose label equals ``vocabulary_name``.

    Both sides are compared as ``unicode``. The first matching key of the
    mapping returned by ``get_license_values()`` is returned, or ``None``
    when no label matches.
    """
    return next(
        (uri for uri, label in get_license_values().items()
         if unicode(vocabulary_name) == unicode(label)),
        None)


def get_license_name_by_uri(vocabulary_uri):
    """Return the label of the license identified by ``vocabulary_uri``.

    The URI is compared as ``unicode`` against every key of the mapping
    returned by ``get_license_values()``. The label of the first match is
    returned as ``unicode``; ``None`` is returned when nothing matches.
    """
    return next(
        (unicode(label) for uri, label in get_license_values().items()
         if unicode(vocabulary_uri) == unicode(uri)),
        None)


def get_license_values():
    """Parse ``license.ttl`` and map each license concept to its label.

    Returns a dict keyed by every subject typed ``skos:Concept``. The value
    is the last ``skosxl:literalForm`` found under the concept's
    ``skosxl:prefLabel`` nodes, or ``None`` when the concept has none.
    """
    graph = Graph()
    for prefix, namespace in license_namespaces.items():
        graph.bind(prefix, namespace)
    ttl_path = os.path.join(__location__, 'license.ttl')
    graph.parse(ttl_path, format='turtle')

    labels_by_ref = {}
    for concept in graph.subjects(predicate=RDF.type, object=SKOS.Concept):
        literal_forms = [
            literal_form
            for label_node in graph.objects(subject=concept,
                                            predicate=SKOSXL.prefLabel)
            for literal_form in graph.objects(subject=label_node,
                                              predicate=SKOSXL.literalForm)
        ]
        # Keep the last literal form encountered, as a plain nested
        # overwrite loop would.
        labels_by_ref[concept] = literal_forms[-1] if literal_forms else None
    return labels_by_ref
class LicenseHandler(object):
    """Look up licenses described in the bundled ``license.ttl`` vocabulary.

    Every ``skos:Concept`` in the vocabulary can be addressed in three ways:
    by its reference URI (the concept's own URI), by its ``foaf:homepage``
    URI, and by its name (the ``skosxl:literalForm`` of its
    ``skosxl:prefLabel``). The vocabulary is parsed once, on the first
    lookup, and the resulting mappings are cached on the instance.

    All keys and values of the mappings are ``unicode`` strings, and every
    lookup returns ``None`` when nothing matches.
    """

    def __init__(self):
        # Filled lazily with a 3-tuple of dicts:
        # (homepage -> name, ref uri -> name, homepage -> ref uri).
        self._license_cache = None

    def _bind_namespaces(self, graph):
        """Bind every prefix of ``license_namespaces`` on ``graph``."""
        for prefix, namespace in license_namespaces.items():
            graph.bind(prefix, namespace)

    def _parse_graph(self, graph):
        """Parse ``license.ttl`` from ``__location__`` into ``graph``."""
        ttl_path = os.path.join(__location__, 'license.ttl')
        graph.parse(ttl_path, format='turtle')

    def _get_license_homepage(self, graph, license_ref):
        """Return the first ``foaf:homepage`` of ``license_ref`` or None."""
        homepages = graph.objects(subject=license_ref,
                                  predicate=FOAF.homepage)
        return next(iter(homepages), None)

    def _get_license_literal(self, graph, license_ref):
        """Return the first literal form of the license's preferred label.

        Preferred-label nodes without a ``skosxl:literalForm`` are skipped;
        ``None`` is returned when no label node carries one.
        """
        for pref_label in graph.objects(subject=license_ref,
                                        predicate=SKOSXL.prefLabel):
            literal_forms = graph.objects(subject=pref_label,
                                          predicate=SKOSXL.literalForm)
            literal = next(iter(literal_forms), None)
            if literal is not None:
                return literal
        return None

    def _process_graph(self, graph):
        """Build the three lookup mappings from a parsed license graph.

        Returns a tuple ``(homepage -> name, ref uri -> name,
        homepage -> ref uri)``.

        A concept without a homepage or without a name is left out of the
        mappings that would need the missing value. Converting the missing
        value with ``unicode()`` would store the string ``"None"`` and make
        lookups return that string instead of ``None``.
        """
        homepage_to_literal = {}
        ref_to_literal = {}
        homepage_to_ref = {}

        for license_ref in graph.subjects(predicate=RDF.type,
                                          object=SKOS.Concept):
            ref = unicode(license_ref)
            homepage = self._get_license_homepage(graph, license_ref)
            literal = self._get_license_literal(graph, license_ref)

            if literal is not None:
                literal = unicode(literal)
                ref_to_literal[ref] = literal
            if homepage is not None:
                homepage = unicode(homepage)
                homepage_to_ref[homepage] = ref
                if literal is not None:
                    homepage_to_literal[homepage] = literal

        return homepage_to_literal, ref_to_literal, homepage_to_ref

    def _get_license_values(self):
        """Return the cached mappings, loading the vocabulary on first use.

        Raises:
            RuntimeError: if binding, parsing or processing the vocabulary
                fails for any reason.
        """
        if self._license_cache is None:
            try:
                graph = Graph()
                self._bind_namespaces(graph)
                self._parse_graph(graph)
                self._license_cache = self._process_graph(graph)
            except Exception as e:
                raise RuntimeError("Failed to load license values: %s"
                                   % e)
        return self._license_cache

    def get_license_ref_uri_by_name(self, vocabulary_name):
        """Return the reference URI of the license named ``vocabulary_name``."""
        _, ref_to_literal, _ = self._get_license_values()
        wanted = unicode(vocabulary_name)
        return next((ref for ref, literal in ref_to_literal.items()
                     if literal == wanted),
                    None)

    def get_license_ref_uri_by_homepage_uri(self, vocabulary_name):
        """Return the reference URI for a homepage URI.

        Despite its name, ``vocabulary_name`` is the homepage URI to look up.
        """
        _, _, homepage_to_ref = self._get_license_values()
        return homepage_to_ref.get(unicode(vocabulary_name))

    def get_license_name_by_ref_uri(self, vocabulary_uri):
        """Return the license name for a reference URI."""
        _, ref_to_literal, _ = self._get_license_values()
        return ref_to_literal.get(unicode(vocabulary_uri))

    def get_license_name_by_homepage_uri(self, vocabulary_uri):
        """Return the license name for a homepage URI."""
        homepage_to_literal, _, _ = self._get_license_values()
        return homepage_to_literal.get(unicode(vocabulary_uri))

    def get_license_homepage_uri_by_name(self, vocabulary_name):
        """Return the homepage URI of the license named ``vocabulary_name``."""
        homepage_to_literal, _, _ = self._get_license_values()
        wanted = unicode(vocabulary_name)
        return next((homepage
                     for homepage, literal in homepage_to_literal.items()
                     if literal == wanted),
                    None)

    def get_license_homepage_uri_by_uri(self, vocabulary_uri):
        """Normalise a license URI to its homepage URI.

        A URI that already is a known homepage is returned unchanged (as
        ``unicode``). A known reference URI is translated to its homepage.
        Anything else yields ``None``.
        """
        _, _, homepage_to_ref = self._get_license_values()
        uri = unicode(vocabulary_uri)
        if uri in homepage_to_ref:
            return uri
        return next((homepage
                     for homepage, ref in homepage_to_ref.items()
                     if ref == uri),
                    None)


def get_theme_mapping():
Expand Down
6 changes: 3 additions & 3 deletions ckanext/dcatapchharvest/license.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
skosxl:literalForm "NonCommercialAllowed-CommercialAllowed-ReferenceRequired"@en ;
rdfs:label "NonCommercialAllowed-CommercialAllowed-ReferenceRequired"@de
] ;
foaf:homepage <https://opendata.swiss/en/terms-of-use/#terms_by> .
foaf:homepage <https://opendata.swiss/terms-of-use/#terms_by> .

<http://dcat-ap.ch/vocabulary/licenses/terms_ask>
a skos:Concept ;
Expand All @@ -61,7 +61,7 @@
skosxl:literalForm "NonCommercialAllowed-CommercialWithPermission-ReferenceNotRequired"@en ;
rdfs:label "NonCommercialAllowed-CommercialWithPermission-ReferenceNotRequired"@de
] ;
foaf:homepage <https://opendata.swiss/en/terms-of-use/#terms_ask> .
foaf:homepage <https://opendata.swiss/terms-of-use/#terms_ask> .

<http://dcat-ap.ch/vocabulary/licenses/terms_by_ask>
a skos:Concept ;
Expand All @@ -76,7 +76,7 @@
skosxl:literalForm "NonCommercialAllowed-CommercialWithPermission-ReferenceRequired"@en ;
rdfs:label "NonCommercialAllowed-CommercialWithPermission-ReferenceRequired"@de
] ;
foaf:homepage <https://opendata.swiss/en/terms-of-use/#terms_by_ask> .
foaf:homepage <https://opendata.swiss/terms-of-use/#terms_by_ask> .

<https://creativecommons.org/publicdomain/zero/1.0/>
a skos:Concept, cc:License ;
Expand Down
123 changes: 55 additions & 68 deletions ckanext/dcatapchharvest/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@
from ckanext.dcat.profiles import CleanedURIRef, RDFProfile, SchemaOrgProfile

log = logging.getLogger(__name__)

license_handler = dh.LicenseHandler()
valid_frequencies = dh.get_frequency_values()
valid_licenses = dh.get_license_values()
eu_theme_mapping = dh.get_theme_mapping()
valid_formats = dh.get_format_values()
valid_media_types = dh.get_iana_media_type_values()
Expand Down Expand Up @@ -277,23 +276,13 @@ def _get_iana_media_type(self, subject):
if media_type_key in valid_media_types:
return media_type_key

def _license_rights_name(self, subject, predicate):
    """Return the license name found for ``predicate`` on ``subject``.

    The objects of ``(subject, predicate)`` in ``self.g`` are scanned in
    order. The first Literal is returned as ``unicode``; the first URIRef
    is resolved through ``dh.get_license_name_by_uri``. Objects of any
    other kind are skipped. ``None`` is returned when no Literal or URIRef
    is found.
    """
    for candidate in self.g.objects(subject, predicate):
        if isinstance(candidate, Literal):
            # DCAT-AP CH v1: the license as a literal (should be
            # the code for one of the DCAT-AP CH licenses)
            return unicode(candidate)
        elif isinstance(candidate, URIRef):
            return dh.get_license_name_by_uri(candidate)
    return None

def _license_rights_uri(self, subject, predicate):
def _license_rights_homepage_uri(self, subject, predicate):
for node in self.g.objects(subject, predicate):
# DCAT-AP CH v2 compatible license has to be a URI.
if isinstance(node, Literal):
return dh.get_license_uri_by_name(node)
return license_handler.get_license_homepage_uri_by_name(node)
if isinstance(node, URIRef):
return node
return license_handler.get_license_homepage_uri_by_uri(node)
return None

def _keywords(self, subject):
Expand Down Expand Up @@ -633,21 +622,34 @@ def parse_dataset(self, dataset_dict, dataset_ref): # noqa
if value:
resource_dict[key] = value

# Rights & License save name
rights = self._license_rights_name(distribution, DCT.rights)
license = self._license_rights_name(distribution, DCT.license)
# Rights & License save homepage uri
rights = self._license_rights_homepage_uri(
distribution, DCT.rights
)
license = self._license_rights_homepage_uri(
distribution, DCT.license
)

if rights is None and license is not None:
resource_dict['license'] = license
resource_dict['rights'] = license
if rights is not None and license is None:
resource_dict['license'] = rights
elif rights is not None and license is None:
resource_dict['rights'] = rights
if license is not None and rights is not None:
if 'cc' not in rights:
resource_dict['license'] = rights
else:
resource_dict['license'] = None
elif license is not None and rights is not None:
resource_dict['license'] = license
resource_dict['rights'] = rights
if 'cc' in rights:
if 'cc' in license and 'cc' not in rights:
resource_dict['license'] = rights
resource_dict['rights'] = license
elif 'cc' in license and 'cc' in rights:
resource_dict['license'] = None
else:
resource_dict['license'] = None
resource_dict['rights'] = None

# Format & Media type
resource_dict['format'] = \
Expand Down Expand Up @@ -1035,55 +1037,40 @@ def graph_from_dataset(self, dataset_dict, dataset_ref): # noqa
g.add((distribution, DCAT.byteSize,
Literal(resource_dict['byte_size'])))

def _get_rights_and_license_uri(self, resource_dict, property='license'):
    """Resolve a resource's stored license/rights value to a URIRef.

    ``resource_dict[property]`` is treated as a license homepage URI. It
    is first mapped directly to the license reference URI; failing that,
    it is mapped to the license name and from the name to the reference
    URI.

    Returns the reference URI as a ``URIRef``, or ``None`` when the value
    is empty/missing or cannot be resolved.

    Raises:
        ValueError: if ``property`` is neither 'license' nor 'rights'.
    """
    if property not in ('license', 'rights'):
        raise ValueError("Property must be 'license' or 'rights'")

    homepage_uri = resource_dict.get(property)
    if not homepage_uri:
        return None

    ref_uri = license_handler.get_license_ref_uri_by_homepage_uri(
        homepage_uri)
    if ref_uri is None:
        # Fall back to resolving via the license name.
        name = license_handler.get_license_name_by_homepage_uri(
            homepage_uri)
        if name is not None:
            ref_uri = license_handler.get_license_ref_uri_by_name(name)

    return None if ref_uri is None else URIRef(ref_uri)

def _rights_and_license_to_graph(self, resource_dict, distribution):
# Adds dct:rights / dct:license triples for `distribution` to self.g,
# based on the 'rights' and 'license' entries of `resource_dict`.
#
# NOTE(review): this span appears to contain two generations of the same
# logic back to back (it reads like a diff whose removed and added lines
# were both kept) - confirm which half is meant to survive.
g = self.g
# Name-based variant: treat the stored 'rights' value as a license name
# and look up its URI with dh.get_license_uri_by_name().
if resource_dict.get('rights'):
rights_uri = dh.get_license_uri_by_name(
resource_dict.get('rights')
)
if rights_uri is not None:
rights_ref = URIRef(rights_uri)
g.add((rights_ref, RDF.type, DCT.RightsStatement))
g.add((distribution, DCT.rights, rights_ref))
# No URI for that name: use the stored value itself as the URI, but only
# if dh.get_license_name_by_uri() recognises it.
if rights_uri is None:
rights_name = dh.get_license_name_by_uri(
resource_dict.get('rights')
)
if rights_name is not None:
resource_rights_ref = URIRef(
resource_dict.get('rights')
)
g.add((
resource_rights_ref,
RDF.type,
DCT.RightsStatement)
)
g.add((distribution, DCT.rights, resource_rights_ref))

# Same two-step lookup for 'license', typed as dct:LicenseDocument.
if resource_dict.get('license'):
license_uri = dh.get_license_uri_by_name(
resource_dict.get('license')
)
if license_uri is not None:
license_ref = URIRef(license_uri)
g.add((license_ref, RDF.type, DCT.LicenseDocument))
g.add((distribution, DCT.license, license_ref))
if license_uri is None:
license_name = dh.get_license_name_by_uri(
resource_dict.get('license')
)
if license_name is not None:
resource_license_ref = URIRef(
resource_dict.get('license')
)
g.add((
resource_license_ref,
RDF.type,
DCT.LicenseDocument)
)
g.add(
(distribution, DCT.license, resource_license_ref)
)
# Helper-based variant: self._get_rights_and_license_uri() resolves the
# stored value to a URIRef (or None); triples are added only when it
# resolves.
rights_uri_ref = self._get_rights_and_license_uri(resource_dict,
'rights')
if rights_uri_ref is not None:
g.add((rights_uri_ref, RDF.type, DCT.RightsStatement))
g.add((distribution, DCT.rights, rights_uri_ref))

license_uri_ref = self._get_rights_and_license_uri(resource_dict,
'license')
if license_uri_ref is not None:
g.add((license_uri_ref, RDF.type, DCT.LicenseDocument))
g.add((distribution, DCT.license, license_uri_ref))

def _format_and_media_type_to_graph(self, resource_dict, distribution):
g = self.g
Expand Down
6 changes: 3 additions & 3 deletions ckanext/dcatapchharvest/tests/fixtures/dataset.json
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@
"https://example.com/documentation-resource-1",
"https://example.com/documentation-resource-2"
],
"rights": "Creative Commons Zero 1.0 Universell (CC0 1.0)",
"license": "NonCommercialAllowed-CommercialAllowed-ReferenceNotRequired",
"rights": "http://www.opendefinition.org/licenses/cc-zero",
"license": "https://opendata.swiss/terms-of-use/#terms_open",
"format": "CSV",
"issued": "2015-06-26T15:21:09.034694",
"modified": "2015-06-30T15:21:09.000000"
Expand All @@ -135,7 +135,7 @@
"https://example.com/documentation-resource-2"
],
"rights": "http://dcat-ap.ch/vocabulary/licenses/terms_by",
"license": "NonCommercialAllowed-CommercialAllowed-ReferenceRequired",
"license": "https://opendata.swiss/terms-of-use/#terms_by",
"format": "HTML"
},
{
Expand Down
Loading

0 comments on commit c13a28c

Please sign in to comment.