Skip to content

Commit

Permalink
refactor: Add LicenseHandler class to handle caching of license vocab…
Browse files Browse the repository at this point in the history
…ulary
  • Loading branch information
kovalch committed Aug 5, 2024
1 parent d487e43 commit fe146b0
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 55 deletions.
149 changes: 101 additions & 48 deletions ckanext/dcatapchharvest/dcat_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,56 +169,109 @@ def get_frequency_values():
return frequency_mapping


def get_license_ref_uri_by_name(vocabulary_name):
    """Return the license reference URI stored for a license name.

    Searches the second mapping returned by ``get_license_values()``
    (keys are returned, values are compared) for an entry whose value
    equals ``unicode(vocabulary_name)``.

    :param vocabulary_name: license name to look for; coerced with
        ``unicode()`` before comparison
    :returns: the key of the first matching entry, or ``None`` when no
        value matches
    """
    _, ref_to_literal, _ = get_license_values()
    for ref_uri in ref_to_literal:
        literal = ref_to_literal[ref_uri]
        if unicode(vocabulary_name) == literal:
            return ref_uri
    return None


def get_license_ref_uri_by_homepage_uri(vocabulary_name):
    """Return the license reference URI registered for a homepage URI.

    Looks ``unicode(vocabulary_name)`` up as a key in the third mapping
    returned by ``get_license_values()``.

    :param vocabulary_name: despite its name, this is compared against
        the mapping's keys (homepage URIs, judging by the function
        name); coerced with ``unicode()`` first
    :returns: the value stored under that key, or ``None`` when the key
        is not present
    """
    _, _, homepage_to_ref = get_license_values()
    # A direct dictionary lookup replaces scanning every item for a key.
    return homepage_to_ref.get(unicode(vocabulary_name))


def get_license_name_by_ref_uri(vocabulary_uri):
    """Return the license name stored for a license reference URI.

    Looks ``unicode(vocabulary_uri)`` up as a key in the second mapping
    returned by ``get_license_values()``.

    :param vocabulary_uri: URI to look up; coerced with ``unicode()``
    :returns: the value stored under that key, or ``None`` when the key
        is not present
    """
    _, ref_to_literal, _ = get_license_values()
    # A direct dictionary lookup replaces scanning every item for a key.
    return ref_to_literal.get(unicode(vocabulary_uri))


def get_license_name_by_homepage_uri(vocabulary_uri):
    """Return the license name stored for a license homepage URI.

    Looks ``unicode(vocabulary_uri)`` up as a key in the first mapping
    returned by ``get_license_values()``.

    :param vocabulary_uri: URI to look up; coerced with ``unicode()``
    :returns: the value stored under that key, or ``None`` when the key
        is not present
    """
    homepage_to_literal, _, _ = get_license_values()
    # A direct dictionary lookup replaces scanning every item for a key.
    return homepage_to_literal.get(unicode(vocabulary_uri))


def get_license_homepage_uri_by_name(vocabulary_name):
    """Return the license homepage URI stored for a license name.

    Searches the first mapping returned by ``get_license_values()``
    (keys are returned, values are compared) for an entry whose value
    equals ``unicode(vocabulary_name)``.

    :param vocabulary_name: license name to look for; coerced with
        ``unicode()`` before comparison
    :returns: the key of the first matching entry, or ``None`` when no
        value matches
    """
    homepage_to_literal, _, _ = get_license_values()
    for homepage_uri in homepage_to_literal:
        literal = homepage_to_literal[homepage_uri]
        if unicode(vocabulary_name) == literal:
            return homepage_uri
    return None


def get_license_homepage_uri_by_uri(vocabulary_uri):
_, _, license_homepage_ref_vocabulary = get_license_values()
license_homepages = list(license_homepage_ref_vocabulary.keys())
if unicode(vocabulary_uri) in license_homepages:
return unicode(vocabulary_uri)
else:
class LicenseHandler:
    """Look up license URIs and names from the ``license.ttl`` vocabulary.

    The vocabulary file is parsed at most once per instance: the first
    lookup builds three dictionaries and stores them on the instance,
    and every later lookup reuses them.
    """

    def __init__(self):
        # Filled lazily by _get_license_values() with a 3-tuple of dicts.
        self._license_cache = None

    def _get_license_values(self):
        """Return the cached license mappings, loading them on first use.

        The mappings are built from ``license.ttl`` next to this module
        (``__location__`` and ``license_namespaces`` are module-level
        names defined elsewhere in the file).

        :returns: a tuple of three dicts with unicode keys and values:
            ``(homepage URI -> literal, reference URI -> literal,
            homepage URI -> reference URI)``
        :raises RuntimeError: if parsing the file or building the
            mappings fails for any reason
        """
        if self._license_cache is not None:
            return self._license_cache

        try:
            g = Graph()
            ref_to_literal = {}
            homepage_to_literal = {}
            homepage_to_ref = {}

            for prefix, namespace in license_namespaces.items():
                g.bind(prefix, namespace)
            # Named "license_file" so the Python 2 builtin "file" is not
            # shadowed.
            license_file = os.path.join(__location__, 'license.ttl')
            g.parse(license_file, format='turtle')

            for ogdch_license_ref in g.subjects(predicate=RDF.type,
                                                object=SKOS.Concept):
                license_homepage = None
                for homepage in g.objects(subject=ogdch_license_ref,
                                          predicate=FOAF.homepage):
                    license_homepage = homepage
                    break  # Assume one homepage per concept

                license_literal = None
                try:
                    for license_pref_label in g.objects(
                            subject=ogdch_license_ref,
                            predicate=SKOSXL.prefLabel):
                        # next() raises StopIteration when the label node
                        # has no literalForm; that surfaces as the
                        # ValueError raised below.
                        license_literal = next(
                            g.objects(subject=license_pref_label,
                                      predicate=SKOSXL.literalForm))
                        if license_literal is not None:
                            break  # Assume one literal per concept

                    homepage_to_literal[unicode(license_homepage)] = \
                        unicode(license_literal)
                    ref_to_literal[unicode(ogdch_license_ref)] = \
                        unicode(license_literal)
                    homepage_to_ref[unicode(license_homepage)] = \
                        unicode(ogdch_license_ref)

                except Exception as e:
                    raise ValueError(
                        "SKOSXL.prefLabel is missing in the RDF-file: %s"
                        % e)

            self._license_cache = (homepage_to_literal,
                                   ref_to_literal,
                                   homepage_to_ref)
        except Exception as e:
            raise RuntimeError("Failed to load license values: %s"
                               % e)
        return self._license_cache

    def get_license_ref_uri_by_name(self, vocabulary_name):
        """Return the reference URI whose literal equals the given name.

        :param vocabulary_name: license name; coerced with ``unicode()``
        :returns: the first matching reference URI, or ``None``
        """
        _, ref_to_literal, _ = self._get_license_values()
        wanted = unicode(vocabulary_name)
        for ref_uri, literal in ref_to_literal.items():
            if literal == wanted:
                return ref_uri
        return None

    def get_license_ref_uri_by_homepage_uri(self, vocabulary_name):
        """Return the reference URI registered for a homepage URI.

        :param vocabulary_name: despite its name, this is looked up among
            the homepage URIs; coerced with ``unicode()``
        :returns: the reference URI, or ``None`` if the homepage is
            unknown
        """
        _, _, homepage_to_ref = self._get_license_values()
        return homepage_to_ref.get(unicode(vocabulary_name))

    def get_license_name_by_ref_uri(self, vocabulary_uri):
        """Return the license literal stored for a reference URI.

        :param vocabulary_uri: reference URI; coerced with ``unicode()``
        :returns: the literal, or ``None`` if the URI is unknown
        """
        _, ref_to_literal, _ = self._get_license_values()
        return ref_to_literal.get(unicode(vocabulary_uri))

    def get_license_name_by_homepage_uri(self, vocabulary_uri):
        """Return the license literal stored for a homepage URI.

        :param vocabulary_uri: homepage URI; coerced with ``unicode()``
        :returns: the literal, or ``None`` if the URI is unknown
        """
        homepage_to_literal, _, _ = self._get_license_values()
        return homepage_to_literal.get(unicode(vocabulary_uri))

    def get_license_homepage_uri_by_name(self, vocabulary_name):
        """Return the homepage URI whose literal equals the given name.

        :param vocabulary_name: license name; coerced with ``unicode()``
        :returns: the first matching homepage URI, or ``None``
        """
        homepage_to_literal, _, _ = self._get_license_values()
        wanted = unicode(vocabulary_name)
        for homepage_uri, literal in homepage_to_literal.items():
            if literal == wanted:
                return homepage_uri
        return None

    def get_license_homepage_uri_by_uri(self, vocabulary_uri):
        """Return the homepage URI for a homepage or reference URI.

        A URI that is already a known homepage is returned unchanged
        (as unicode); otherwise it is treated as a reference URI and the
        homepage registered for it is returned.

        :param vocabulary_uri: URI to resolve; coerced with ``unicode()``
        :returns: the homepage URI, or ``None`` if the URI matches
            neither a homepage nor a reference URI
        """
        _, _, homepage_to_ref = self._get_license_values()
        wanted = unicode(vocabulary_uri)
        if wanted in homepage_to_ref:
            return wanted
        for homepage_uri, ref_uri in homepage_to_ref.items():
            if ref_uri == wanted:
                return homepage_uri
        return None


def get_license_values():
Expand Down
13 changes: 6 additions & 7 deletions ckanext/dcatapchharvest/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,8 @@
from ckanext.dcat.profiles import CleanedURIRef, RDFProfile, SchemaOrgProfile

log = logging.getLogger(__name__)

license_handler = dh.LicenseHandler()
valid_frequencies = dh.get_frequency_values()
valid_licenses = dh.get_license_values()
eu_theme_mapping = dh.get_theme_mapping()
valid_formats = dh.get_format_values()
valid_media_types = dh.get_iana_media_type_values()
Expand Down Expand Up @@ -281,9 +280,9 @@ def _license_rights_homepage_uri(self, subject, predicate):
for node in self.g.objects(subject, predicate):
# DCAT-AP CH v2 compatible license has to be a URI.
if isinstance(node, Literal):
return dh.get_license_homepage_uri_by_name(node)
return license_handler.get_license_homepage_uri_by_name(node)
if isinstance(node, URIRef):
return dh.get_license_homepage_uri_by_uri(node)
return license_handler.get_license_homepage_uri_by_uri(node)
return None

def _keywords(self, subject):
Expand Down Expand Up @@ -1044,13 +1043,13 @@ def _get_rights_and_license_uri(self, resource_dict, property='license'):
if not homepage_uri:
return None

uri = dh.get_license_ref_uri_by_homepage_uri(homepage_uri)
uri = license_handler.get_license_ref_uri_by_homepage_uri(homepage_uri)
if uri is not None:
return URIRef(uri)

name = dh.get_license_name_by_homepage_uri(homepage_uri)
name = license_handler.get_license_name_by_homepage_uri(homepage_uri)
if name is not None:
uri = dh.get_license_ref_uri_by_name(name)
uri = license_handler.get_license_ref_uri_by_name(name)
if uri is not None:
return URIRef(uri)

Expand Down

0 comments on commit fe146b0

Please sign in to comment.