Skip to content

Commit

Permalink
feat: For I14Y harvester adapt the identifier
Browse files Browse the repository at this point in the history
  • Loading branch information
kovalch committed Feb 29, 2024
1 parent 3e1623a commit 653f0ba
Showing 1 changed file with 22 additions and 2 deletions.
24 changes: 22 additions & 2 deletions ckanext/dcatapchharvest/harvesters_i14y.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import ckan.plugins as p
import ckan.model as model
from ckan.logic import NotFound, get_action

from ckanext.dcat.harvesters.rdf import DCATRDFHarvester
from ckanext.dcat.interfaces import IDCATRDFHarvester
Expand Down Expand Up @@ -73,19 +74,31 @@ def _get_guid(self, dataset_dict, source_url=None): # noqa
Try to get a unique identifier for a harvested dataset
It will be the first found of:
* URI (rdf:about)
* dcat:identifier
* dct:identifier
* Source URL + Dataset name
* Dataset name
The last two are obviously not optimal, as they depend on the title, which
might change.
Returns None if no guid could be decided.
'''
guid = None
# get organization name
try:
dataset_organization = get_action('organization_show')(
{},
{'id': dataset_dict['owner_org']}
)
dataset_organization_name = dataset_organization['name']

except NotFound:
raise ValueError(
_('The selected organization was not found.') # noqa
)

if dataset_dict.get('identifier'):
guid = dataset_dict['identifier']
# check if the owner_org matches the identifier
try:
# check if the owner_org matches the identifier
if '@' in guid:
org_name = guid.split('@')[-1] # get last element
org = model.Group.by_name(org_name)
Expand All @@ -107,6 +120,13 @@ def _get_guid(self, dataset_dict, source_url=None): # noqa
log.error(error_msg)
self._save_gather_error(error_msg, self.harvest_job)
return None
else:
# save original I14Y identifier and create a new identifier of the form <id>@<slug>,# noqa
# where slug is an organization name
dataset_dict['identifier_i14y'] = dataset_dict['identifier'] # noqa
dataset_dict['identifier'] = \
dataset_dict['identifier'] + '@' + dataset_organization_name # noqa

except Exception as e:
log.exception("Error when getting identifier: %s" % e)
return None
Expand Down

0 comments on commit 653f0ba

Please sign in to comment.