From 4f718e6b0836a2dc3610899709d5e8e07c32dc9d Mon Sep 17 00:00:00 2001 From: kovalch Date: Thu, 29 Feb 2024 08:43:11 +0100 Subject: [PATCH 1/8] feat: Add a DCAT-AP Switzerland I14Y RDF Harvester from harvesters.py template --- ckanext/dcatapchharvest/harvesters_i14y.py | 188 +++++++++++++++++++++ 1 file changed, 188 insertions(+) create mode 100644 ckanext/dcatapchharvest/harvesters_i14y.py diff --git a/ckanext/dcatapchharvest/harvesters_i14y.py b/ckanext/dcatapchharvest/harvesters_i14y.py new file mode 100644 index 0000000..351f820 --- /dev/null +++ b/ckanext/dcatapchharvest/harvesters_i14y.py @@ -0,0 +1,188 @@ +import json + +import ckan.plugins as p +import ckan.model as model + +from ckanext.dcat.harvesters.rdf import DCATRDFHarvester +from ckanext.dcat.interfaces import IDCATRDFHarvester +from ckanext.dcatapchharvest.dcat_helpers import get_pagination +from ckanext.dcatapchharvest.harvest_helper import ( + map_resources_to_ids, + check_package_change, + create_activity, +) + +import logging +log = logging.getLogger(__name__) + + +class SwissDCATI14YRDFHarvester(DCATRDFHarvester): + p.implements(IDCATRDFHarvester, inherit=True) + + harvest_job = None + current_page_url = None + + def info(self): + return { + 'name': 'dcat_ch_i14y_rdf', + 'title': 'DCAT-AP Switzerland I14Y RDF Harvester', + 'description': 'Harvester for DCAT-AP Switzerland datasets from an RDF graph desighned for i14Y' # noqa + } + + def validate_config(self, source_config): + source_config = super(SwissDCATI14YRDFHarvester, self).validate_config(source_config) # noqa + + if not source_config: + return source_config + + source_config_obj = json.loads(source_config) + + if 'excluded_dataset_identifiers' in source_config_obj: + excluded_dataset_identifiers = source_config_obj['excluded_dataset_identifiers'] # noqa + if not isinstance(excluded_dataset_identifiers, list): + raise ValueError('excluded_dataset_identifiers must be ' + 'a list of strings') + if not all(isinstance(item, basestring) + for 
item in excluded_dataset_identifiers): + raise ValueError('excluded_dataset_identifiers must be ' + 'a list of strings') + + if 'excluded_rights' in source_config_obj: + excluded_rights = source_config_obj['excluded_rights'] + if not isinstance(excluded_rights, list): + raise ValueError('excluded_rights must be ' + 'a list of strings') + if not all(isinstance(item, basestring) + for item in excluded_rights): + raise ValueError('excluded_rights must be ' + 'a list of strings') + + return source_config + + def before_download(self, url, harvest_job): + # save the harvest_job on the instance + self.harvest_job = harvest_job + self.current_page_url = url + + # fix broken URL for City of Zurich + url = url.replace('ogd.global.szh.loc', 'data.stadt-zuerich.ch') + return url, [] + + def _get_guid(self, dataset_dict, source_url=None): # noqa + ''' + Try to get a unique identifier for a harvested dataset + It will be the first found of: + * URI (rdf:about) + * dcat:identifier + * Source URL + Dataset name + * Dataset name + The last two are obviously not optimal, as depend on title, which + might change. + Returns None if no guid could be decided. + ''' + guid = None + + if dataset_dict.get('identifier'): + guid = dataset_dict['identifier'] + # check if the owner_org matches the identifier + try: + if '@' in guid: + org_name = guid.split('@')[-1] # get last element + org = model.Group.by_name(org_name) + if not org: + error_msg = ( + 'The organization in the dataset identifier (%s) ' + 'does not not exist. 
' % org_name + ) + log.error(error_msg) + self._save_gather_error(error_msg, self.harvest_job) + return None + + if org.id != dataset_dict['owner_org']: + error_msg = ( + 'The organization in the dataset identifier (%s) ' + 'does not match the organization in the harvester ' + 'config (%s)' % (org.id, dataset_dict['owner_org']) + ) + log.error(error_msg) + self._save_gather_error(error_msg, self.harvest_job) + return None + except Exception as e: + log.exception("Error when getting identifier: %s" % e) + return None + return dataset_dict['identifier'] + + for extra in dataset_dict.get('extras', []): + if extra['key'] == 'uri' and extra['value']: + return extra['value'] + + if dataset_dict.get('uri'): + return dataset_dict['uri'] + + for extra in dataset_dict.get('extras', []): + if extra['key'] == 'identifier' and extra['value']: + return extra['value'] + + for extra in dataset_dict.get('extras', []): + if extra['key'] == 'dcat_identifier' and extra['value']: + return extra['value'] + + if dataset_dict.get('name'): + guid = dataset_dict['name'] + if source_url: + guid = source_url.rstrip('/') + '/' + guid + + return guid + + def _gen_new_name(self, title): + return super(SwissDCATI14YRDFHarvester, self)._gen_new_name(_derive_flat_title(title)) # noqa + + def before_create(self, harvest_object, dataset_dict, temp_dict): + try: + source_config_obj = json.loads(harvest_object.job.source.config) + for excluded_dataset_identifier in source_config_obj.get('excluded_dataset_identifiers', []): # noqa + if excluded_dataset_identifier == dataset_dict.get('identifier'): # noqa + dataset_dict.clear() + excluded_rights = source_config_obj.get('excluded_rights', []) + dataset_rights = set([res.get('rights') for res in dataset_dict.get('resources', [])]) # noqa + if [rights for rights in dataset_rights if rights in excluded_rights]: # noqa + dataset_dict.clear() + except ValueError: + pass + + def before_update(self, harvest_object, dataset_dict, temp_dict): + existing_pkg = 
map_resources_to_ids(dataset_dict, dataset_dict['name']) + package_changed, msg = check_package_change(existing_pkg, dataset_dict) + if package_changed: + create_activity(package_id=dataset_dict['id'], message=msg) + + def after_download(self, content, harvest_job): + if not content: + after_download_error_msg = \ + 'The content of page-url {} could not be read'.format( + self.current_page_url + ) + log.info(after_download_error_msg) + return False, [after_download_error_msg] + return content, [] + + def after_parsing(self, rdf_parser, harvest_job): + parsed_content = rdf_parser.datasets() + dataset_identifiers = [dataset.get('identifier') + for dataset in parsed_content] + pagination = get_pagination(rdf_parser.g) + log.debug("pagination-info: {}".format(pagination)) + if not dataset_identifiers: + after_parsing_error_msg = \ + 'The content of page-url {} could not be parsed. ' \ + 'Therefore the harvesting was stopped.' \ + 'Pagination info: {}'.format(self.page_url, pagination) + log.info(after_parsing_error_msg) + return False, [after_parsing_error_msg] + log.debug("datasets parsed: {}".format(','.join(dataset_identifiers))) + return rdf_parser, [] + + +def _derive_flat_title(title_dict): + """localizes language dict if no language is specified""" + return title_dict.get('de') or title_dict.get('fr') or title_dict.get('en') or title_dict.get('it') or "" # noqa From 3e1623a335bff5de26e3acca9919e8d3cd946e91 Mon Sep 17 00:00:00 2001 From: kovalch Date: Thu, 29 Feb 2024 08:45:51 +0100 Subject: [PATCH 2/8] feat: Add new entry point for the i14y harvester --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f769ce9..6f0869a 100644 --- a/setup.py +++ b/setup.py @@ -79,6 +79,7 @@ [ckan.plugins] ogdch_dcat=ckanext.dcatapchharvest.plugins:OgdchDcatPlugin dcat_ch_rdf_harvester=ckanext.dcatapchharvest.harvesters:SwissDCATRDFHarvester + dcat_ch_i14y_rdf_harvester=ckanext.dcatapchharvest.harvesters_i14y:SwissDCATI14YRDFHarvester 
[ckan.rdf.profiles] swiss_dcat_ap=ckanext.dcatapchharvest.profiles:SwissDCATAPProfile From 9252c82aef76d9ab4a520ca48fb538f4d34828e8 Mon Sep 17 00:00:00 2001 From: kovalch Date: Thu, 29 Feb 2024 11:18:01 +0100 Subject: [PATCH 3/8] feat: Add SwissDCATI14YRDFHarvester I14Y harvester that adapt the identifier --- ckanext/dcatapchharvest/harvesters.py | 47 +++++- ckanext/dcatapchharvest/harvesters_i14y.py | 188 --------------------- setup.py | 2 +- 3 files changed, 47 insertions(+), 190 deletions(-) delete mode 100644 ckanext/dcatapchharvest/harvesters_i14y.py diff --git a/ckanext/dcatapchharvest/harvesters.py b/ckanext/dcatapchharvest/harvesters.py index b4cfe2b..b783e19 100644 --- a/ckanext/dcatapchharvest/harvesters.py +++ b/ckanext/dcatapchharvest/harvesters.py @@ -2,6 +2,7 @@ import ckan.plugins as p import ckan.model as model +from ckan.logic import NotFound, get_action from ckanext.dcat.harvesters.rdf import DCATRDFHarvester from ckanext.dcat.interfaces import IDCATRDFHarvester @@ -73,7 +74,7 @@ def _get_guid(self, dataset_dict, source_url=None): # noqa Try to get a unique identifier for a harvested dataset It will be the first found of: * URI (rdf:about) - * dcat:identifier + * dct:identifier * Source URL + Dataset name * Dataset name The last two are obviously not optimal, as depend on title, which @@ -186,3 +187,47 @@ def after_parsing(self, rdf_parser, harvest_job): def _derive_flat_title(title_dict): """localizes language dict if no language is specified""" return title_dict.get('de') or title_dict.get('fr') or title_dict.get('en') or title_dict.get('it') or "" # noqa + + +class SwissDCATI14YRDFHarvester(SwissDCATRDFHarvester): + + def info(self): + info = super(SwissDCATI14YRDFHarvester, self).info() + + info['name'] = 'dcat_ch_i14y_rdf' + info['title'] = 'DCAT-AP Switzerland I14Y RDF Harvester' + info['description'] = \ + 'Harvester for DCAT-AP Switzerland datasets from an RDF graph desighned for I14Y' # noqa + + return info + + def _get_guid(self, 
dataset_dict, source_url=None): + guid = super(SwissDCATI14YRDFHarvester, self).\ + _get_guid(dataset_dict, source_url) + + # get organization name + try: + dataset_organization = get_action('organization_show')( + {}, + {'id': dataset_dict['owner_org']} + ) + dataset_organization_name = dataset_organization['name'] + + except NotFound: + raise ValueError( + 'The selected organization was not found.' + ) + + # identifier that has form of , + # should be changed to the form @, + # where slug is an organization name + if (dataset_dict.get('identifier') + and dataset_dict['identifier'] == guid + and '@' not in guid): + dataset_dict['identifier_i14y'] =\ + dataset_dict['identifier'] + dataset_dict['identifier'] =\ + dataset_dict['identifier'] + '@'\ + + dataset_organization_name + + return dataset_dict['identifier'] diff --git a/ckanext/dcatapchharvest/harvesters_i14y.py b/ckanext/dcatapchharvest/harvesters_i14y.py deleted file mode 100644 index 351f820..0000000 --- a/ckanext/dcatapchharvest/harvesters_i14y.py +++ /dev/null @@ -1,188 +0,0 @@ -import json - -import ckan.plugins as p -import ckan.model as model - -from ckanext.dcat.harvesters.rdf import DCATRDFHarvester -from ckanext.dcat.interfaces import IDCATRDFHarvester -from ckanext.dcatapchharvest.dcat_helpers import get_pagination -from ckanext.dcatapchharvest.harvest_helper import ( - map_resources_to_ids, - check_package_change, - create_activity, -) - -import logging -log = logging.getLogger(__name__) - - -class SwissDCATI14YRDFHarvester(DCATRDFHarvester): - p.implements(IDCATRDFHarvester, inherit=True) - - harvest_job = None - current_page_url = None - - def info(self): - return { - 'name': 'dcat_ch_i14y_rdf', - 'title': 'DCAT-AP Switzerland I14Y RDF Harvester', - 'description': 'Harvester for DCAT-AP Switzerland datasets from an RDF graph desighned for i14Y' # noqa - } - - def validate_config(self, source_config): - source_config = super(SwissDCATI14YRDFHarvester, self).validate_config(source_config) # noqa - 
- if not source_config: - return source_config - - source_config_obj = json.loads(source_config) - - if 'excluded_dataset_identifiers' in source_config_obj: - excluded_dataset_identifiers = source_config_obj['excluded_dataset_identifiers'] # noqa - if not isinstance(excluded_dataset_identifiers, list): - raise ValueError('excluded_dataset_identifiers must be ' - 'a list of strings') - if not all(isinstance(item, basestring) - for item in excluded_dataset_identifiers): - raise ValueError('excluded_dataset_identifiers must be ' - 'a list of strings') - - if 'excluded_rights' in source_config_obj: - excluded_rights = source_config_obj['excluded_rights'] - if not isinstance(excluded_rights, list): - raise ValueError('excluded_rights must be ' - 'a list of strings') - if not all(isinstance(item, basestring) - for item in excluded_rights): - raise ValueError('excluded_rights must be ' - 'a list of strings') - - return source_config - - def before_download(self, url, harvest_job): - # save the harvest_job on the instance - self.harvest_job = harvest_job - self.current_page_url = url - - # fix broken URL for City of Zurich - url = url.replace('ogd.global.szh.loc', 'data.stadt-zuerich.ch') - return url, [] - - def _get_guid(self, dataset_dict, source_url=None): # noqa - ''' - Try to get a unique identifier for a harvested dataset - It will be the first found of: - * URI (rdf:about) - * dcat:identifier - * Source URL + Dataset name - * Dataset name - The last two are obviously not optimal, as depend on title, which - might change. - Returns None if no guid could be decided. - ''' - guid = None - - if dataset_dict.get('identifier'): - guid = dataset_dict['identifier'] - # check if the owner_org matches the identifier - try: - if '@' in guid: - org_name = guid.split('@')[-1] # get last element - org = model.Group.by_name(org_name) - if not org: - error_msg = ( - 'The organization in the dataset identifier (%s) ' - 'does not not exist. 
' % org_name - ) - log.error(error_msg) - self._save_gather_error(error_msg, self.harvest_job) - return None - - if org.id != dataset_dict['owner_org']: - error_msg = ( - 'The organization in the dataset identifier (%s) ' - 'does not match the organization in the harvester ' - 'config (%s)' % (org.id, dataset_dict['owner_org']) - ) - log.error(error_msg) - self._save_gather_error(error_msg, self.harvest_job) - return None - except Exception as e: - log.exception("Error when getting identifier: %s" % e) - return None - return dataset_dict['identifier'] - - for extra in dataset_dict.get('extras', []): - if extra['key'] == 'uri' and extra['value']: - return extra['value'] - - if dataset_dict.get('uri'): - return dataset_dict['uri'] - - for extra in dataset_dict.get('extras', []): - if extra['key'] == 'identifier' and extra['value']: - return extra['value'] - - for extra in dataset_dict.get('extras', []): - if extra['key'] == 'dcat_identifier' and extra['value']: - return extra['value'] - - if dataset_dict.get('name'): - guid = dataset_dict['name'] - if source_url: - guid = source_url.rstrip('/') + '/' + guid - - return guid - - def _gen_new_name(self, title): - return super(SwissDCATI14YRDFHarvester, self)._gen_new_name(_derive_flat_title(title)) # noqa - - def before_create(self, harvest_object, dataset_dict, temp_dict): - try: - source_config_obj = json.loads(harvest_object.job.source.config) - for excluded_dataset_identifier in source_config_obj.get('excluded_dataset_identifiers', []): # noqa - if excluded_dataset_identifier == dataset_dict.get('identifier'): # noqa - dataset_dict.clear() - excluded_rights = source_config_obj.get('excluded_rights', []) - dataset_rights = set([res.get('rights') for res in dataset_dict.get('resources', [])]) # noqa - if [rights for rights in dataset_rights if rights in excluded_rights]: # noqa - dataset_dict.clear() - except ValueError: - pass - - def before_update(self, harvest_object, dataset_dict, temp_dict): - existing_pkg = 
map_resources_to_ids(dataset_dict, dataset_dict['name']) - package_changed, msg = check_package_change(existing_pkg, dataset_dict) - if package_changed: - create_activity(package_id=dataset_dict['id'], message=msg) - - def after_download(self, content, harvest_job): - if not content: - after_download_error_msg = \ - 'The content of page-url {} could not be read'.format( - self.current_page_url - ) - log.info(after_download_error_msg) - return False, [after_download_error_msg] - return content, [] - - def after_parsing(self, rdf_parser, harvest_job): - parsed_content = rdf_parser.datasets() - dataset_identifiers = [dataset.get('identifier') - for dataset in parsed_content] - pagination = get_pagination(rdf_parser.g) - log.debug("pagination-info: {}".format(pagination)) - if not dataset_identifiers: - after_parsing_error_msg = \ - 'The content of page-url {} could not be parsed. ' \ - 'Therefore the harvesting was stopped.' \ - 'Pagination info: {}'.format(self.page_url, pagination) - log.info(after_parsing_error_msg) - return False, [after_parsing_error_msg] - log.debug("datasets parsed: {}".format(','.join(dataset_identifiers))) - return rdf_parser, [] - - -def _derive_flat_title(title_dict): - """localizes language dict if no language is specified""" - return title_dict.get('de') or title_dict.get('fr') or title_dict.get('en') or title_dict.get('it') or "" # noqa diff --git a/setup.py b/setup.py index 6f0869a..732517e 100644 --- a/setup.py +++ b/setup.py @@ -79,7 +79,7 @@ [ckan.plugins] ogdch_dcat=ckanext.dcatapchharvest.plugins:OgdchDcatPlugin dcat_ch_rdf_harvester=ckanext.dcatapchharvest.harvesters:SwissDCATRDFHarvester - dcat_ch_i14y_rdf_harvester=ckanext.dcatapchharvest.harvesters_i14y:SwissDCATI14YRDFHarvester + dcat_ch_i14y_rdf_harvester=ckanext.dcatapchharvest.harvesters:SwissDCATI14YRDFHarvester [ckan.rdf.profiles] swiss_dcat_ap=ckanext.dcatapchharvest.profiles:SwissDCATAPProfile From 0e0c2bcd98b89332c6086fa238b2ce18166098ba Mon Sep 17 00:00:00 2001 From: 
kovalch Date: Fri, 1 Mar 2024 13:41:18 +0100 Subject: [PATCH 4/8] style: Update description of the new harvester --- ckanext/dcatapchharvest/harvesters.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ckanext/dcatapchharvest/harvesters.py b/ckanext/dcatapchharvest/harvesters.py index b783e19..c2f7e8b 100644 --- a/ckanext/dcatapchharvest/harvesters.py +++ b/ckanext/dcatapchharvest/harvesters.py @@ -197,7 +197,8 @@ def info(self): info['name'] = 'dcat_ch_i14y_rdf' info['title'] = 'DCAT-AP Switzerland I14Y RDF Harvester' info['description'] = \ - 'Harvester for DCAT-AP Switzerland datasets from an RDF graph desighned for I14Y' # noqa + 'Harvester for DCAT-AP Switzerland datasets from ' \ + 'an RDF graph designed for I14Y' return info From 965753749cd46c409cea6e205f27adca752c906e Mon Sep 17 00:00:00 2001 From: kovalch Date: Fri, 1 Mar 2024 14:28:09 +0100 Subject: [PATCH 5/8] style: Remove not needed noqa --- ckanext/dcatapchharvest/harvesters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/dcatapchharvest/harvesters.py b/ckanext/dcatapchharvest/harvesters.py index c2f7e8b..6667992 100644 --- a/ckanext/dcatapchharvest/harvesters.py +++ b/ckanext/dcatapchharvest/harvesters.py @@ -69,7 +69,7 @@ def before_download(self, url, harvest_job): url = url.replace('ogd.global.szh.loc', 'data.stadt-zuerich.ch') return url, [] - def _get_guid(self, dataset_dict, source_url=None): # noqa + def _get_guid(self, dataset_dict, source_url=None): ''' Try to get a unique identifier for a harvested dataset It will be the first found of: From 9364990082c7d85bb7ef8d32bfc569d7b9c51cda Mon Sep 17 00:00:00 2001 From: kovalch Date: Fri, 1 Mar 2024 14:31:52 +0100 Subject: [PATCH 6/8] style: Move back noqa --- ckanext/dcatapchharvest/harvesters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/dcatapchharvest/harvesters.py b/ckanext/dcatapchharvest/harvesters.py index 6667992..c2f7e8b 100644 --- 
a/ckanext/dcatapchharvest/harvesters.py +++ b/ckanext/dcatapchharvest/harvesters.py @@ -69,7 +69,7 @@ def before_download(self, url, harvest_job): url = url.replace('ogd.global.szh.loc', 'data.stadt-zuerich.ch') return url, [] - def _get_guid(self, dataset_dict, source_url=None): + def _get_guid(self, dataset_dict, source_url=None): # noqa ''' Try to get a unique identifier for a harvested dataset It will be the first found of: From 971010c26c99ad844d890c39623667e3f2b38bf9 Mon Sep 17 00:00:00 2001 From: kovalch Date: Tue, 28 May 2024 10:42:21 +0200 Subject: [PATCH 7/8] fix: Use ckan.plugins.toolkit instead of ckan.logic --- ckanext/dcatapchharvest/harvesters.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ckanext/dcatapchharvest/harvesters.py b/ckanext/dcatapchharvest/harvesters.py index c2f7e8b..6034d30 100644 --- a/ckanext/dcatapchharvest/harvesters.py +++ b/ckanext/dcatapchharvest/harvesters.py @@ -2,7 +2,7 @@ import ckan.plugins as p import ckan.model as model -from ckan.logic import NotFound, get_action +import ckan.plugins.toolkit as tk from ckanext.dcat.harvesters.rdf import DCATRDFHarvester from ckanext.dcat.interfaces import IDCATRDFHarvester @@ -208,13 +208,13 @@ def _get_guid(self, dataset_dict, source_url=None): # get organization name try: - dataset_organization = get_action('organization_show')( + dataset_organization = tk.get_action('organization_show')( {}, {'id': dataset_dict['owner_org']} ) dataset_organization_name = dataset_organization['name'] - except NotFound: + except tk.NotFound: raise ValueError( 'The selected organization was not found.' 
) From 2d407250c5dea733413377a622db6c69643cb2d1 Mon Sep 17 00:00:00 2001 From: kovalch Date: Tue, 28 May 2024 11:05:35 +0200 Subject: [PATCH 8/8] fix: ObjectNotFound exception for tk instead of logic.NotFound --- ckanext/dcatapchharvest/harvesters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/dcatapchharvest/harvesters.py b/ckanext/dcatapchharvest/harvesters.py index 6034d30..9520fc1 100644 --- a/ckanext/dcatapchharvest/harvesters.py +++ b/ckanext/dcatapchharvest/harvesters.py @@ -214,7 +214,7 @@ def _get_guid(self, dataset_dict, source_url=None): ) dataset_organization_name = dataset_organization['name'] - except tk.NotFound: + except tk.ObjectNotFound: raise ValueError( 'The selected organization was not found.' )