From 150860e71a383a3afc7450904f5e826290d4b993 Mon Sep 17 00:00:00 2001 From: william dutton Date: Wed, 15 Feb 2023 15:25:26 +1000 Subject: [PATCH 1/4] Allow User-Agent header to be added from config. --- ckanext/harvest/harvesters/ckanharvester.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index 1da0787e..739baff8 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -36,6 +36,11 @@ def _get_search_api_offset(self): def _get_content(self, url): headers = {} + + user_agent = self.config.get('user_agent') + if user_agent: + headers['User-Agent'] = user_agent + api_key = self.config.get('api_key') if api_key: headers['Authorization'] = api_key From 26c901f674707ae11acd2611a58c66fa9b6254ad Mon Sep 17 00:00:00 2001 From: antuarc Date: Fri, 15 Nov 2024 15:26:44 +1000 Subject: [PATCH 2/4] fix test container account, GitHub ckan/ckan-docker-base #86 --- .github/workflows/test.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 32fc0d5e..3271637d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -32,6 +32,7 @@ jobs: runs-on: ubuntu-latest container: image: ${{ matrix.ckan-image }} + options: --user root services: solr: image: ckan/ckan-solr:${{ matrix.ckan-version }}-solr9 @@ -63,7 +64,7 @@ jobs: - name: Install requirements (2.9) run: | pip install -U pytest-rerunfailures - if: ${{ matrix.ckan-version == '2.9' }} + if: ${{ matrix.ckan-version == '2.9' }} - name: Setup extension (CKAN >= 2.9) run: | ckan -c test.ini db init From 955e283bc4aea9c89f5d28aa8eacf9baec844971 Mon Sep 17 00:00:00 2001 From: antuarc Date: Fri, 15 Nov 2024 15:42:56 +1000 Subject: [PATCH 3/4] add custom User Agent config value to README --- README.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.rst b/README.rst index d3508ac0..266630f4 100644 --- a/README.rst +++ b/README.rst @@ -232,7 +232,7 @@ For example, in case you want to retain changes made by the users to the fields Command line interface ====================== -The ``ckan harvester`` command provides utilities to manage harvest operations from the command line. +The ``ckan harvester`` command provides utilities to manage harvest operations from the command line. Please refer to the help message of each command for more details:: @@ -329,6 +329,9 @@ field. The currently supported configuration options are: * api_key: If the remote CKAN instance has restricted access to the API, you can provide a CKAN API key, which will be sent in any request. +* user_agent: Set a custom user agent string on gathering and fetching, + to handle servers that whitelist or blacklist specific values. + * read_only: Create harvested packages in read-only mode. Only the user who performed the harvest (the one defined in the previous setting or the 'harvest' sysadmin) will be able to edit and administer the packages From 51c60e473b6d2ec2f483976308b3c5a645679405 Mon Sep 17 00:00:00 2001 From: antuarc Date: Thu, 28 Nov 2024 10:45:39 +1000 Subject: [PATCH 4/4] ensure user agent is a string --- ckanext/harvest/harvesters/ckanharvester.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/harvest/harvesters/ckanharvester.py b/ckanext/harvest/harvesters/ckanharvester.py index e8d5324d..878f20c5 100644 --- a/ckanext/harvest/harvesters/ckanharvester.py +++ b/ckanext/harvest/harvesters/ckanharvester.py @@ -38,7 +38,7 @@ def _get_content(self, url): user_agent = self.config.get('user_agent') if user_agent: - headers['User-Agent'] = user_agent + headers['User-Agent'] = str(user_agent) api_key = self.config.get('api_key') if api_key: