From 79a987852a2a8b3bad5090887016400bc427b85f Mon Sep 17 00:00:00 2001 From: briri Date: Wed, 31 Jul 2024 15:27:08 -0700 Subject: [PATCH 1/2] updated ROR downloader code --- app/services/external_apis/ror_service.rb | 37 ++++++++++++----------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/app/services/external_apis/ror_service.rb b/app/services/external_apis/ror_service.rb index cff4abbeeb..f86ca6aaae 100644 --- a/app/services/external_apis/ror_service.rb +++ b/app/services/external_apis/ror_service.rb @@ -56,7 +56,6 @@ def fetch(force: false) # Fetch the Zenodo metadata for ROR to see if we have the latest data dump metadata = fetch_zenodo_metadata - if metadata.present? FileUtils.mkdir_p(file_dir) @@ -67,29 +66,28 @@ def fetch(force: false) if old_checksum_val == metadata[:checksum] log_message(method: method, message: 'There is no new ROR file to process.') else - download_file = metadata.fetch(:links, {})[:download] + download_file = download_file = metadata['key'] + download_url = metadata.fetch('links', {}).fetch('download', metadata.fetch('links', {})['self']) log_message(method: method, message: "New ROR file detected - checksum #{metadata[:checksum]}") log_message(method: method, message: "Downloading #{download_file}") + log_message(method: method, message: "From #{download_url}") + + payload = download_ror_file(url: download_url) + +puts payload[0..100] - payload = download_ror_file(url: metadata.fetch(:links, {})[:download]) if payload.present? file = File.open(zip_file, 'wb') file.write(payload) - # rubocop:disable Metrics/BlockNesting - if validate_downloaded_file(file_path: zip_file, checksum: metadata[:checksum]) - json_file = download_file.split('/').last.gsub('.zip', '') - json_file = "#{json_file}.json" unless json_file.end_with?('.json') - - # Process the ROR JSON - if process_ror_file(zip_file: zip_file, file: json_file) - checksum = File.open(checksum_file, 'w') - checksum.write(metadata[:checksum]) - end - else - log_error(method: method, error: StandardError.new('Downloaded ROR zip does not match checksum!')) + json_file = download_file.split('/').last.gsub('.zip', '') + json_file = "#{json_file}.json" unless json_file.end_with?('.json') + + # Process the ROR JSON + if process_ror_file(zip_file: zip_file, file: json_file) + checksum = File.open(checksum_file, 'w') + checksum.write(metadata[:checksum]) end - # rubocop:enable Metrics/BlockNesting else log_error(method: method, error: StandardError.new('Unable to download ROR file!')) end @@ -121,7 +119,7 @@ def fetch_zenodo_metadata # Extract the most recent file's metadata file_metadata = json.fetch('hits', {}).fetch('hits', []).first&.fetch('files', [])&.last&.with_indifferent_access - unless file_metadata.present? && file_metadata.fetch(:links, {})[:download].present? + unless file_metadata.present? handle_http_failure(method: 'No file found in ROR metadata from Zenodo', http_response: resp) notify_administrators(obj: 'RorService', response: resp) return nil @@ -140,9 +138,12 @@ def download_ror_file(url:) headers = { host: 'zenodo.org', - Accept: 'application/zip' + Accept: 'application/json', + 'Content-Type': 'application/json', + 'User-Agent': "California Digital Library - dmptool.org (mailto:dmptool@ucop.edu)" } resp = http_get(uri: url, additional_headers: headers, debug: false) + unless resp.present? && resp.code == 200 handle_http_failure(method: "Fetching ROR file from Zenodo - #{url}", http_response: resp) notify_administrators(obj: 'RorService', response: resp) From 58690ee7282f0bb8267976cea3e7e949feed30c8 Mon Sep 17 00:00:00 2001 From: briri Date: Wed, 31 Jul 2024 15:37:40 -0700 Subject: [PATCH 2/2] removed debug line --- app/services/external_apis/ror_service.rb | 3 --- 1 file changed, 3 deletions(-) diff --git a/app/services/external_apis/ror_service.rb b/app/services/external_apis/ror_service.rb index f86ca6aaae..8dc9479b02 100644 --- a/app/services/external_apis/ror_service.rb +++ b/app/services/external_apis/ror_service.rb @@ -73,9 +73,6 @@ def fetch(force: false) log_message(method: method, message: "From #{download_url}") payload = download_ror_file(url: download_url) - -puts payload[0..100] - if payload.present? file = File.open(zip_file, 'wb') file.write(payload)