Skip to content

Commit

Permalink
Move InvenioRDM adaptations into new plugin
Browse files Browse the repository at this point in the history
This is still based on C&P... should we extract the "common base" that is still valid and only migrate the relevant parts?

Also: I stupidly changed all string occurrences... to also distinguish configurations and avoid accidental misuse.
  • Loading branch information
led02 committed Nov 1, 2023
1 parent 2364bc3 commit 464375e
Show file tree
Hide file tree
Showing 4 changed files with 737 additions and 65 deletions.
10 changes: 5 additions & 5 deletions hermes.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ from = [ "cff", "git" ]
validate = false

[deposit]
mapping = "invenio"
target = "invenio"
mapping = "invenio_rdm"
target = "invenio_rdm"

[deposit.invenio]
[deposit.invenio_rdm]
site_url = "https://sandbox.zenodo.org"
communities = ["zenodo"]
access_right = "open"

[deposit.invenio.api_paths]
[deposit.invenio_rdm.api_paths]
depositions = "api/deposit/depositions"
licenses = "api/licenses"
licenses = "api/vocabularies/licenses"
communities = "api/communities"
8 changes: 8 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -88,34 +88,42 @@ git_add_branch = "hermes.commands.process.git:add_branch"

[tool.poetry.plugins."hermes.deposit.prepare"]
invenio = "hermes.commands.deposit.invenio:prepare"
invenio_rdm = "hermes.commands.deposit.invenio_rdm:prepare"
file = "hermes.commands.deposit.file:dummy_noop"

[tool.poetry.plugins."hermes.deposit.map"]
invenio = "hermes.commands.deposit.invenio:map_metadata"
invenio_rdm = "hermes.commands.deposit.invenio_rdm:map_metadata"
file = "hermes.commands.deposit.file:map_metadata"

[tool.poetry.plugins."hermes.deposit.create_initial_version"]
invenio = "hermes.commands.deposit.invenio:create_initial_version"
invenio_rdm = "hermes.commands.deposit.invenio_rdm:create_initial_version"
file = "hermes.commands.deposit.file:dummy_noop"

[tool.poetry.plugins."hermes.deposit.create_new_version"]
invenio = "hermes.commands.deposit.invenio:create_new_version"
invenio_rdm = "hermes.commands.deposit.invenio_rdm:create_new_version"
file = "hermes.commands.deposit.file:dummy_noop"

[tool.poetry.plugins."hermes.deposit.update_metadata"]
invenio = "hermes.commands.deposit.invenio:update_metadata"
invenio_rdm = "hermes.commands.deposit.invenio_rdm:update_metadata"
file = "hermes.commands.deposit.file:dummy_noop"

[tool.poetry.plugins."hermes.deposit.delete_artifacts"]
invenio = "hermes.commands.deposit.invenio:delete_artifacts"
invenio_rdm = "hermes.commands.deposit.invenio_rdm:delete_artifacts"
file = "hermes.commands.deposit.file:dummy_noop"

[tool.poetry.plugins."hermes.deposit.upload_artifacts"]
invenio = "hermes.commands.deposit.invenio:upload_artifacts"
invenio_rdm = "hermes.commands.deposit.invenio_rdm:upload_artifacts"
file = "hermes.commands.deposit.file:dummy_noop"

[tool.poetry.plugins."hermes.deposit.publish"]
invenio = "hermes.commands.deposit.invenio:publish"
invenio_rdm = "hermes.commands.deposit.invenio_rdm:publish"
file = "hermes.commands.deposit.file:publish"

[tool.poetry.plugins."hermes.postprocess"]
Expand Down
67 changes: 7 additions & 60 deletions src/hermes/commands/deposit/invenio.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from hermes.model.path import ContextPath
from hermes.utils import hermes_user_agent

_DEFAULT_LICENSES_API_PATH = "api/vocabularies/licenses"
_DEFAULT_LICENSES_API_PATH = "api/licenses"
_DEFAULT_COMMUNITIES_API_PATH = "api/communities"
_DEFAULT_DEPOSITIONS_API_PATH = "api/deposit/depositions"

Expand Down Expand Up @@ -134,7 +134,6 @@ def create_initial_version(click_ctx: click.Context, ctx: CodeMetaContext):
)

if not response.ok:
print(response.text)
raise RuntimeError(f"Could not create initial deposit {deposit_url!r}")

deposit = response.json()
Expand Down Expand Up @@ -534,7 +533,7 @@ def _get_license_identifier(ctx: CodeMetaContext, license_api_url: str):
Typically, Invenio instances offer licenses from https://opendefinition.org and
https://spdx.org. However, it is possible to mint PIDs for custom licenses.
An API endpoint (usually ``/api/vocabularies/licenses``) can be used to check whether a given
An API endpoint (usually ``/api/licenses``) can be used to check whether a given
license is supported by the Invenio instance. This function tries to retrieve the
license by the identifier at the end of the license URL path. If this identifier
does not exist on the Invenio instance, a :class:`RuntimeError` is raised. If no
Expand All @@ -552,71 +551,19 @@ def _get_license_identifier(ctx: CodeMetaContext, license_api_url: str):
"Licenses of type 'CreativeWork' are not supported."
)

# First try: Look up license by assuming lower-case name is the correct identifier
parsed_url = urlparse(license_url)
url_path = parsed_url.path.rstrip("/")
license_id = url_path.split("/")[-1].lower()
license_id = url_path.split("/")[-1]

response = requests.get(
f"{license_api_url}/{license_id}", headers={"User-Agent": hermes_user_agent}
)
if response.ok:
license_info = response.json()

# Second try: Fetch full list of licenses available... maybe we should cache this.
else:
license_info = _look_up_license_info(license_api_url, license_url)

return license_info["id"]


def _look_up_license_info(license_api_url, license_url):
"""Deliberately try to resolve the license URL to a valid InvenioRDM license information record from the
vocabulary.
First, this method tries to find the license URL in the list of known license vocabulary (which is fetched each
time, ouch...).
If the URL is not found (which is pretty probable by now, as CFFConvert produces SPDX-URLs while InvenioRDM still
relies on the overhauled opensource.org URLs), the SPDX information record is fetched and all of its valid cross
references are looked up instead.
:param license_api_url: Base API endpoint for InvenioRDM license vocabulary queries.
:param license_url: The URL of the license we are searching an identifier for.
:return: The vocabulary record that is provided by InvenioRDM.
"""
response = requests.get(
f"{license_api_url}?size=1000", headers={"User-Agent": hermes_user_agent}
)
if response.status_code == 404:
raise RuntimeError(f"Not a valid license identifier: {license_id}")
# Catch other problems
response.raise_for_status()
valid_licenses = response.json()

def _search_license_info(_url):
for license_info in valid_licenses['hits']['hits']:
try:
if license_info['props']['url'] == _url:
return license_info
except KeyError:
continue
else:
return None

license_info = _search_license_info(license_url)
if license_info is None and license_url.startswith('https://spdx.org/licenses/'):
response = requests.get(f"{license_url}.json", headers={"User-Agent": hermes_user_agent})
response.raise_for_status()

for license_cross_ref in response.json()['crossRef']:
if not license_cross_ref['isValid']:
continue

license_info = _search_license_info(license_cross_ref["url"])
if license_info is not None:
break
else:
raise RuntimeError(f"Could not resolve license URL {license_url} to a valid identifier.")

return license_info
return response.json()["id"]


def _get_community_identifiers(ctx: CodeMetaContext, communities_api_url: str):
Expand Down
Loading

0 comments on commit 464375e

Please sign in to comment.