diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml new file mode 100644 index 0000000..c9f7872 --- /dev/null +++ b/.github/workflows/python-package.yml @@ -0,0 +1,62 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python +# Some changes have been made: different OS +# Copied from kuleuven/mango-mdschema + +name: Python package + +run-name: Build triggered via ${{ github.event_name }} by ${{ github.actor }} + +on: + # push won't trigger this because you cannot push without a PR anyway + pull_request: + branches: ["main"] + +jobs: + test: + strategy: + fail-fast: false + matrix: + # test in multiple python versions and OS + # results in 12 (4x3) jobs + python-version: ["3.8", "3.9", "3.10", "3.11"] + os: [ubuntu-latest, windows-latest, macos-latest] + runs-on: ${{ matrix.os }} + + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install pytest pytest-cov + python -m pip install -r requirements.txt + - name: Test with pytest + run: | + pytest tests --doctest-modules --junitxml=junit/test-results-${{ matrix.python-version }}-${{ matrix.os }}.xml --cov --cov-report=xml --cov-report=html + - name: Upload pytest test results + uses: actions/upload-artifact@v4 + with: + name: pytest-results-${{ matrix.python-version }}-${{ matrix.os }} + path: junit/test-results-${{ matrix.python-version }}-${{ matrix.os }}.xml + # Use always() to always run this step to publish test results when there are test failures + if: ${{ always() }} + lint: + # only run one linting job, on ubuntu; no python-version is pinned, so setup-python picks its default + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - 
name: Set up Python + uses: actions/setup-python@v5 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install pylint + python -m pip install -r requirements.txt + - name: Lint with pylint + run: | + # fail if the overall pylint score is below 6 + pylint src/irods2dataverse tests --fail-under=6 diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..ba26cee --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,82 @@ +# This workflow builds the package and uploads it to PyPI when a release is published, and to TestPyPI on pushes to main +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# Lots taken from https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ +# Copied from kuleuven/mango-mdschema + +name: Upload Python Package + +run-name: Deploy triggered by ${{ github.actor }} + +on: + release: + types: [published] + push: + branches: [main] +jobs: + build: + runs-on: ubuntu-latest # only build this distribution for now + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.x" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install build + - name: Build package + run: python -m build + - name: Archive production artifacts + uses: actions/upload-artifact@v4 + with: + name: package-distribution + path: dist/ + pypi-publish: + name: Upload release to PyPI + # only upload to PyPI when the triggering ref is a tag (e.g. a published release) + if: startsWith(github.ref, 'refs/tags/') + needs: + - build + runs-on: ubuntu-latest + environment: + name: pypi + url: 
https://pypi.org/p/irods2dataverse + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + steps: + - name: Download dists + uses: actions/download-artifact@v4 + with: + name: package-distribution + path: dist/ + - name: Publish package distributions to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + testpypi-publish: + name: Upload release to TestPyPI + # upload to TestPyPI only for non-tag refs, i.e. on every push to main + if: (!startsWith(github.ref, 'refs/tags/')) + needs: + - build + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/p/irods2dataverse + permissions: + id-token: write # IMPORTANT: this permission is mandatory for trusted publishing + steps: + - name: Download dists + uses: actions/download-artifact@v4 + with: + name: package-distribution + path: dist/ + - name: Publish package distributions to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ diff --git a/.gitignore b/.gitignore index 59a3d42..690b004 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,5 @@ /src/.ipynb_checkpoints/ /venv/ src/__pycache__/ -src/tests/__pycache__/ +tests/__pycache__/ src/irods2dataverse/__pycache__/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..e0daa26 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 KU Leuven + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions 
of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/dev/avu2json.ipynb b/dev/avu2json.ipynb deleted file mode 100644 index 9c76006..0000000 --- a/dev/avu2json.ipynb +++ /dev/null @@ -1,859 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "f9a8cad8-4a34-45ef-9b24-121fc1ff6f2f", - "metadata": {}, - "outputs": [], - "source": [ - "from mango_mdschema import Schema" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "9b4abef7-9bab-40c4-9462-9dff70e556a9", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import os, os.path" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "b91ddc7a-cddf-4e92-91c8-5265d4f36e61", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dv_schema = Schema(\"../doc/metadata/mango2dv-demo-1.0.0-published.json\")\n", - "dv_schema" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "798da134-2fc3-4e0f-af81-c66f81c3d007", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dv_schema = Schema(\"../doc/metadata/mango2dv-rdr-1.0.0-published.json\")\n", - "dv_schema" - ] - }, - { - "cell_type": "code", - "execution_count": 156, - "id": "6a521604-2e27-40bf-9629-63ebcfa33b1b", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"\u001b[0;31mInit signature:\u001b[0m \u001b[0mSchema\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpath\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mprefix\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'mgs'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mDocstring:\u001b[0m \n", - "Class representing a Metadata Schema.\n", - "\n", - "Attributes:\n", - " name (str): Name of the schema.\n", - " version (str): Version of the schema.\n", - " title (str): Title of the schema, for messages.\n", - " The name if no such title is provided in the JSON (which should not happen).\n", - " root (CompositeField): Root field of the schema.\n", - " fields (dict): Dictionary of fields in the schema (alias for root.fields)\n", - " required_fields (dict): Dictionary of required fields and their default values.\n", - " If a field is required and has no default value, it is not present in the dictionary.\n", - "\u001b[0;31mInit docstring:\u001b[0m\n", - "Init a Schema object from a JSON file.\n", - "\n", - "Args:\n", - " path (str): Path to the metadata schema.\n", - " prefix (str): Prefix to add to the metadata names. 
Default is 'mgs' (ManGO schema).\n", - "\n", - "Raises:\n", - " IOError: When the file cannot be opened.\n", - " KeyError: Some fields are required\n", - " ('schema_name', 'version', 'status', 'properties' and 'title'.)\n", - " If any of them are missing from the schema, this error is raised.\n", - " ValueError: When the schema is not published.\n", - "\u001b[0;31mFile:\u001b[0m ~/.local/lib/python3.10/site-packages/mango_mdschema/schema.py\n", - "\u001b[0;31mType:\u001b[0m type\n", - "\u001b[0;31mSubclasses:\u001b[0m " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "?Schema" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "3bee3104-d74d-402d-9e9a-c6e90ba9e712", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1mManGO Dataset to RDR\u001b[0m\n", - "Metadata annotated with the schema 'mango2dv-rdr' (1.0.0) carry the prefix 'mgs'.\n", - "This schema contains the following 7 fields:\n", - "- \u001b[1mtitle\u001b[0m, of type 'text' (required).\n", - "- \u001b[1mauthor\u001b[0m, of type 'object' (required).\n", - "- \u001b[1mdatasetContact\u001b[0m, of type 'object' (required).\n", - "- \u001b[1mdsDescription\u001b[0m, of type 'object' (required).\n", - "- \u001b[1mkeyword\u001b[0m, of type 'object' (required).\n", - "- \u001b[1mtechnicalFormat\u001b[0m, of type 'text' (required).\n", - "- \u001b[1maccess\u001b[0m, of type 'object'.\n" - ] - } - ], - "source": [ - "print(dv_schema)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "148110e9-2724-45a2-8247-8f412378bb50", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[1mType\u001b[0m: object.\n", - "\u001b[1mRequired\u001b[0m: True. 
(2 of its 2 fields are required.)\n", - "\u001b[1mRepeatable\u001b[0m: False.\n", - "\n", - "Composed of the following fields:\n", - "\u001b[4mmango2dv-rdr.author.authorName\u001b[0m\n", - "\u001b[1mType\u001b[0m: text.\n", - "\u001b[1mRequired\u001b[0m: True. \u001b[1mDefault\u001b[0m: None.\n", - "\u001b[1mRepeatable\u001b[0m: False.\n", - "\n", - "\u001b[4mmango2dv-rdr.author.authorAffiliation\u001b[0m\n", - "\u001b[1mType\u001b[0m: text.\n", - "\u001b[1mRequired\u001b[0m: True. \u001b[1mDefault\u001b[0m: None.\n", - "\u001b[1mRepeatable\u001b[0m: False.\n" - ] - } - ], - "source": [ - "dv_schema.print_requirements(\"author\")" - ] - }, - { - "cell_type": "code", - "execution_count": 183, - "id": "77c54ec5-e125-47f2-9899-b9e4fae33b11", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'datasetVersion': {'metadataBlocks': {'citation': {'fields': [{'value': '...Title...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'title'},\n", - " {'value': [{'authorName': {'value': '...LastName..., ...FirstName...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'authorName'},\n", - " 'authorAffiliation': {'value': '...Affiliation...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'authorAffiliation'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'author'},\n", - " {'value': [{'datasetContactEmail': {'value': '...Email...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContactEmail'},\n", - " 'datasetContactName': {'value': '...LastName..., ...FirstName...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContactName'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContact'},\n", - " {'value': [{'dsDescriptionValue': {'value': '...Description...',\n", - " 'typeClass': 'primitive',\n", - " 
'multiple': False,\n", - " 'typeName': 'dsDescriptionValue'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'dsDescription'},\n", - " {'value': [{'keywordValue': {'value': '...Keyword...',\n", - " 'typeName': 'keywordValue',\n", - " 'multiple': False,\n", - " 'typeClass': 'primitive'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'keyword'},\n", - " {'value': '...filetypes...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'technicalFormat'},\n", - " {'value': {'accessRights': {'typeName': 'accessRights',\n", - " 'multiple': False,\n", - " 'typeClass': 'controlledVocabulary',\n", - " 'value': '...open/restricted/embargoed/closed..'}},\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'access'}],\n", - " 'displayName': 'Citation Metadata'}}}}" - ] - }, - "execution_count": 183, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "with open(\"../doc/metadata/template_RDR.json\") as f:\n", - " template = json.load(f)\n", - "template" - ] - }, - { - "cell_type": "code", - "execution_count": 109, - "id": "2ee53f56-0c3f-4706-9d8d-7244c9bff73f", - "metadata": {}, - "outputs": [], - "source": [ - "# this is an array of fields\n", - "fields = template[\"datasetVersion\"][\"metadataBlocks\"][\"citation\"][\"fields\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 110, - "id": "5ccabf0e-6d25-44d0-b0b1-4c06def47062", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'value': 'Minimum Viable Workflow - 16 May 2024',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'title'},\n", - " {'value': [{'authorName': {'value': 'Kafetzaki, Danai',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'authorName'},\n", - " 'authorAffiliation': {'value': 'KU Leuven',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 
'typeName': 'authorAffiliation'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'author'},\n", - " {'value': [{'datasetContactEmail': {'value': 'danai.kafetzaki@kuleuven.be',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContactEmail'},\n", - " 'datasetContactName': {'value': 'Kafetzaki, Danai',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContactName'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContact'},\n", - " {'value': [{'dsDescriptionValue': {'value': 'This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'dsDescriptionValue'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'dsDescription'},\n", - " {'value': [{'keywordValue': {'typeName': 'keywordValue',\n", - " 'multiple': False,\n", - " 'typeClass': 'primitive',\n", - " 'value': 'required-keyword'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'keyword'},\n", - " {'value': 'json',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'technicalFormat'},\n", - " {'value': {'accessRights': {'typeName': 'accessRights',\n", - " 'multiple': False,\n", - " 'typeClass': 'controlledVocabulary',\n", - " 'value': 'open'}},\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'access'}]" - ] - }, - "execution_count": 110, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "fields" - ] - }, - { - "cell_type": "markdown", - "id": "84acad0f-2872-4b00-b18c-a8449aaf8ac7", - "metadata": {}, - "source": [ - "# Simulate getting metadata from iRODS\n", - "\n", - "The code below is to simulate, based on the template metadata, doing the following:\n", - "\n", - 
"```python\n", - "with iRODSSession(irods_env_file=env_file) as session:\n", - " obj = session.data_objects.get('path/to/my/object')\n", - " avus_as_json = dv_schema.extract(obj)\n", - "\n", - "```" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "83599430-83d3-40e0-863a-31ea247978c2", - "metadata": {}, - "outputs": [], - "source": [ - "def field(f):\n", - " if f[\"typeClass\"] == \"compound\":\n", - " values = f[\"value\"] if type(f[\"value\"]) != list else f[\"value\"][0]\n", - " return (f[\"typeName\"], {k: v for k, v in [field(ff) for ff in values.values()]})\n", - " else:\n", - " return (f[\"typeName\"], f[\"value\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "9fdd2f8c-6102-4407-9134-96d950ee7cc8", - "metadata": {}, - "outputs": [], - "source": [ - "metadata_dict = {k: v for k, v in [field(f) for f in fields]}" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "4cb486d5-3115-4892-9e76-b72169c7ecf3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'Minimum Viable Workflow - 16 May 2024',\n", - " 'author': {'authorName': 'Kafetzaki, Danai',\n", - " 'authorAffiliation': 'KU Leuven'},\n", - " 'datasetContact': {'datasetContactEmail': 'danai.kafetzaki@kuleuven.be',\n", - " 'datasetContactName': 'Kafetzaki, Danai'},\n", - " 'dsDescription': {'dsDescriptionValue': 'This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration'},\n", - " 'keyword': {'keywordValue': 'required-keyword'},\n", - " 'technicalFormat': 'json',\n", - " 'access': {'accessRights': 'open'}}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "metadata_dict" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "cae9ffa7-37c4-4202-87e1-641e940a34f6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'title': 'Minimum Viable Workflow - 16 May 2024',\n", - 
" 'author': {'authorName': 'Kafetzaki, Danai',\n", - " 'authorAffiliation': 'KU Leuven'},\n", - " 'datasetContact': {'datasetContactEmail': 'danai.kafetzaki@kuleuven.be',\n", - " 'datasetContactName': 'Kafetzaki, Danai'},\n", - " 'dsDescription': [{'dsDescriptionValue': 'This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration'}],\n", - " 'keyword': [{'keywordValue': 'required-keyword'}],\n", - " 'technicalFormat': 'json',\n", - " 'access': {'accessRights': 'open'}}" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dv_schema.validate(metadata_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "5804d9b2-385a-4ce5-9cfa-696e394d04e2", - "metadata": {}, - "outputs": [], - "source": [ - "# if we extracted the metadata from irods\n", - "as_avus = dv_schema.to_avus(metadata_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "de9cfac5-ec45-49c3-a63c-dfa667e7b5ff", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ,\n", - " ]" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "as_avus" - ] - }, - { - "cell_type": "markdown", - "id": "0b6dc5ac-8d2f-43cd-9511-ba9f6776f8f4", - "metadata": {}, - "source": [ - "# Convert AVUs to template JSON" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "c357c023-3539-4508-baac-7fbe0449708a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'access': {'accessRights': 'open'},\n", - " 'author': {'authorAffiliation': 'KU Leuven',\n", - " 'authorName': 'Kafetzaki, Danai'},\n", - " 'datasetContact': {'datasetContactEmail': 'danai.kafetzaki@kuleuven.be',\n", - " 'datasetContactName': 'Kafetzaki, Danai'},\n", - " 'dsDescription': [{'dsDescriptionValue': 'This is a minimal end-to-end 
implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration'}],\n", - " 'keyword': [{'keywordValue': 'required-keyword'}],\n", - " 'technicalFormat': 'json',\n", - " 'title': 'Minimum Viable Workflow - 16 May 2024'}" - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "avus_as_json = dv_schema.from_avus(as_avus)\n", - "avus_as_json" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "765e61c1-6fa2-4009-a455-945932fb9de5", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n" - ] - } - ], - "source": [ - "for v in avus_as_json.values():\n", - " print(type(v))" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "d6c13319-45b5-4155-8300-87a4eccba100", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(\"mari\") == type(\"taihou\") == int" - ] - }, - { - "cell_type": "code", - "execution_count": 180, - "id": "6374553d-e5eb-4da4-88c2-9a1a10043e7d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'access': {'accessRights': 'open'},\n", - " 'author': [{'authorAffiliation': 'KU Leuven',\n", - " 'authorName': 'Kafetzaki, Danai'}],\n", - " 'datasetContact': [{'datasetContactEmail': 'danai.kafetzaki@kuleuven.be',\n", - " 'datasetContactName': 'Kafetzaki, Danai'}],\n", - " 'dsDescription': [{'dsDescriptionValue': 'This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration'}],\n", - " 'keyword': [{'keywordValue': 'required-keyword'}],\n", - " 'technicalFormat': 'json',\n", - " 'title': 'Minimum Viable Workflow - 16 May 2024'}" - ] - }, - "execution_count": 180, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "avus_as_json" 
- ] - }, - { - "cell_type": "code", - "execution_count": 188, - "id": "79d806da-cb38-4f76-90db-e1c8cea1599e", - "metadata": {}, - "outputs": [], - "source": [ - "def return_dict(value, fromAvu):\n", - " return {k: update_template(value[k], fromAvu) for k in value.keys()}\n", - " \n", - "def update_template(field, avus_as_json):\n", - " typeName = field[\"typeName\"]\n", - " value = field[\"value\"]\n", - " fromAvu = avus_as_json[typeName]\n", - " typeClass = field[\"typeClass\"]\n", - " if typeClass != \"compound\":\n", - " field[\"value\"] = fromAvu\n", - " elif type(value) == list:\n", - " if type(fromAvu) != list:\n", - " fromAvu = [fromAvu]\n", - " field[\"value\"] = [return_dict(x, y) for x, y in zip(value, fromAvu)]\n", - " else:\n", - " field[\"value\"] = return_dict(value, fromAvu)\n", - " return field" - ] - }, - { - "cell_type": "code", - "execution_count": 189, - "id": "8f708d55-3c33-4ea3-8d2a-5917fbce1e51", - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'datasetVersion': {'metadataBlocks': {'citation': {'fields': [{'value': '...Title...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'title'},\n", - " {'value': [{'authorName': {'value': '...LastName..., ...FirstName...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'authorName'},\n", - " 'authorAffiliation': {'value': '...Affiliation...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'authorAffiliation'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'author'},\n", - " {'value': [{'datasetContactEmail': {'value': '...Email...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContactEmail'},\n", - " 'datasetContactName': {'value': '...LastName..., ...FirstName...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContactName'}}],\n", - " 
'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContact'},\n", - " {'value': [{'dsDescriptionValue': {'value': '...Description...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'dsDescriptionValue'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'dsDescription'},\n", - " {'value': [{'keywordValue': {'value': '...Keyword...',\n", - " 'typeName': 'keywordValue',\n", - " 'multiple': False,\n", - " 'typeClass': 'primitive'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'keyword'},\n", - " {'value': '...filetypes...',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'technicalFormat'},\n", - " {'value': {'accessRights': {'typeName': 'accessRights',\n", - " 'multiple': False,\n", - " 'typeClass': 'controlledVocabulary',\n", - " 'value': '...open/restricted/embargoed/closed..'}},\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'access'}],\n", - " 'displayName': 'Citation Metadata'}}}}" - ] - }, - "execution_count": 189, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "with open(\"../doc/metadata/template_RDR.json\") as f:\n", - " template = json.load(f)\n", - "template" - ] - }, - { - "cell_type": "code", - "execution_count": 190, - "id": "7fd17efb-a84c-4f7f-9cc0-79e91d345f71", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'value': 'Minimum Viable Workflow - 16 May 2024',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'title'},\n", - " {'value': [{'authorName': {'value': 'Kafetzaki, Danai',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'authorName'},\n", - " 'authorAffiliation': {'value': 'KU Leuven',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'authorAffiliation'}}],\n", - " 'typeClass': 'compound',\n", - " 
'multiple': False,\n", - " 'typeName': 'author'},\n", - " {'value': [{'datasetContactEmail': {'value': 'danai.kafetzaki@kuleuven.be',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContactEmail'},\n", - " 'datasetContactName': {'value': 'Kafetzaki, Danai',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContactName'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContact'},\n", - " {'value': [{'dsDescriptionValue': {'value': 'This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'dsDescriptionValue'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'dsDescription'},\n", - " {'value': [{'keywordValue': {'value': 'required-keyword',\n", - " 'typeName': 'keywordValue',\n", - " 'multiple': False,\n", - " 'typeClass': 'primitive'}}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'keyword'},\n", - " {'value': 'json',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'technicalFormat'},\n", - " {'value': {'accessRights': {'typeName': 'accessRights',\n", - " 'multiple': False,\n", - " 'typeClass': 'controlledVocabulary',\n", - " 'value': 'open'}},\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'access'}]" - ] - }, - "execution_count": 190, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[update_template(field, avus_as_json) for field in template[\"datasetVersion\"][\"metadataBlocks\"][\"citation\"][\"fields\"]]\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 177, - "id": "171f8cdc-9f5e-4b7a-ac08-6f3836a26aae", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'datasetVersion': {'metadataBlocks': {'citation': 
{'fields': [{'value': 'Minimum Viable Workflow - 16 May 2024',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'title'},\n", - " {'value': [{'authorName': None, 'authorAffiliation': None}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'author'},\n", - " {'value': [{'datasetContactEmail': None, 'datasetContactName': None}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'datasetContact'},\n", - " {'value': [{'dsDescriptionValue': None}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'dsDescription'},\n", - " {'value': [{'keywordValue': None}],\n", - " 'typeClass': 'compound',\n", - " 'multiple': True,\n", - " 'typeName': 'keyword'},\n", - " {'value': 'json',\n", - " 'typeClass': 'primitive',\n", - " 'multiple': False,\n", - " 'typeName': 'technicalFormat'},\n", - " {'value': {'accessRights': None},\n", - " 'typeClass': 'compound',\n", - " 'multiple': False,\n", - " 'typeName': 'access'}],\n", - " 'displayName': 'Citation Metadata'}}}}" - ] - }, - "execution_count": 177, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "template" - ] - }, - { - "cell_type": "code", - "execution_count": 163, - "id": "2d842e69-c162-42e7-aff3-54105267d6ef", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'datasetVersion': {'metadataBlocks': {'citation': {'fields': [1, 2, 3], 'displayName': 'Citation Metadata'}}}}\n" - ] - } - ], - "source": [ - "fill_in_template(template, avus_as_json)\n", - "print(template)" - ] - }, - { - "cell_type": "code", - "execution_count": 155, - "id": "cc577bea-9a0b-49b0-be02-50aab68a9bca", - "metadata": {}, - "outputs": [], - "source": [ - "template[\"datasetVersion\"][\"metadataBlocks\"][\"citation\"][\"fields\"] = new_fields\n", - "with open(\"../doc/metadata/filled_in_template_RDR.json\", \"w\") as f:\n", - " json.dump(template, f, indent=4)" - ] 
- } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/doc/data/iRODSfile.txt b/doc/data/iRODSfile.txt deleted file mode 100644 index 106d423..0000000 --- a/doc/data/iRODSfile.txt +++ /dev/null @@ -1,2 +0,0 @@ -This is an iRODS data object and the user chooses to publish it in a Dataverse dataset. -The data object is created for the demo of March 19, 2024. \ No newline at end of file diff --git a/doc/metadata/exDsPublished.json b/doc/metadata/exDsPublished.json deleted file mode 100644 index 7ab55f6..0000000 --- a/doc/metadata/exDsPublished.json +++ /dev/null @@ -1,170 +0,0 @@ -{ - "status": "OK", - "data": { - "id": 2134832, - "identifier": "FK2/9CAB6I", - "persistentUrl": "https://doi.org/10.70122/FK2/9CAB6I", - "protocol": "doi", - "authority": "10.70122", - "publisher": "Demo Dataverse", - "publicationDate": "2024-03-14", - "storageIdentifier": "s3://10.70122/FK2/9CAB6I", - "latestVersion": { - "id": 249201, - "datasetId": 2134832, - "datasetPersistentId": "doi:10.70122/FK2/9CAB6I", - "storageIdentifier": "s3://10.70122/FK2/9CAB6I", - "versionNumber": 1, - "versionMinorNumber": 0, - "versionState": "RELEASED", - "UNF": "UNF:6:+EkmNSuhdUrexrhcJNmOgg==", - "lastUpdateTime": "2024-03-14T20:57:43Z", - "releaseTime": "2024-03-14T20:57:43Z", - "createTime": "2024-03-14T20:44:52Z", - "publicationDate": "2024-03-14", - "citationDate": "2024-03-14", - "license": { - "name": "CC0 1.0", - "uri": "http://creativecommons.org/publicdomain/zero/1.0", - "iconUri": "https://licensebuttons.net/p/zero/1.0/88x31.png" - }, - "fileAccessRequest": true, - "metadataBlocks": { - "citation": { - 
"displayName": "Citation Metadata", - "name": "citation", - "fields": [ - { - "typeName": "title", - "multiple": false, - "typeClass": "primitive", - "value": "Mark Twain Works" - }, - { - "typeName": "author", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "authorName": { - "typeName": "authorName", - "multiple": false, - "typeClass": "primitive", - "value": "Hawkins, Jacob Bryan" - }, - "authorAffiliation": { - "typeName": "authorAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "University of Tennessee" - } - } - ] - }, - { - "typeName": "datasetContact", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "datasetContactName": { - "typeName": "datasetContactName", - "multiple": false, - "typeClass": "primitive", - "value": "Hawkins, Jacob Bryan" - }, - "datasetContactAffiliation": { - "typeName": "datasetContactAffiliation", - "multiple": false, - "typeClass": "primitive", - "value": "University of Tennessee" - }, - "datasetContactEmail": { - "typeName": "datasetContactEmail", - "multiple": false, - "typeClass": "primitive", - "value": "jhawki41@vols.utk.edu" - } - } - ] - }, - { - "typeName": "dsDescription", - "multiple": true, - "typeClass": "compound", - "value": [ - { - "dsDescriptionValue": { - "typeName": "dsDescriptionValue", - "multiple": false, - "typeClass": "primitive", - "value": "This csv contains 15 of Mark Twain's most downloaded works from Project Gutenburg. It also contains the top 10 most downloaded works on the site as a whole." 
- }, - "dsDescriptionDate": { - "typeName": "dsDescriptionDate", - "multiple": false, - "typeClass": "primitive", - "value": "2024-03-14" - } - } - ] - }, - { - "typeName": "subject", - "multiple": true, - "typeClass": "controlledVocabulary", - "value": [ - "Computer and Information Science" - ] - }, - { - "typeName": "depositor", - "multiple": false, - "typeClass": "primitive", - "value": "Hawkins, Jacob Bryan" - }, - { - "typeName": "dateOfDeposit", - "multiple": false, - "typeClass": "primitive", - "value": "2024-03-14" - } - ] - } - }, - "files": [ - { - "label": "data.tab", - "restricted": false, - "version": 3, - "datasetVersionId": 249201, - "dataFile": { - "id": 2134833, - "persistentId": "doi:10.70122/FK2/9CAB6I/SL7RZC", - "pidURL": "https://doi.org/10.70122/FK2/9CAB6I/SL7RZC", - "filename": "data.tab", - "contentType": "text/tab-separated-values", - "friendlyType": "Tab-Delimited", - "filesize": 2433, - "storageIdentifier": "s3://demo-dataverse-org:18e3eb5d49a-4e3962066d0d", - "originalFileFormat": "text/csv", - "originalFormatLabel": "Comma Separated Values", - "originalFileSize": 2332, - "originalFileName": "data.csv", - "UNF": "UNF:6:+EkmNSuhdUrexrhcJNmOgg==", - "rootDataFileId": -1, - "md5": "92020a0033d5d81e3f2ecd02201aad96", - "checksum": { - "type": "MD5", - "value": "92020a0033d5d81e3f2ecd02201aad96" - }, - "tabularData": true, - "creationDate": "2024-03-14", - "publicationDate": "2024-03-14", - "fileAccessRequest": true - } - } - ] - } - } -} \ No newline at end of file diff --git a/doc/metadata/mdDataset.json b/doc/metadata/mdDataset.json deleted file mode 100644 index 6c56cae..0000000 --- a/doc/metadata/mdDataset.json +++ /dev/null @@ -1,82 +0,0 @@ -{ - "datasetVersion": { - "metadataBlocks": { - "citation": { - "fields": [ - { - "value": "Demo for 02 April 2024", - "typeClass": "primitive", - "multiple": false, - "typeName": "title" - }, - { - "value": [ - { - "authorName": { - "value": "Kafetzaki, Danai", - "typeClass": "primitive", - 
"multiple": false, - "typeName": "authorName" - }, - "authorAffiliation": { - "value": "KU Leuven", - "typeClass": "primitive", - "multiple": false, - "typeName": "authorAffiliation" - } - } - ], - "typeClass": "compound", - "multiple": false, - "typeName": "author" - }, - { - "value": [ - { - "datasetContactEmail": { - "value": "danai.kafetzaki@kuleuven.be", - "typeClass": "primitive", - "multiple": false, - "typeName": "datasetContactEmail" - }, - "datasetContactName": { - "value": "Kafetzaki, Danai", - "typeClass": "primitive", - "multiple": false, - "typeName": "datasetContactName" - } - } - ], - "typeClass": "compound", - "multiple": false, - "typeName": "datasetContact" - }, - { - "value": [ - { - "dsDescriptionValue": { - "value": "This is a minimal end-to-end implementation for iRODS-Dataverse integartion, a KU Leuven and SURF collaboration", - "typeClass": "primitive", - "multiple": false, - "typeName": "dsDescriptionValue" - } - } - ], - "typeClass": "compound", - "multiple": true, - "typeName": "dsDescription" - }, - { - "value": [ - "Demo Only" - ], - "typeClass": "controlledVocabulary", - "multiple": true, - "typeName": "subject" - } - ], - "displayName": "Citation Metadata" - } - } - } -} \ No newline at end of file diff --git a/doc/metadata/mdDatasetTrial.json b/doc/metadata/mdDatasetTrial.json deleted file mode 100644 index 49053eb..0000000 --- a/doc/metadata/mdDatasetTrial.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "datasetVersion": { - "metadataBlocks": { - "citation": { - "fields": [ - { - "value": "Demo for 19 March 2024", - "typeClass": "primitive", - "multiple": false, - "typeName": "title" - }, - { - "value": [ - { - "authorName": { - "value": "Kafetzaki, Danai", - "typeClass": "primitive", - "multiple": false, - "typeName": "authorName" - }, - "authorAffiliation": { - "value": "KU Leuven", - "typeClass": "primitive", - "multiple": false, - "typeName": "authorAffiliation" - } - } - ], - "typeClass": "compound", - "multiple": true, - "typeName": 
"author" - } - ] - } - } - } -} \ No newline at end of file diff --git a/doc/metadata/mdDataset_RDR-pilot.json b/doc/metadata/mdDataset_RDR-pilot.json deleted file mode 100644 index f683ee1..0000000 --- a/doc/metadata/mdDataset_RDR-pilot.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "datasetVersion": { - "metadataBlocks": { - "citation": { - "fields": [ - { - "value": "Minimum Viable Workflow - 16 May 2024", - "typeClass": "primitive", - "multiple": false, - "typeName": "title" - }, - { - "value": [ - { - "authorName": { - "value": "Kafetzaki, Danai", - "typeClass": "primitive", - "multiple": false, - "typeName": "authorName" - }, - "authorAffiliation": { - "value": "KU Leuven", - "typeClass": "primitive", - "multiple": false, - "typeName": "authorAffiliation" - } - } - ], - "typeClass": "compound", - "multiple": false, - "typeName": "author" - }, - { - "value": [ - { - "datasetContactEmail": { - "value": "danai.kafetzaki@kuleuven.be", - "typeClass": "primitive", - "multiple": false, - "typeName": "datasetContactEmail" - }, - "datasetContactName": { - "value": "Kafetzaki, Danai", - "typeClass": "primitive", - "multiple": false, - "typeName": "datasetContactName" - } - } - ], - "typeClass": "compound", - "multiple": false, - "typeName": "datasetContact" - }, - { - "value": [ - { - "dsDescriptionValue": { - "value": "This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration", - "typeClass": "primitive", - "multiple": false, - "typeName": "dsDescriptionValue" - } - } - ], - "typeClass": "compound", - "multiple": true, - "typeName": "dsDescription" - }, - { - "value": [ - { - "keywordValue": { - "typeName": "keywordValue", - "multiple": false, - "typeClass": "primitive", - "value": "required-keyword" - } - } - ], - "typeClass": "compound", - "multiple": true, - "typeName": "keyword" - }, - { - "value": "json", - "typeClass": "primitive", - "multiple": false, - "typeName": "technicalFormat" - }, - { - "typeName": 
"access", - "multiple": false, - "typeClass": "compound", - "value": { - "accessRights": { - "typeName": "accessRights", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "open" - } - } - } - ], - "displayName": "Citation Metadata" - } - } - } -} \ No newline at end of file diff --git a/doc/metadata/mdDataset_RDR.json b/doc/metadata/mdDataset_RDR.json deleted file mode 100644 index f683ee1..0000000 --- a/doc/metadata/mdDataset_RDR.json +++ /dev/null @@ -1,108 +0,0 @@ -{ - "datasetVersion": { - "metadataBlocks": { - "citation": { - "fields": [ - { - "value": "Minimum Viable Workflow - 16 May 2024", - "typeClass": "primitive", - "multiple": false, - "typeName": "title" - }, - { - "value": [ - { - "authorName": { - "value": "Kafetzaki, Danai", - "typeClass": "primitive", - "multiple": false, - "typeName": "authorName" - }, - "authorAffiliation": { - "value": "KU Leuven", - "typeClass": "primitive", - "multiple": false, - "typeName": "authorAffiliation" - } - } - ], - "typeClass": "compound", - "multiple": false, - "typeName": "author" - }, - { - "value": [ - { - "datasetContactEmail": { - "value": "danai.kafetzaki@kuleuven.be", - "typeClass": "primitive", - "multiple": false, - "typeName": "datasetContactEmail" - }, - "datasetContactName": { - "value": "Kafetzaki, Danai", - "typeClass": "primitive", - "multiple": false, - "typeName": "datasetContactName" - } - } - ], - "typeClass": "compound", - "multiple": false, - "typeName": "datasetContact" - }, - { - "value": [ - { - "dsDescriptionValue": { - "value": "This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration", - "typeClass": "primitive", - "multiple": false, - "typeName": "dsDescriptionValue" - } - } - ], - "typeClass": "compound", - "multiple": true, - "typeName": "dsDescription" - }, - { - "value": [ - { - "keywordValue": { - "typeName": "keywordValue", - "multiple": false, - "typeClass": "primitive", - "value": "required-keyword" - } - 
} - ], - "typeClass": "compound", - "multiple": true, - "typeName": "keyword" - }, - { - "value": "json", - "typeClass": "primitive", - "multiple": false, - "typeName": "technicalFormat" - }, - { - "typeName": "access", - "multiple": false, - "typeClass": "compound", - "value": { - "accessRights": { - "typeName": "accessRights", - "multiple": false, - "typeClass": "controlledVocabulary", - "value": "open" - } - } - } - ], - "displayName": "Citation Metadata" - } - } - } -} \ No newline at end of file diff --git a/doc/metadata/metadataBlocks.json b/doc/metadata/metadataBlocks.json deleted file mode 100644 index d98cb4d..0000000 --- a/doc/metadata/metadataBlocks.json +++ /dev/null @@ -1 +0,0 @@ -{"status": "OK", "data": [{"id": 2, "displayName": "Geospatial Metadata", "name": "geospatial"}, {"id": 3, "displayName": "Social Science and Humanities Metadata", "name": "socialscience"}, {"id": 4, "displayName": "Astronomy and Astrophysics Metadata", "name": "astrophysics"}, {"id": 5, "displayName": "Life Sciences Metadata", "name": "biomedical"}, {"id": 6, "displayName": "Journal Metadata", "name": "journal"}, {"id": 7, "displayName": "MRA Metadata", "name": "customMRA"}, {"id": 8, "displayName": "Graduate School of Design Metadata", "name": "customGSD"}, {"id": 9, "displayName": "Alliance for Research on Corporate Sustainability Metadata", "name": "customARCS"}, {"id": 10, "displayName": "Political Science Replication Initiative Metadata", "name": "customPSRI"}, {"id": 11, "displayName": "PSI Metadata", "name": "customPSI"}, {"id": 12, "displayName": "CHIA Metadata", "name": "customCHIA"}, {"id": 13, "displayName": "Digaai Metadata", "name": "customDigaai"}, {"id": 1, "displayName": "Citation Metadata", "name": "citation"}, {"id": 14, "displayName": "SAEF Metadata", "name": "customSAEF"}]} \ No newline at end of file diff --git a/doc/metadata/short_metadata_demo.json b/doc/metadata/short_metadata_demo.json deleted file mode 100644 index 821b96d..0000000 --- 
a/doc/metadata/short_metadata_demo.json +++ /dev/null @@ -1,19 +0,0 @@ -{ - "author": { - "authorAffiliation": "KU Leuven", - "authorName": "Kafetzaki, Danai" - }, - "datasetContact": { - "datasetContactEmail": "danai.kafetzaki@kuleuven.be", - "datasetContactName": "Kafetzaki, Danai" - }, - "dsDescription": [ - { - "dsDescriptionValue": "This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration" - } - ], - "subject": [ - "Demo Only" - ], - "title": "Minimum Viable Workflow - 16 May 2024" -} \ No newline at end of file diff --git a/doc/metadata/short_metadata_rdr-pilot.json b/doc/metadata/short_metadata_rdr-pilot.json deleted file mode 100644 index 9a8f68d..0000000 --- a/doc/metadata/short_metadata_rdr-pilot.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "access": { - "accessRights": "open", - "dateAvailable": "", - "legitimateOptout": "other" - }, - "author": [ - { - "authorAffiliation": "KU Leuven", - "authorName": "Kafetzaki, Danai" - } - ], - "datasetContact": [ - { - "datasetContactEmail": "danai.kafetzaki@kuleuven.be", - "datasetContactName": "Kafetzaki, Danai" - } - ], - "dsDescription": [ - { - "dsDescriptionValue": "This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration" - } - ], - "keyword": [ - { - "keywordValue": "required-keyword" - } - ], - "technicalFormat": "json", - "title": "Demo - 09 December 2024" -} \ No newline at end of file diff --git a/doc/metadata/short_metadata_rdr.json b/doc/metadata/short_metadata_rdr.json deleted file mode 100644 index 9a8f68d..0000000 --- a/doc/metadata/short_metadata_rdr.json +++ /dev/null @@ -1,31 +0,0 @@ -{ - "access": { - "accessRights": "open", - "dateAvailable": "", - "legitimateOptout": "other" - }, - "author": [ - { - "authorAffiliation": "KU Leuven", - "authorName": "Kafetzaki, Danai" - } - ], - "datasetContact": [ - { - "datasetContactEmail": "danai.kafetzaki@kuleuven.be", - "datasetContactName": 
"Kafetzaki, Danai" - } - ], - "dsDescription": [ - { - "dsDescriptionValue": "This is a minimal end-to-end implementation for iRODS-Dataverse integration, a KU Leuven and SURF collaboration" - } - ], - "keyword": [ - { - "keywordValue": "required-keyword" - } - ], - "technicalFormat": "json", - "title": "Demo - 09 December 2024" -} \ No newline at end of file diff --git a/doc/metadata/subjects.json b/doc/metadata/subjects.json deleted file mode 100644 index 8847e4b..0000000 --- a/doc/metadata/subjects.json +++ /dev/null @@ -1,22 +0,0 @@ -{ - "subjects": { - "values": [ - "Agricultural Sciences", - "Arts and Humanities", - "Astronomy and Astrophysics", - "Business and Management", - "Chemistry", - "Computer and Information Science", - "Earth and Environmental Sciences", - "Engineering", - "Law", - "Mathematical Sciences", - "Medicine, Health and Life Sciences", - "Physics", - "Social Sciences", - "Other", - "Demo Only" - ], - "description": "Controlled list of subjects for DEMO Dataverse" - } -} \ No newline at end of file diff --git a/doc/metadata/templateDataset.json b/doc/metadata/templateDataset.json deleted file mode 100644 index 3ac64b2..0000000 --- a/doc/metadata/templateDataset.json +++ /dev/null @@ -1 +0,0 @@ -{"datasetVersion": {"metadataBlocks": {"citation": {"fields": []}}}} \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..d475bc1 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,34 @@ +[build-system] +requires = ["setuptools>67", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +dynamic = ["dependencies", "optional-dependencies"] +name = "irods2dataverse" +description = "Tag and push datasets from iRODS to a dataverse installation" +readme = "README.md" +requires-python = ">=3.8" +version = "0.0.1" +license = {file = "LICENSE"} +authors = [ + { name = "Danai Kafetzaki", email = "danai.kafetzaki@kuleuven.be" }, + { name = "Joachim Bovin", email = "joachim.bovin@kuleuven.be" }, + { name 
= "Mariana Montes", email = "mariana.montes@kuleuven.be" }, +] + +[project.urls] +repository = "https://github.com/kuleuven/iRODS-Dataverse" + +[tool.setuptools.dynamic] +dependencies = {file = ["requirements.txt"]} +optional-dependencies.dev = { file = ["requirements-dev.txt"] } + + +[tool.pylint."messages control"] +disable = ["too-many-arguments"] + +[tool.pylint.reports] +output-format = ["colorized"] + +[tool.pytest.ini_options] +pythonpath = ["src"] diff --git a/src/irods2dataverse/directUpload.py b/src/irods2dataverse/directUpload.py deleted file mode 100644 index 3d0c8d9..0000000 --- a/src/irods2dataverse/directUpload.py +++ /dev/null @@ -1,110 +0,0 @@ -import requests -import maskpass -import magic -import functions -from irods.data_object import iRODSDataObject - -# TO DO: integrate with functions and user-script - -### Get information from iRODS ### -# authenticate in iRODS -session = functions.authenticate_iRODS("/home/danai/.irods/irods_environment.json") - -# select iRODS object (result of user script query) -src = "/set/home/datateam_set/iRODS2DV/iRODSfileUserScript.txt" -src_dataObj: iRODSDataObject = session.data_objects.get(src) -data = src_dataObj.open("r") - -# Get the checksum value from iRODS -chksumRes = src_dataObj.chksum() -chksumVal = chksumRes[5:] # this is algorithm-specific - -# Get the mimetype (from paul, mango portal) -with src_dataObj.open("r") as f: - blub = f.read(50 * 1024) - mimeTypeVal = magic.from_buffer(blub, mime=True) - -# Get the size of the object -df_size = src_dataObj.size + 1 # add 1 byte - -### Configuration specific information ### -# select Dataverse installation (result of user script query) -inp_dv = "RDR-pilot" # Direct upload works in RDR and RDR-pilot, not in Demo because it is not enabled -print(f"Selected Dataverse installation is <{inp_dv}>") - -# Dataverse installation specific info - (URL: result of configuration; DOI: result of user script process + AVU in iRODS with ID 'dv.ds.DOI') -if inp_dv == "RDR": - 
BASE_URL = "https://rdr.kuleuven.be" - dv_ds_DOI = "doi:10.48804/RQLUMN" -elif inp_dv == "Demo": - BASE_URL = "https://demo.dataverse.org" - dv_ds_DOI = "doi:10.70122/FK2/GTGRKF" -elif inp_dv == "RDR-pilot": - BASE_URL = "https://www.rdm.libis.kuleuven.be" - dv_ds_DOI = "doi:10.82111/JGBUBM" - -# Ask the Token for the selected installation -print( - f"Provide your Token for <{inp_dv}> Dataverse installation. Your Token will be encrypted and saved securely for future reference until expiration." -) -token = maskpass.askpass(prompt="", mask="*") - -### Direct upload with requests ### - -# create headers with Dataverse token: used in step-1 and step-3 -headers_key = { - "X-Dataverse-key": token, -} -# create headers with content type for data transmission: used in step-2 -headers_ct = { - "Content-Type": "application/x-www-form-urlencoded", -} - -# Step-1: GET Direct Upload URL -response1 = requests.get( - f"{BASE_URL}/api/datasets/:persistentId/uploadurls?persistentId={dv_ds_DOI}&size={df_size}", - headers=headers_key, -) -# verify status -print(str(response1)) # -# save the url -url = response1.json()["data"]["url"] - -# Step-2: PUT the file in S3 -response2 = requests.put( - url, - headers=headers_ct, - data=data, -) -# close the iRODS file -data.close() -# verify status -print(str(response2)) # - -# Step-3: POST (link) the uploaded file to a Dataset -# create a dictionary for the metadata of the data file -dm_dict = { - "description": "This is the description of the directly uploaded file.", # TO DO: get from iRODS metadata - "directoryLabel": "data/subdir1", # TO DO: get from iRODS, based on the path of the file in a dataset - "categories": ["Data"], - "restrict": "false", - "storageIdentifier": response1.json()["data"]["storageIdentifier"], - "fileName": src_dataObj.name, - "mimeType": mimeTypeVal, - "checksum": {"@type": "SHA-256", "@value": chksumVal}, -} -# create a dictionary for jsonData -files = { - "jsonData": (None, f"{dm_dict}"), -} -# send the POST 
request -response3 = requests.post( - f"{BASE_URL}/api/datasets/:persistentId/add?persistentId={dv_ds_DOI}", - headers=headers_key, - files=files, -) -# verify status -print(str(response3)) # - -### Clean-up iRODS session ### -session.cleanup() diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/resources/mango2dv-demo-1.0.0-published.json b/tests/resources/mango2dv-demo-1.0.0-published.json new file mode 100644 index 0000000..15c1680 --- /dev/null +++ b/tests/resources/mango2dv-demo-1.0.0-published.json @@ -0,0 +1,107 @@ +{ + "schema_name": "mango2dv-demo", + "version": "1.0.0", + "status": "published", + "properties": { + "title": { + "title": "Title", + "type": "text", + "placeholder": "", + "pattern": "", + "required": true, + "help": "The main title of the Dataset" + }, + "author": { + "title": "Author", + "properties": { + "authorName": { + "title": "Name", + "type": "text", + "placeholder": "Last-name, First-name", + "pattern": "", + "required": true, + "help": "The name of the author, such as the person's name or the name of the organization" + }, + "authorAffiliation": { + "title": "Affiliation", + "type": "text", + "placeholder": "", + "pattern": "", + "required": true, + "help": "The name of the entity affiliated with the author, e.g. an organization's name" + } + }, + "type": "object", + "repeatable": false, + "help": "The entity, e.g. 
person or organization, that created the Dataset" + }, + "datasetContact": { + "title": "Point of Contact", + "properties": { + "datasetContactName": { + "title": "Name", + "type": "text", + "placeholder": "Last-name, First-name", + "pattern": "", + "required": true, + "help": "The name of the author, such as the person's name or the name of the organization" + }, + "datasetContactEmail": { + "title": "E-mail", + "type": "text", + "placeholder": "", + "pattern": "", + "required": false, + "help": "The point of contact's email address" + } + }, + "type": "object", + "repeatable": false, + "help": "The entity, e.g. person or organization, that users of the Dataset can contact with questions" + }, + "dsDescription": { + "title": "Description", + "properties": { + "dsDescriptionValue": { + "title": "Text of the description", + "type": "textarea", + "placeholder": "This dataset contains data used in...", + "pattern": "", + "required": true, + "help": "The name of the author, such as the person's name or the name of the organization" + } + }, + "type": "object", + "repeatable": true, + "help": "A summary describing the purpose, nature, and scope of the Dataset" + }, + "subject": { + "title": "Subject", + "type": "select", + "values": [ + "Agricultural Sciences", + "Arts and Humanities", + "Astronomy and Astrophysics", + "Business and Management", + "Chemistry", + "Computer and Information Science", + "Earth and Environmental Sciences", + "Engineering", + "Law", + "Mathematical Sciences", + "Medicine, Health and Life Sciences", + "Physics", + "Social Sciences", + "Other", + "Demo Only" + ], + "multiple": true, + "ui": "checkbox", + "help": "The area of study relevant to the Dataset" + } + }, + "edited_by": "u0112360", + "realm": "datateam_set", + "title": "ManGO Dataset to Dataverse Demo", + "parent": "" +} \ No newline at end of file diff --git a/doc/metadata/mdDataset_Demo.json b/tests/resources/template_Demo.json similarity index 84% rename from 
doc/metadata/mdDataset_Demo.json rename to tests/resources/template_Demo.json index 810f142..40ac6ab 100644 --- a/doc/metadata/mdDataset_Demo.json +++ b/tests/resources/template_Demo.json @@ -4,7 +4,7 @@ "citation": { "fields": [ { - "value": "User Script July 2024", + "value": "...Title...", "typeClass": "primitive", "multiple": false, "typeName": "title" @@ -13,13 +13,13 @@ "value": [ { "authorName": { - "value": "Kafetzaki, Danai", + "value": "...LastName..., ...FirstName...", "typeClass": "primitive", "multiple": false, "typeName": "authorName" }, "authorAffiliation": { - "value": "KU Leuven", + "value": "...Affiliation...", "typeClass": "primitive", "multiple": false, "typeName": "authorAffiliation" @@ -34,13 +34,13 @@ "value": [ { "datasetContactEmail": { - "value": "danai.kafetzaki@kuleuven.be", + "value": "...Email...", "typeClass": "primitive", "multiple": false, "typeName": "datasetContactEmail" }, "datasetContactName": { - "value": "Kafetzaki, Danai", + "value": "...LastName..., ...FirstName...", "typeClass": "primitive", "multiple": false, "typeName": "datasetContactName" @@ -55,7 +55,7 @@ "value": [ { "dsDescriptionValue": { - "value": "This is a minimal end-to-end implementation for iRODS-Dataverse integartion, a KU Leuven and SURF collaboration", + "value": "...Description...", "typeClass": "primitive", "multiple": false, "typeName": "dsDescriptionValue" @@ -68,7 +68,7 @@ }, { "value": [ - "Demo Only" + "...One-of-the-subjects-at-/doc/metadata/CVs/subjects_Demo.json..." 
], "typeClass": "controlledVocabulary", "multiple": true, diff --git a/src/tests/test_avu2json.py b/tests/test_avu2json.py similarity index 71% rename from src/tests/test_avu2json.py rename to tests/test_avu2json.py index 1822607..b59b7d8 100644 --- a/src/tests/test_avu2json.py +++ b/tests/test_avu2json.py @@ -1,5 +1,6 @@ import unittest -from avu2json import * +import os.path +from irods2dataverse.avu2json import * class TestFieldTransformation(unittest.TestCase): @@ -7,11 +8,11 @@ def setUp(self): self.metadatadict = { "author": { "authorAffiliation": "KU Leuven", - "authorName": "Kafetzaki, Danai", + "authorName": "Doe, Jane", }, "datasetContact": { - "datasetContactEmail": "danai.kafetzaki@kuleuven.be", - "datasetContactName": "Kafetzaki, Danai", + "datasetContactEmail": "user.name@kuleuven.be", + "datasetContactName": "Doe, Jane", }, "dsDescription": [ { @@ -21,7 +22,9 @@ def setUp(self): "subject": ["Demo Only"], "title": "Minimum Viable Workflow - 16 May 2024", } - self.schema_demo_path = "../doc/metadata/mango2dv-demo-1.0.0-published.json" + self.schema_demo_path = os.path.join( + os.path.dirname(__file__), "resources", "mango2dv-demo-1.0.0-published.json" + ) def test_validation(self): validated_metadata = parse_json_metadata( @@ -37,9 +40,10 @@ def test_fill_in_simple_field(self): "typeName": "title", } new_title = update_template(title_template, self.metadatadict) - self.assertEqual(title_template["typeClass"], new_title["typeClass"]) - self.assertEqual(title_template["multiple"], new_title["multiple"]) - self.assertEqual(title_template["typeName"], new_title["typeName"]) + + for property in ["typeClass", "multiple", "typeName"]: + with self.subTest(property=property): + self.assertEqual(title_template[property], new_title[property]) self.assertEqual(self.metadatadict["title"], new_title["value"]) def test_fill_in_composite_field(self): @@ -65,21 +69,26 @@ def test_fill_in_composite_field(self): "typeName": "author", } new_author = 
update_template(author_template, self.metadatadict) - self.assertEqual(author_template["typeClass"], new_author["typeClass"]) - self.assertEqual(author_template["multiple"], new_author["multiple"]) - self.assertEqual(author_template["typeName"], new_author["typeName"]) + for property in ["typeClass", "multiple", "typeName"]: + with self.subTest(property=property): + self.assertEqual(author_template[property], new_author[property]) original_authorname = author_template["value"][0]["authorName"] new_authorname = new_author["value"][0]["authorName"] - self.assertEqual(original_authorname["typeClass"], new_authorname["typeClass"]) - self.assertEqual(original_authorname["multiple"], new_authorname["multiple"]) - self.assertEqual(original_authorname["typeName"], new_authorname["typeName"]) + for property in ["typeClass", "multiple", "typeName"]: + with self.subTest(property=property): + self.assertEqual( + original_authorname[property], new_authorname[property] + ) self.assertEqual( self.metadatadict["author"]["authorName"], new_authorname["value"] ) def test_rewriting_template(self): - demo_template = extract_template("../doc/metadata/template_Demo.json") + template_path = os.path.join( + os.path.dirname(__file__), "resources", "template_Demo.json" + ) + demo_template = extract_template(template_path) self.assertIsInstance(demo_template, dict) fields = demo_template["datasetVersion"]["metadataBlocks"]["citation"]["fields"]